# MohamedRashad's picture
# Update app.py
# ec9c685 verified
import spaces
import logging
import os
from datetime import datetime
import gradio as gr
import soundfile as sf
import torch
from huggingface_hub import snapshot_download
from SparkTTS import SparkTTS
from sparktts.utils.token_parser import LEVELS_MAP_UI
# Fetch the fine-tuned Arabic Spark-TTS weights from the Hugging Face Hub
# (cached locally after the first call) and load the model once at import time.
download_path = snapshot_download("MrEzzat/Spark_TTS_Arabic")
print(f"Model downloaded to: {download_path}")

# Prefer the first CUDA device when one is available; otherwise fall back to CPU.
_device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = SparkTTS(download_path, device=_device)
print(model)
@spaces.GPU()
def run_tts(
    text,
    prompt_text=None,
    prompt_speech=None,
    gender=None,
    pitch=None,
    speed=None,
    save_dir="example/results",
    sample_rate=16000,
):
    """Perform TTS inference and save the generated audio to a WAV file.

    Args:
        text: Text to synthesize.
        prompt_text: Optional transcript of the reference audio (voice cloning).
            Treated as absent when it has fewer than two characters.
        prompt_speech: Optional path to a reference audio file (voice cloning).
        gender: Optional "male"/"female" selector (voice creation).
        pitch: Optional pitch level (voice creation).
        speed: Optional speed level (voice creation).
        save_dir: Directory the WAV file is written into (created if missing).
        sample_rate: Sample rate passed to the WAV writer. Defaults to 16 kHz,
            which is presumably the model's native output rate — TODO confirm.

    Returns:
        The filesystem path of the saved WAV file.
    """
    logging.info("Saving audio to: %s", save_dir)

    # A prompt transcript shorter than two characters carries no useful
    # information, so drop it and let the model rely on the audio alone.
    if prompt_text is not None and len(prompt_text) <= 1:
        prompt_text = None

    # Ensure the save directory exists.
    os.makedirs(save_dir, exist_ok=True)

    # Generate a unique filename from the current timestamp.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    save_path = os.path.join(save_dir, f"{timestamp}.wav")

    logging.info("Starting inference...")
    # Run inference without tracking gradients, then persist the waveform.
    with torch.no_grad():
        wav = model.inference(
            text,
            prompt_speech,
            prompt_text,
            gender,
            pitch,
            speed,
        )
        sf.write(save_path, wav, samplerate=sample_rate)

    logging.info("Audio saved at: %s", save_path)
    return save_path
# Define callback function for voice cloning
def voice_clone(text, prompt_text, prompt_speech):
    """
    Gradio callback to clone a voice using text and an optional prompt speech.

    - text: The input text to be synthesised.
    - prompt_text: Optional transcript of the prompt audio; may be None or ""
      when the user leaves the field empty.
    - prompt_speech: Path of the reference audio file.

    Returns the path of the generated WAV file.
    """
    # Guard against None (Gradio can pass None for an untouched textbox) as
    # well as near-empty strings; either way the transcript is discarded.
    prompt_text_clean = (
        None if not prompt_text or len(prompt_text) < 2 else prompt_text
    )
    audio_output_path = run_tts(
        text, prompt_text=prompt_text_clean, prompt_speech=prompt_speech
    )
    return audio_output_path
# Define callback function for creating new voices
def voice_creation(text, gender, pitch, speed):
    """
    Gradio callback that synthesizes a brand-new voice from parameters.

    - text: The input text for synthesis.
    - gender: 'male' or 'female'.
    - pitch/speed: Slider positions translated through LEVELS_MAP_UI.

    Returns the path of the generated WAV file.
    """
    # Sliders deliver floats; convert to int before indexing the level map.
    return run_tts(
        text,
        gender=gender,
        pitch=LEVELS_MAP_UI[int(pitch)],
        speed=LEVELS_MAP_UI[int(speed)],
    )
# Build the two-tab Gradio UI: "Voice Creation" (parametric voices) and
# "Voice Clone" (reference-audio cloning).
with gr.Blocks() as app:
    # Use HTML for centered title
    gr.HTML(
        '<h1 style="text-align: center;">Arabic Spark-TTS by <a href="https://huggingface.co/MrEzzat/Spark_TTS_Arabic">MrEzzat</a></h1>'
    )
    with gr.Tabs():
        # Voice Creation Tab
        with gr.TabItem("Voice Creation"):
            gr.Markdown("### Create your own voice based on the following parameters")
            with gr.Row():
                with gr.Column():
                    gender = gr.Radio(
                        choices=["male", "female"], value="male", label="Gender"
                    )
                    pitch = gr.Slider(
                        minimum=1, maximum=5, step=1, value=3, label="Pitch"
                    )
                    speed = gr.Slider(
                        minimum=1, maximum=5, step=1, value=3, label="Speed"
                    )
                with gr.Column():
                    text_input_creation = gr.Textbox(
                        label="Input Text",
                        lines=3,
                        placeholder="ุฃูƒุชุจ ู‡ู†ุง ุงู„ู†ุต ุงู„ุฐูŠ ุชุฑูŠุฏ ุชุญูˆูŠู„ู‡ ุฅู„ู‰ ูƒู„ุงู….",
                        value="ูŠู…ูƒู†ูƒ ุฅู†ุดุงุก ุตูˆุช ู…ุฎุตุต ุนู† ุทุฑูŠู‚ ุถุจุท ู…ุนู„ู…ุงุช ู…ุซู„ ุงู„ู†ุบู…ุฉ ูˆุงู„ุณุฑุนุฉ.",
                        text_align="right",
                        rtl=True,
                    )
                    create_button = gr.Button("Create Voice", variant="primary")
                    # Distinct name per tab: previously both tabs shared one
                    # `audio_output` variable, which worked only because each
                    # .click() was bound before the rebinding — confusing and
                    # fragile under reordering.
                    audio_output_creation = gr.Audio(
                        label="Generated Audio", autoplay=True, streaming=True
                    )
            create_button.click(
                voice_creation,
                inputs=[text_input_creation, gender, pitch, speed],
                outputs=[audio_output_creation],
            )
        # Voice Clone Tab
        with gr.TabItem("Voice Clone"):
            gr.Markdown(
                "## Write the text you want to synthesize on the right and upload or record a prompt audio on the left (leave the text of the prompt speech empty if you are getting errors in generation)."
            )
            with gr.Row(equal_height=False):
                with gr.Column():
                    prompt_wav = gr.Audio(
                        type="filepath",
                        label="Upload a prompt audio file, or record one using the microphone.",
                    )
                    prompt_text_input = gr.Textbox(
                        label="Text of prompt speech (Optional; recommended for cloning in the same language.)",
                        lines=3,
                        placeholder="ุฃูƒุชุจ ู‡ู†ุง ู†ุต ุงู„ูƒู„ุงู… ููŠ ุงู„ู…ู„ู ุงู„ุตูˆุชูŠ ุงู„ู…ุฑูู‚.",
                        text_align="right",
                        rtl=True,
                    )
                with gr.Column():
                    text_input = gr.Textbox(
                        label="Text to synthesize",
                        lines=10,
                        value="ุงู„ุณู„ุงู… ุนู„ูŠูƒู… ูˆุฑุญู…ุฉ ุงู„ู„ู‡",
                        placeholder="ุฃูƒุชุจ ู‡ู†ุง ุงู„ู†ุต ุงู„ุฐูŠ ุชุฑูŠุฏ ุชุญูˆูŠู„ู‡ ุฅู„ู‰ ูƒู„ุงู….",
                        text_align="right",
                        rtl=True,
                    )
                    generate_button_clone = gr.Button("Generate", variant="primary")
                    audio_output_clone = gr.Audio(
                        label="Generated Audio", autoplay=True, streaming=True
                    )
            generate_button_clone.click(
                voice_clone,
                inputs=[
                    text_input,
                    prompt_text_input,
                    prompt_wav,
                ],
                outputs=[audio_output_clone],
            )
# Enable the request queue (required for @spaces.GPU workloads) and serve.
app.queue().launch()