Spaces:

tacab
/

TTS_SOMALI

Sleeping

App Files Community

zakihassan04 committed on May 19

Commit

5660185

verified ·

1 Parent(s): 721f33e

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -64

app.py CHANGED Viewed

@@ -1,64 +1,57 @@
-"""
-app.py - Hugging Face Space for Somali Multi-Speaker TTS
-This script:
-  - Installs required dependencies
-  - Installs VITS inference package
-  - Downloads fine-tuned multi-speaker model from HF Hub: "Somali-tts/somali_tts_model"
-  - Runs a Gradio interface to synthesize Somali text with Male/Female voices
-Usage:
-  Push this file to a Hugging Face Space (Gradio) and include a requirements.txt with core dependencies.
-"""
-import subprocess, sys
-# 1. Install core dependencies
-_deps = ["gradio", "numpy", "soundfile", "huggingface_hub", "torch"]
-subprocess.run([sys.executable, "-m", "pip", "install", *_deps], check=True)
-# 2. Install VITS inference package
-subprocess.run([sys.executable, "-m", "pip", "install", "git+https://github.com/jaywalnut310/vits.git"], check=True)
-import gradio as gr
-import numpy as np
-import os
-from huggingface_hub import snapshot_download
-from vits.inference import Synthesizer
-# 3. Download the multi-speaker model from HF Hub
-MODEL_REPO = "Somali-tts/somali_tts_model"
-local_dir  = snapshot_download(repo_id=MODEL_REPO)
-CONFIG_FILE = os.path.join(local_dir, "config.json")
-CHECKPOINT  = os.path.join(local_dir, "checkpoint.pth")
-# 4. Initialize the VITS synthesizer
-synthesizer = Synthesizer(CHECKPOINT, CONFIG_FILE)
-# 5. Text-to-speech function
-def tts(text: str, speaker: str):
-    """
-    Args:
-      text (str): Somali text to synthesize
-      speaker (str): "Male" or "Female"
-    Returns:
-      tuple: (numpy.ndarray waveform, int sample_rate)
-    """
-    spk_id = 0 if speaker.lower().startswith("m") else 1
-    wav = synthesizer.tts(text, speaker_id=spk_id)
-    return wav, synthesizer.sample_rate
-# 6. Build Gradio interface
-demo = gr.Interface(
-    fn=tts,
-    inputs=[
-        gr.Textbox(lines=3, label="Enter Somali text to synthesize", placeholder="Qor qoraalka halkan…"),
-        gr.Radio(choices=["Male", "Female"], label="Select speaker voice")
-    ],
-    outputs=gr.Audio(type="numpy", label="Generated Speech"),
-    title="Somali Multi-Speaker TTS",
-    description="Select Male or Female voice to synthesize Somali text.",
-    allow_flagging="never"
-)
-# 7. Launch the app
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

+"""
+app.py - Hugging Face Space for Somali Multi-Speaker TTS
+This script:
+  - Loads a fine-tuned multi-speaker VITS model from HF Hub: "Somali-tts/somali_tts_model"
+  - Runs a Gradio interface to synthesize Somali text with Male/Female voices
+Setup requirements:
+  Create a `requirements.txt` alongside this file with:
+    gradio
+    numpy
+    soundfile
+    huggingface-hub
+    torch
+    git+https://github.com/jaywalnut310/vits.git
+Push both `app.py` and `requirements.txt` to your Hugging Face Space (Gradio template).
+"""
+import gradio as gr
+import numpy as np
+import os
+from huggingface_hub import snapshot_download
+from vits.inference import Synthesizer
+# Download the multi-speaker model from HF Hub
+MODEL_REPO = "Somali-tts/somali_tts_model"
+local_dir  = snapshot_download(repo_id=MODEL_REPO)
+CONFIG_FILE = os.path.join(local_dir, "config.json")
+CHECKPOINT  = os.path.join(local_dir, "checkpoint.pth")
+# Initialize the VITS synthesizer
+synthesizer = Synthesizer(CHECKPOINT, CONFIG_FILE)
+# Text-to-speech function
+# text: Somali text to synthesize
+# speaker: "Male" or "Female"
+def tts(text: str, speaker: str):
+    spk_id = 0 if speaker.lower().startswith("m") else 1
+    wav = synthesizer.tts(text, speaker_id=spk_id)
+    return wav, synthesizer.sample_rate
+# Build Gradio interface
+demo = gr.Interface(
+    fn=tts,
+    inputs=[
+        gr.Textbox(lines=3, label="Enter Somali text to synthesize", placeholder="Qor qoraalka halkan…"),
+        gr.Radio(choices=["Male", "Female"], label="Select speaker voice")
+    ],
+    outputs=gr.Audio(type="numpy", label="Generated Speech"),
+    title="Somali Multi-Speaker TTS",
+    description="Select Male or Female voice to synthesize Somali text.",
+    allow_flagging="never"
+)
+# Launch the app
+if __name__ == "__main__":
+    demo.launch()