zakihassan04 committed on
Commit
5660185
·
verified ·
1 Parent(s): 721f33e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -64
app.py CHANGED
@@ -1,64 +1,57 @@
1
- """
2
- app.py - Hugging Face Space for Somali Multi-Speaker TTS
3
-
4
- This script:
5
- - Installs required dependencies
6
- - Installs VITS inference package
7
- - Downloads fine-tuned multi-speaker model from HF Hub: "Somali-tts/somali_tts_model"
8
- - Runs a Gradio interface to synthesize Somali text with Male/Female voices
9
-
10
- Usage:
11
- Push this file to a Hugging Face Space (Gradio) and include a requirements.txt with core dependencies.
12
- """
13
- import subprocess, sys
14
-
15
- # 1. Install core dependencies
16
- _deps = ["gradio", "numpy", "soundfile", "huggingface_hub", "torch"]
17
- subprocess.run([sys.executable, "-m", "pip", "install", *_deps], check=True)
18
- # 2. Install VITS inference package
19
- subprocess.run([sys.executable, "-m", "pip", "install", "git+https://github.com/jaywalnut310/vits.git"], check=True)
20
-
21
- import gradio as gr
22
- import numpy as np
23
- import os
24
- from huggingface_hub import snapshot_download
25
- from vits.inference import Synthesizer
26
-
27
- # 3. Download the multi-speaker model from HF Hub
28
- MODEL_REPO = "Somali-tts/somali_tts_model"
29
- local_dir = snapshot_download(repo_id=MODEL_REPO)
30
- CONFIG_FILE = os.path.join(local_dir, "config.json")
31
- CHECKPOINT = os.path.join(local_dir, "checkpoint.pth")
32
-
33
- # 4. Initialize the VITS synthesizer
34
- synthesizer = Synthesizer(CHECKPOINT, CONFIG_FILE)
35
-
36
- # 5. Text-to-speech function
37
- def tts(text: str, speaker: str):
38
- """
39
- Args:
40
- text (str): Somali text to synthesize
41
- speaker (str): "Male" or "Female"
42
- Returns:
43
- tuple: (numpy.ndarray waveform, int sample_rate)
44
- """
45
- spk_id = 0 if speaker.lower().startswith("m") else 1
46
- wav = synthesizer.tts(text, speaker_id=spk_id)
47
- return wav, synthesizer.sample_rate
48
-
49
- # 6. Build Gradio interface
50
- demo = gr.Interface(
51
- fn=tts,
52
- inputs=[
53
- gr.Textbox(lines=3, label="Enter Somali text to synthesize", placeholder="Qor qoraalka halkan…"),
54
- gr.Radio(choices=["Male", "Female"], label="Select speaker voice")
55
- ],
56
- outputs=gr.Audio(type="numpy", label="Generated Speech"),
57
- title="Somali Multi-Speaker TTS",
58
- description="Select Male or Female voice to synthesize Somali text.",
59
- allow_flagging="never"
60
- )
61
-
62
- # 7. Launch the app
63
- if __name__ == "__main__":
64
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ """
2
+ app.py - Hugging Face Space for Somali Multi-Speaker TTS
3
+
4
+ This script:
5
+ - Loads a fine-tuned multi-speaker VITS model from HF Hub: "Somali-tts/somali_tts_model"
6
+ - Runs a Gradio interface to synthesize Somali text with Male/Female voices
7
+
8
+ Setup requirements:
9
+ Create a `requirements.txt` alongside this file with:
10
+ gradio
11
+ numpy
12
+ soundfile
13
+ huggingface-hub
14
+ torch
15
+ git+https://github.com/jaywalnut310/vits.git
16
+
17
+ Push both `app.py` and `requirements.txt` to your Hugging Face Space (Gradio template).
18
+ """
19
+ import gradio as gr
20
+ import numpy as np
21
+ import os
22
+ from huggingface_hub import snapshot_download
23
+ from vits.inference import Synthesizer
24
+
25
+ # Download the multi-speaker model from HF Hub
26
+ MODEL_REPO = "Somali-tts/somali_tts_model"
27
+ local_dir = snapshot_download(repo_id=MODEL_REPO)
28
+ CONFIG_FILE = os.path.join(local_dir, "config.json")
29
+ CHECKPOINT = os.path.join(local_dir, "checkpoint.pth")
30
+
31
+ # Initialize the VITS synthesizer
32
+ synthesizer = Synthesizer(CHECKPOINT, CONFIG_FILE)
33
+
34
+ # Text-to-speech function
35
+ # text: Somali text to synthesize
36
+ # speaker: "Male" or "Female"
37
+ def tts(text: str, speaker: str):
38
+ spk_id = 0 if speaker.lower().startswith("m") else 1
39
+ wav = synthesizer.tts(text, speaker_id=spk_id)
40
+ return wav, synthesizer.sample_rate
41
+
42
+ # Build Gradio interface
43
+ demo = gr.Interface(
44
+ fn=tts,
45
+ inputs=[
46
+ gr.Textbox(lines=3, label="Enter Somali text to synthesize", placeholder="Qor qoraalka halkan…"),
47
+ gr.Radio(choices=["Male", "Female"], label="Select speaker voice")
48
+ ],
49
+ outputs=gr.Audio(type="numpy", label="Generated Speech"),
50
+ title="Somali Multi-Speaker TTS",
51
+ description="Select Male or Female voice to synthesize Somali text.",
52
+ allow_flagging="never"
53
+ )
54
+
55
+ # Launch the app
56
+ if __name__ == "__main__":
57
+ demo.launch()