Moustafa1111111111 committed on
Commit 4390e63 · 1 Parent(s): 6864301

Cleaned up: switched to runtime model download with huggingface_hub

Files changed (3)
  1. Dockerfile +11 -23
  2. local_server_new.py +15 -10
  3. requirements.txt +1 -0
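
The gist of the change, as a minimal sketch (the repo id and filenames are taken from the diffs below; the cache location assumes huggingface_hub defaults): instead of baking the XTTS-v2 weights into the Docker image with wget at build time, the server now fetches them at startup with hf_hub_download, which stores each file in the local Hugging Face cache and returns its path.

from huggingface_hub import hf_hub_download

# First call downloads into the Hugging Face cache; subsequent calls
# return the already-cached path without re-downloading.
config_path = hf_hub_download("coqui/XTTS-v2", "config.json")
print(config_path)  # e.g. .../models--coqui--XTTS-v2/snapshots/<revision>/config.json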
Dockerfile CHANGED
@@ -10,39 +10,27 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     wget \
     && rm -rf /var/lib/apt/lists/*
 
-# Set the working directory
+# Set working directory
 WORKDIR /app
 
-# Copy TTS and install it
-COPY TTS /app/TTS
-WORKDIR /app/TTS
+# Set environment variables
 ENV BLIS_ARCH="generic"
 ENV COQUI_TTS_AGREED=1
-RUN pip install -r requirements.txt --timeout=300
-RUN pip install -e . --timeout=300
 
-# Go back to main app dir
-WORKDIR /app
+# Install Coqui TTS directly
+RUN pip install git+https://github.com/coqui-ai/TTS.git
 
-# Download XTTS model files
-RUN mkdir -p /app/models/xtts_v2
-RUN wget -O /app/models/xtts_v2/config.json https://huggingface.co/coqui/XTTS-v2/resolve/main/config.json?download=true
-RUN wget -O /app/models/xtts_v2/model.pth https://huggingface.co/coqui/XTTS-v2/resolve/main/model.pth?download=true
-RUN wget -O /app/models/xtts_v2/vocab.json https://huggingface.co/coqui/XTTS-v2/resolve/main/vocab.json?download=true
-RUN wget -O /app/models/xtts_v2/dvae.pth https://huggingface.co/coqui/XTTS-v2/resolve/main/dvae.pth?download=true
-RUN wget -O /app/models/xtts_v2/speakers_xtts.pth https://huggingface.co/coqui/XTTS-v2/resolve/main/speakers_xtts.pth?download=true
 
-# Add speaker reference and other files
+# Install other dependencies
+COPY requirements.txt /app/
+RUN pip install -r /app/requirements.txt --timeout=300
 
+# Copy app files
+COPY local_server_new.py /app/
 COPY audio/speaker_reference.wav /app/audio/speaker_reference.wav
 COPY Web_Page /app/Web_Page
-COPY local_server_new.py /app/
-COPY requirements.txt /app/
-
-# Install app requirements
-RUN pip install -r /app/requirements.txt --timeout=300
 
-# Expose default HF port
+# Expose the port used by Hugging Face Spaces
 EXPOSE 7860
 
-# Run the server directly
+# Start the FastAPI server
 CMD ["python", "-m", "uvicorn", "local_server_new:app", "--host", "0.0.0.0", "--port", "7860"]
local_server_new.py CHANGED
@@ -8,6 +8,7 @@ import os
 from TTS.api import TTS
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 from langdetect import detect
+from huggingface_hub import hf_hub_download
 
 # Allowlist XttsConfig so torch.load doesn't raise UnpicklingError
 from torch.serialization import add_safe_globals
@@ -32,17 +33,21 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# Load TTS model from local files
+# ✅ Dynamically download XTTS v2 model files from Hugging Face
 try:
-    model_dir = "/app/models/xtts_v2"
-    config_path = os.path.join(model_dir, "config.json")
-    # When providing config_path, TTS might expect the directory for model_path
+    print("Downloading XTTS v2 model files from Hugging Face...")
+    model_path = hf_hub_download("coqui/XTTS-v2", "model.pth")
+    config_path = hf_hub_download("coqui/XTTS-v2", "config.json")
+    vocab_path = hf_hub_download("coqui/XTTS-v2", "vocab.json")
+    dvae_path = hf_hub_download("coqui/XTTS-v2", "dvae.pth")
+    speakers_path = hf_hub_download("coqui/XTTS-v2", "speakers_xtts.pth")
+
+    model_dir = os.path.dirname(model_path)
     tts = TTS(model_path=model_dir, config_path=config_path).to("cuda" if torch.cuda.is_available() else "cpu")
-    print("XTTS v2 model loaded successfully from local files.")
+    print("✅ XTTS v2 model loaded successfully.")
 except Exception as e:
-    print(f"Error loading XTTS v2 model from local files: {e}")
-    print("Falling back to loading by model name (license might be required).")
-    tts = TTS("tts_models/multilingual/multi-dataset-xtts_v2").to("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"❌ Failed to load XTTS v2 model: {e}")
+    raise RuntimeError("Failed to initialize TTS model.")
 
 # Load sentiment models
 arabic_model_name = "aubmindlab/bert-base-arabertv02-twitter"
@@ -115,7 +120,7 @@ def text_to_speech(msg: Message):
         text=text,
         file_path=output_filename,
         emotion=emotion,
-        speaker_wav="/app/audio/speaker_reference.wav",  # Updated path
+        speaker_wav="/app/audio/speaker_reference.wav",
         language=language
     )
     return {
@@ -129,4 +134,4 @@ def text_to_speech(msg: Message):
 # ✅ Serve the audio file
 @app.get("/audio")
 def get_audio():
-    return FileResponse("output.wav", media_type="audio/wav", filename="output.wav")
+    return FileResponse("output.wav", media_type="audio/wav", filename="output.wav")
requirements.txt CHANGED
@@ -175,4 +175,5 @@ Werkzeug==3.1.3
 wrapt==1.17.2
 yarl==1.19.0
 zipp==3.21.0
+huggingface_hub>=0.19.0
 # Force rebuild