anuj-exe committed on
Commit
49bf0ac
·
verified ·
1 Parent(s): 52d9830

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -3
app.py CHANGED
@@ -1,10 +1,10 @@
1
- # app.py
2
  from fastapi import FastAPI, Query
3
  from fastapi.responses import StreamingResponse
4
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
5
  import torch
6
  import io
7
  import soundfile as sf
 
8
 
9
  app = FastAPI(title="SpeechT5 TTS API")
10
 
@@ -13,8 +13,20 @@ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
13
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
14
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
15
 
16
- # Dummy speaker embedding (flat, neutral voice)
17
- speaker_embeddings = torch.zeros((1, 512))
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  @app.get("/speak")
 
 
1
  from fastapi import FastAPI, Query
2
  from fastapi.responses import StreamingResponse
3
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
4
  import torch
5
  import io
6
  import soundfile as sf
7
+ import requests
8
 
9
  app = FastAPI(title="SpeechT5 TTS API")
10
 
 
13
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
14
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
15
 
16
+
17
# Function to load a speaker embedding from a URL
def load_speaker_embedding(url: str, timeout: float = 30.0) -> torch.Tensor:
    """Download a raw float32 speaker embedding and return it as a batched tensor.

    Args:
        url: Location of a headerless binary file whose bytes are consecutive
            float32 values.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits forever on a stalled connection.

    Returns:
        A float32 tensor of shape ``(1, N)``, where ``N`` is the number of
        float32 values in the downloaded payload.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
        ValueError: If the payload is empty or its length is not a whole
            number of float32 values.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    payload = response.content
    float32_size = 4  # bytes per float32 element
    if not payload or len(payload) % float32_size:
        raise ValueError(
            f"Speaker embedding at {url!r} has an invalid size of "
            f"{len(payload)} bytes; expected a non-empty multiple of "
            f"{float32_size}."
        )

    # torch.frombuffer shares memory with the buffer it is given, and `bytes`
    # is read-only; copying into a bytearray gives the tensor writable storage
    # and avoids PyTorch's non-writable-buffer warning.
    embedding = torch.frombuffer(bytearray(payload), dtype=torch.float32)
    return embedding.unsqueeze(0)  # Add batch dimension
24
+
25
+
26
# Example voice: US female 1, downloaded when this statement runs
speaker_embeddings = load_speaker_embedding(
    url="https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_slt_arctic-wav-arctic_a0001.bin"
)
30
 
31
 
32
  @app.get("/speak")