anuj-exe committed on
Commit
4f0a2be
·
verified ·
1 Parent(s): a4ab86f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -0
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Minimal FastAPI text-to-speech service backed by Microsoft SpeechT5."""

import io

import soundfile as sf
import torch
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from transformers import (
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
)

# Sample rate (Hz) written into the WAV header of every response.
SAMPLE_RATE = 16000

app = FastAPI()

# Load processor, acoustic model and vocoder once at import time so that
# individual requests do not pay the model-loading cost.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
# Without a vocoder, generate_speech() returns a mel spectrogram rather than
# a waveform, so the HiFi-GAN vocoder is required to get playable audio.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Dummy speaker embedding (flat voice). You can later replace with real embeddings.
speaker_embeddings = torch.zeros((1, 512))


@app.get("/speak")
def speak(text: str):
    """Synthesize ``text`` and return it as a WAV audio stream.

    Args:
        text: The text to speak, taken from the ``text`` query parameter.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` containing the
        synthesized mono waveform.

    Raises:
        HTTPException: With status 400 if ``text`` is empty or whitespace.
    """
    if not text.strip():
        raise HTTPException(status_code=400, detail="text must not be empty")

    inputs = processor(text=text, return_tensors="pt")

    # Inference only: no gradients are needed, and passing the vocoder makes
    # generate_speech() return a 1-D waveform instead of a spectrogram.
    with torch.no_grad():
        speech = model.generate_speech(
            inputs["input_ids"],
            speaker_embeddings,
            vocoder=vocoder,
        )

    # Encode into an in-memory buffer so nothing is written to disk.
    buf = io.BytesIO()
    sf.write(buf, speech.cpu().numpy(), SAMPLE_RATE, format="WAV")
    buf.seek(0)

    return StreamingResponse(buf, media_type="audio/wav")