Spaces:
Sleeping
Sleeping
import io

import soundfile as sf
import torch
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from transformers import (
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
)
# Hugging Face checkpoint shared by the text processor and the acoustic model.
_TTS_CHECKPOINT = "microsoft/speecht5_tts"

# ASGI application object.
app = FastAPI()

# Text front-end and text-to-speech model, both loaded once at import time.
processor = SpeechT5Processor.from_pretrained(_TTS_CHECKPOINT)
model = SpeechT5ForTextToSpeech.from_pretrained(_TTS_CHECKPOINT)

# Placeholder speaker embedding: a single all-zero 512-dim vector, which gives
# a flat voice. Swap in a real speaker embedding later for a natural timbre.
speaker_embeddings = torch.zeros(1, 512)
# HiFi-GAN vocoder that turns SpeechT5's mel spectrogram into a waveform.
# Loaded once at import time, like the processor and model.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Sample rate (Hz) written into the WAV header.
# NOTE(review): assumed to match the vocoder's output rate; confirm against
# `vocoder.config.sampling_rate`.
_SAMPLE_RATE_HZ = 16000


def speak(text: str) -> StreamingResponse:
    """Synthesize *text* to speech and return it as an in-memory WAV response.

    The text is tokenized with the module-level ``processor``, converted to a
    waveform by ``model.generate_speech`` using the module-level
    ``speaker_embeddings`` and ``vocoder``, and encoded as WAV into a
    ``BytesIO`` buffer that backs the returned response.

    NOTE(review): no route decorator is visible on this function in the part
    of the file reviewed; confirm it is registered with ``app`` elsewhere.

    Args:
        text: The text to synthesize.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav``.
    """
    inputs = processor(text=text, return_tensors="pt")

    # The vocoder must be passed explicitly: without it, generate_speech
    # returns a mel spectrogram (frames x mel bins) rather than audio samples,
    # and writing that to a WAV file produces a multi-channel file of noise.
    speech = model.generate_speech(
        inputs["input_ids"],
        speaker_embeddings,
        vocoder=vocoder,
    )

    # Encode into an in-memory buffer so nothing touches the filesystem.
    buf = io.BytesIO()
    sf.write(buf, speech.numpy(), _SAMPLE_RATE_HZ, format="WAV")
    buf.seek(0)  # rewind so the response streams from the first byte
    return StreamingResponse(buf, media_type="audio/wav")