Spaces:
Running
Running
File size: 1,261 Bytes
18c74ed bc2a0f7 080eb0f 18c74ed 080eb0f bc2a0f7 080eb0f bc2a0f7 080eb0f 938a6cd 080eb0f bc2a0f7 18c74ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import os
from typing import Generator, Iterator
import numpy as np
from fastrtc import KokoroTTSOptions, get_tts_model
from stream2sentence import generate_sentences
from mcp_host.tts.utils import KOKORO_TO_STD_LANG, VOICES
# Names exported by `from <this module> import *`.
__all__ = ["stream_text_to_speech"]
# Load the Kokoro TTS model once, at import time, unless the LOCALE_RUN
# environment variable is set to a non-empty value. When it is set, `model`
# is never bound in this module, so iterating stream_text_to_speech() fails
# with NameError.
# NOTE(review): presumably LOCALE_RUN marks a local run where the model load
# should be skipped — confirm the intended variable name and usage.
if not os.getenv("LOCALE_RUN"):
    model = get_tts_model(model="kokoro")
def stream_text_to_speech(
    text_stream: Iterator[str], voice: str | None = None
) -> Generator[tuple[int, np.ndarray], None, None]:
    """
    Convert a stream of text to speech using the specified voice.

    The incoming text is regrouped into sentences by ``generate_sentences``
    and each sentence is synthesized with the module-level ``model``.

    This is a generator function: nothing in the body runs, including the
    voice validation, until the returned generator is first iterated.

    Args:
        text_stream (Iterator[str]): An iterator that yields text strings to
            convert to speech.
        voice (str | None): The voice to use for the conversion. ``None`` or
            an empty string falls back to ``af_heart``.

    Yields:
        tuple[int, np.ndarray]: Each item produced by
            ``model.stream_tts_sync``, passed through unchanged. Presumably
            ``(sample_rate, audio_samples)`` — confirm against the TTS model.

    Raises:
        ValueError: If ``voice`` is not one of the values in ``VOICES``.
            Raised on first iteration, not at call time.
        NameError: If the module-level ``model`` was not loaded because the
            ``LOCALE_RUN`` environment variable was set at import time.
    """
    voice = voice or "af_heart"
    if voice not in VOICES.values():
        raise ValueError(f"Voice '{voice}' is not available.")

    # The first character of the voice id is the key into KOKORO_TO_STD_LANG;
    # keys missing from that mapping fall back to "en".
    kokoro_lang = voice[0]
    standard_lang_code = KOKORO_TO_STD_LANG.get(kokoro_lang, "en")
    options = KokoroTTSOptions(voice=voice, lang=standard_lang_code)

    # The same language code drives both sentence splitting and synthesis.
    for text in generate_sentences(text_stream, language=standard_lang_code):
        # Delegate to the per-sentence audio generator so that close()/throw()
        # on this generator are forwarded to it as well.
        yield from model.stream_tts_sync(text, options)
|