Spaces:
Running
Running
| import os | |
| from typing import Generator, Iterator | |
| import numpy as np | |
| from fastrtc import KokoroTTSOptions, get_tts_model | |
| from stream2sentence import generate_sentences | |
| from mcp_host.tts.utils import KOKORO_TO_STD_LANG, VOICES | |
| __all__ = ["stream_text_to_speech"] | |
# Load the Kokoro TTS model once, at import time. Setting the LOCALE_RUN
# environment variable to any non-empty value skips the load; in that case
# this statement leaves `model` unbound.
if not os.environ.get("LOCALE_RUN"):
    model = get_tts_model(model="kokoro")
def stream_text_to_speech(
    text_stream: Iterator[str], voice: str | None = None
) -> Generator[tuple[int, np.ndarray], None, None]:
    """
    Stream synthesized speech for incoming text, one sentence at a time.

    The text stream is split into sentences with ``generate_sentences``; each
    sentence is handed to the module-level ``model`` and every audio chunk the
    model produces is yielded as soon as it is available.

    Args:
        text_stream (Iterator[str]): An iterator that yields text strings to convert to speech.
        voice (str | None): The voice to use for the conversion. ``None`` or an
            empty string falls back to ``"af_heart"``.

    Yields:
        tuple[int, np.ndarray]: Audio chunks exactly as produced by
            ``model.stream_tts_sync`` (presumably ``(sample_rate, samples)`` --
            TODO confirm against the fastrtc TTS documentation).

    Raises:
        ValueError: If ``voice`` is not one of the values of ``VOICES``. Because
            this is a generator function, the check runs when the generator is
            first advanced, not when the function is called.
    """
    voice = voice or "af_heart"
    if voice not in VOICES.values():
        raise ValueError(f"Voice '{voice}' is not available.")

    # The first character of the voice id is the lookup key into
    # KOKORO_TO_STD_LANG; keys missing from the mapping fall back to "en".
    kokoro_lang = voice[0]
    standard_lang_code = KOKORO_TO_STD_LANG.get(kokoro_lang, "en")
    options = KokoroTTSOptions(voice=voice, lang=standard_lang_code)

    # NOTE(review): `model` is only bound at import time when the LOCALE_RUN
    # environment variable is unset; with it set, the lookup below raises
    # NameError unless something else provides `model` -- confirm intent.
    for text in generate_sentences(text_stream, language=standard_lang_code):
        yield from model.stream_tts_sync(text, options)