Spaces:

sitatech
/

vibe-shopping

Running

File size: 1,261 Bytes

18c74ed
bc2a0f7
080eb0f
 
 
 
 
 
 
 
 
 
18c74ed
 
080eb0f
 
bc2a0f7
080eb0f
bc2a0f7
080eb0f
 
 
 
 
 
 
 
 
 
 
938a6cd
080eb0f
 
 
 
 
 
 
 
bc2a0f7
18c74ed

import os
from typing import Generator, Iterator

import numpy as np
from fastrtc import KokoroTTSOptions, get_tts_model
from stream2sentence import generate_sentences

from mcp_host.tts.utils import KOKORO_TO_STD_LANG, VOICES

# Public API of this module.
__all__ = ["stream_text_to_speech"]


# Load the Kokoro TTS model once, at import time, unless the LOCALE_RUN
# environment variable is set to a non-empty value. When it is set, `model`
# is left undefined in this module, so code that needs the model must not be
# used (or the model must be provided some other way).
# NOTE(review): "LOCALE_RUN" looks like it may be meant as "LOCAL_RUN" —
# confirm against the deployment configuration before renaming.
if not os.getenv("LOCALE_RUN"):
    model = get_tts_model(model="kokoro")


def stream_text_to_speech(
    text_stream: Iterator[str], voice: str | None = None
) -> Generator[tuple[int, np.ndarray], None, None]:
    """
    Convert text to speech using the specified voice.

    Args:
        text_stream (Iterator[str]): An iterator that yields text strings to convert to speech.
        voice (str | None): The voice to use for the conversion. Default to af_heart.

    Yields:
        np.ndarray: The audio as a NumPy array.
    """
    voice = voice or "af_heart"
    if voice not in VOICES.values():
        raise ValueError(f"Voice '{voice}' is not available.")

    kokoro_lang = voice[0]
    standard_lang_code = KOKORO_TO_STD_LANG.get(kokoro_lang, "en")

    options = KokoroTTSOptions(voice=voice, lang=standard_lang_code)

    for text in generate_sentences(text_stream, language=standard_lang_code):
        for audio in model.stream_tts_sync(text, options):
            yield audio