Spaces:
Running
Running
| """Kokoro TTS CLI | |
| Example usage: | |
| python3 -m kokoro --text "The sky above the port was the color of television, tuned to a dead channel." -o file.wav --debug | |
| echo "Bom dia mundo, como vão vocês" > text.txt | |
| python3 -m kokoro -i text.txt -l p --voice pm_alex -o audio.wav | |
| Common issues: | |
| pip not installed: `uv pip install pip` | |
| (Temporary workaround while https://github.com/explosion/spaCy/issues/13747 is not fixed) | |
| espeak not installed: `apt-get install espeak-ng` | |
| """ | |
| import argparse | |
| import wave | |
| from pathlib import Path | |
| from typing import Generator, TYPE_CHECKING | |
| import numpy as np | |
| from loguru import logger | |
# Single-letter language codes offered as choices for the ``--language``
# option, keyed to the language each code stands for.
_LANGUAGE_NAMES = {
    "a": "American English",
    "b": "British English",
    "h": "Hindi",
    "e": "Spanish",
    "f": "French",
    "i": "Italian",
    "p": "Brazilian Portuguese",
    "j": "Japanese",
    "z": "Mandarin Chinese",
}

# Ordered list of the codes alone (dicts preserve insertion order).
languages = list(_LANGUAGE_NAMES)
| if TYPE_CHECKING: | |
| from kokoro import KPipeline | |
def generate_audio(
    text: str, kokoro_language: str, voice: str, speed=1
) -> Generator["KPipeline.Result", None, None]:
    """Lazily run a Kokoro pipeline over ``text`` and yield each result.

    A warning is logged, and synthesis still proceeds, when ``voice`` does
    not start with ``kokoro_language``.

    Args:
        text: Text handed to the pipeline.
        kokoro_language: Language code passed to ``KPipeline`` as
            ``lang_code``.
        voice: Voice name forwarded to the pipeline call.
        speed: Speed value forwarded to the pipeline call unchanged.

    Yields:
        Every item produced by the pipeline call, in order.
    """
    # Imported inside the function; the module-level import of KPipeline is
    # guarded by TYPE_CHECKING and is therefore not available at runtime.
    from kokoro import KPipeline

    voice_matches_language = voice.startswith(kokoro_language)
    if not voice_matches_language:
        logger.warning(f"Voice {voice} is not made for language {kokoro_language}")

    tts = KPipeline(lang_code=kokoro_language)
    # The split pattern matches runs of one or more newlines.
    results = tts(text, voice=voice, speed=speed, split_pattern=r"\n+")
    yield from results
def generate_and_save_audio(
    output_file: Path, text: str, kokoro_language: str, voice: str, speed=1
) -> None:
    """Synthesize ``text`` and stream it into ``output_file`` as a WAV file.

    The file is written as mono, 16-bit PCM with a 24000 Hz frame rate.
    Audio is appended result by result as ``generate_audio`` yields it, so
    the whole utterance is never held in memory at once.

    Args:
        output_file: Destination path; it is resolved to an absolute path
            and opened for writing (an existing file is overwritten).
        text: Text to synthesize.
        kokoro_language: Language code forwarded to ``generate_audio``.
        voice: Voice name forwarded to ``generate_audio``.
        speed: Speed value forwarded to ``generate_audio``.
    """
    with wave.open(str(output_file.resolve()), "wb") as wav_file:
        wav_file.setnchannels(1)  # Mono audio
        wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit audio)
        wav_file.setframerate(24000)  # Sample rate in Hz

        for result in generate_audio(
            text, kokoro_language=kokoro_language, voice=voice, speed=speed
        ):
            logger.debug(result.phonemes)
            if result.audio is None:
                # Nothing to write for this result.
                continue

            # NOTE(review): assumes ``result.audio`` holds float samples
            # nominally in [-1, 1] -- confirm against KPipeline.
            samples = result.audio.numpy()
            # Clamp before scaling: a sample even slightly outside [-1, 1]
            # would otherwise overflow int16 and wrap to the opposite sign,
            # which is audible as a loud click.
            samples = np.clip(samples, -1.0, 1.0)
            audio_bytes = (samples * 32767).astype(np.int16).tobytes()
            wav_file.writeframes(audio_bytes)
def main() -> None:
    """Command-line entry point: parse arguments, read text, write a WAV file.

    The text to synthesize comes from exactly one source: ``--text``,
    ``--input-file``, or standard input when neither option is given.
    When ``--language`` is omitted, the first character of the voice name
    is used as the language code.

    Giving both ``--text`` and ``--input-file`` is reported as a usage error
    through ``parser.error`` (prints the usage message and exits with
    status 2).
    """
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-m",
        "--voice",
        default="af_heart",
        help="Voice to use",
    )
    parser.add_argument(
        "-l",
        "--language",
        help="Language to use (defaults to the one corresponding to the voice)",
        choices=languages,
    )
    parser.add_argument(
        "-o",
        "--output-file",
        "--output_file",
        type=Path,
        help="Path to output WAV file",
        required=True,
    )
    parser.add_argument(
        "-i",
        "--input-file",
        "--input_file",
        type=Path,
        help="Path to input text file (default: stdin)",
    )
    parser.add_argument(
        "-t",
        "--text",
        help="Text to use instead of reading from stdin",
    )
    parser.add_argument(
        "-s",
        "--speed",
        type=float,
        default=1.0,
        help="Speech speed",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Print DEBUG messages to console",
    )
    args = parser.parse_args()

    # ``logger.level("DEBUG")`` only looks a level up; it does not change
    # what gets printed, and loguru's default sink already shows DEBUG.
    # Replace the sink so that --debug really controls verbosity.
    logger.remove()
    logger.add(sys.stderr, level="DEBUG" if args.debug else "INFO")
    logger.debug(args)

    lang = args.language or args.voice[0]

    if args.text is not None and args.input_file is not None:
        parser.error("You cannot specify both 'text' and 'input_file'")

    if args.text is not None:
        # ``is not None`` so that an explicit empty --text is honoured
        # instead of silently falling through to reading stdin.
        text = args.text
    elif args.input_file is not None:
        # Decode explicitly as UTF-8 rather than the platform's locale
        # encoding, which may mis-read non-ASCII text.
        text = args.input_file.read_text(encoding="utf-8")
    else:
        if sys.stdin.isatty():
            # Only prompt a person at a terminal, not a pipe.
            print("Press Ctrl+D to stop reading input and start generating", flush=True)
        # Lines read from stdin keep their trailing newline, so read the
        # stream as-is instead of joining lines with an extra "\n".
        text = sys.stdin.read()
    logger.debug(f"Input text: {text!r}")

    out_file: Path = args.output_file
    if out_file.suffix.lower() != ".wav":
        logger.warning("The output file name should end with .wav")

    generate_and_save_audio(
        output_file=out_file,
        text=text,
        kokoro_language=lang,
        voice=args.voice,
        speed=args.speed,
    )


if __name__ == "__main__":
    main()