# Spaces: Running
| import gradio as gr | |
| import tempfile | |
| import uuid | |
| import os | |
| from kittentts import KittenTTS | |
| import soundfile as sf | |
# Load the TTS model once at import time; generate_speech() and
# get_available_voices() both use this shared instance.
_MODEL_ID = "KittenML/kitten-tts-nano-0.1"
model = KittenTTS(_MODEL_ID)
def generate_speech(text, voice, speed):
    """
    Generate speech from text using KittenTTS and save it as a WAV file.

    Args:
        text (str): Text to convert to speech.
        voice (str): Voice to use for generation.
        speed (float): Speed of speech generation.

    Returns:
        str | None: Path to the generated WAV file in the system temporary
        directory, or None when ``text`` is missing, empty or whitespace-only.

    Raises:
        gr.Error: If synthesis or writing the audio file fails, so the
        failure is reported in the UI instead of being silently dropped.
    """
    # Every caller wires this function to a single audio output, so it must
    # return exactly one value (not a (value, message) pair).
    if not text or not text.strip():
        return None

    try:
        audio = model.generate(text, voice=voice, speed=speed)

        # A UUID in the file name keeps separate requests from writing to
        # the same path.
        output_path = os.path.join(
            tempfile.gettempdir(), f"kitten_tts_{uuid.uuid4()}.wav"
        )

        # Written at 24000 Hz, matching the "24kHz audio output" stated in
        # the app's model information text.
        sf.write(output_path, audio, 24000)
    except Exception as e:
        raise gr.Error(f"Speech generation failed: {e}") from e

    return output_path
def get_available_voices():
    """
    Get the list of available voices from the model.

    Returns:
        list: The voices reported by ``model.available_voices`` as a list, or
        ``["expr-voice-5-m"]`` when the model reports none or the lookup fails.
    """
    fallback = ["expr-voice-5-m"]  # Default voice as fallback
    try:
        # Normalise to a list so callers can index the result regardless of
        # the container type the model exposes.
        voices = list(model.available_voices)
    except Exception:
        # Best-effort lookup: any ordinary failure falls back to the default
        # voice, but KeyboardInterrupt/SystemExit are no longer swallowed.
        return fallback
    return voices or fallback
# Get available voices
available_voices = get_available_voices()


def _example_voice(index):
    """Return the voice for an example row, wrapping around short voice lists.

    The example rows ask for fixed positions in the voice list; wrapping the
    index keeps them valid when fewer voices are available (for instance when
    only the single fallback voice is present) instead of raising IndexError.
    """
    if not available_voices:
        return "expr-voice-5-m"
    return available_voices[index % len(available_voices)]


# Create Gradio interface
with gr.Blocks(title="KittenTTS - Text to Speech", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🐱 KittenTTS - Text to Speech Generator")
    gr.Markdown("Convert your text to high-quality speech using KittenTTS nano model!")

    with gr.Row():
        with gr.Column(scale=2):
            # Input components
            text_input = gr.Textbox(
                label="Text to Convert",
                placeholder="Enter the text you want to convert to speech...",
                lines=4,
                max_lines=10,
            )
            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    choices=available_voices,
                    value=available_voices[0] if available_voices else "expr-voice-5-m",
                    label="Voice Selection",
                    info="Choose the voice for speech generation",
                )
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    step=0.01,
                    value=1.25,
                    label="Speech Speed",
                    info="Adjust the speed of speech (0.5x to 2.0x)",
                )
            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

        with gr.Column(scale=1):
            # Output components
            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False,
                autoplay=True,
            )

    # Example inputs
    gr.Markdown("## 📝 Example Texts")
    examples = gr.Examples(
        examples=[
            ["Hello! This is a test of the KittenTTS model.", _example_voice(2), 1.25],
            ["The quick brown fox jumps over the lazy dog.", _example_voice(1), 1.5],
            ["Welcome to the world of high-quality text-to-speech synthesis!", _example_voice(5), 1],
        ],
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
        fn=generate_speech,
        label="Click on an example to try it out",
        cache_examples="lazy",
    )

    # Model information
    with gr.Accordion("ℹ️ Model Information", open=False):
        gr.Markdown("""
        **Model:** KittenML/kitten-tts-nano-0.1
        **Features:**
        - High-quality text-to-speech synthesis
        - Works without GPU acceleration
        - Multiple voice options
        - Adjustable speech speed
        - 24kHz audio output
        **Usage:**
        1. Enter your text in the text box
        2. Select a voice from the dropdown
        3. Adjust the speech speed if needed
        4. Click "Generate Speech" to create audio
        Generated files are saved in temporary directory with unique UUID filenames.
        """)

    # Event handlers: the button and the Enter key in the text box both run
    # the same generation function with the same inputs and output.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
    )

    # Auto-generate on Enter key (optional)
    text_input.submit(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
    )
# Entry point: only start the app when this file is run as a script.
if __name__ == "__main__":
    # Configure the request queue first, then launch the queued app.
    queued_app = app.queue(default_concurrency_limit=100)
    queued_app.launch()