Spaces:
Running
Running
Upload 4 files
Browse files
- app.py +54 -40
- config.json +3 -3
app.py
CHANGED
|
@@ -26,15 +26,15 @@ DEFAULT_CONFIG = {
|
|
| 26 |
'max_tokens': 250,
|
| 27 |
'model': 'google/gemma-3-27b-it',
|
| 28 |
'api_key_var': 'API_KEY',
|
| 29 |
-
'theme': '
|
| 30 |
'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
|
| 31 |
'enable_dynamic_urls': True,
|
| 32 |
'enable_file_upload': True,
|
| 33 |
'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
|
| 34 |
'language': 'Italian',
|
| 35 |
'enable_tts': True,
|
| 36 |
-
'tts_model': '
|
| 37 |
-
'tts_voice': '
|
| 38 |
'locked': False
|
| 39 |
}
|
| 40 |
|
|
@@ -533,51 +533,66 @@ def verify_hf_token_access() -> Tuple[bool, str]:
|
|
| 533 |
|
| 534 |
|
| 535 |
def generate_tts(text: str, max_retries: int = 2) -> Tuple[Optional[Tuple[int, np.ndarray]], str]:
|
| 536 |
-
"""Generate TTS audio using
|
| 537 |
if not ENABLE_TTS or not text:
|
| 538 |
return None, "TTS disabled or no text provided"
|
| 539 |
|
| 540 |
-
|
| 541 |
-
if not
|
| 542 |
-
return None, "⚠️
|
| 543 |
|
| 544 |
# Limit text length for TTS
|
| 545 |
-
text = text[:
|
| 546 |
|
| 547 |
-
#
|
| 548 |
-
|
|
|
|
| 549 |
|
| 550 |
for attempt in range(max_retries):
|
| 551 |
try:
|
| 552 |
-
headers = {
|
| 553 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 555 |
|
| 556 |
response = requests.post(
|
| 557 |
api_url,
|
| 558 |
headers=headers,
|
| 559 |
json=payload,
|
| 560 |
-
timeout=
|
| 561 |
)
|
| 562 |
|
| 563 |
if response.status_code == 200:
|
| 564 |
-
#
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
return None, "
|
| 577 |
|
| 578 |
else:
|
| 579 |
try:
|
| 580 |
-
error_msg = response.json().get('error', 'Unknown error')
|
| 581 |
except:
|
| 582 |
error_msg = response.text if response.text else 'Unknown error'
|
| 583 |
return None, f"❌ API Error ({response.status_code}): {error_msg}"
|
|
@@ -712,11 +727,11 @@ def create_interface():
|
|
| 712 |
if not last_message:
|
| 713 |
return None, gr.update(visible=False), gr.update(value="⚠️ No message to read", visible=True)
|
| 714 |
|
| 715 |
-
|
| 716 |
|
| 717 |
-
if
|
| 718 |
return (
|
| 719 |
-
|
| 720 |
gr.update(visible=True),
|
| 721 |
gr.update(value=status_msg, visible=True)
|
| 722 |
)
|
|
@@ -992,20 +1007,19 @@ def create_interface():
|
|
| 992 |
info="Enable text-to-speech for assistant responses"
|
| 993 |
)
|
| 994 |
edit_tts_model = gr.Dropdown(
|
| 995 |
-
label="TTS Model",
|
| 996 |
choices=[
|
| 997 |
-
"
|
| 998 |
-
"
|
| 999 |
-
"espnet/kan-bayashi_ljspeech_vits",
|
| 1000 |
-
"microsoft/speecht5_tts"
|
| 1001 |
],
|
| 1002 |
-
value=config.get('tts_model', '
|
| 1003 |
allow_custom_value=True
|
| 1004 |
)
|
| 1005 |
edit_tts_voice = gr.Dropdown(
|
| 1006 |
label="Voice",
|
| 1007 |
-
choices=["
|
| 1008 |
-
value=config.get('tts_voice', '
|
|
|
|
| 1009 |
)
|
| 1010 |
|
| 1011 |
# Configuration actions
|
|
@@ -1095,8 +1109,8 @@ def create_interface():
|
|
| 1095 |
DEFAULT_CONFIG['enable_dynamic_urls'],
|
| 1096 |
DEFAULT_CONFIG['enable_file_upload'],
|
| 1097 |
DEFAULT_CONFIG.get('enable_tts', False),
|
| 1098 |
-
DEFAULT_CONFIG.get('tts_model', '
|
| 1099 |
-
DEFAULT_CONFIG.get('tts_voice', '
|
| 1100 |
"✅ Reset to default configuration"
|
| 1101 |
)
|
| 1102 |
else:
|
|
|
|
| 26 |
'max_tokens': 250,
|
| 27 |
'model': 'google/gemma-3-27b-it',
|
| 28 |
'api_key_var': 'API_KEY',
|
| 29 |
+
'theme': 'Base',
|
| 30 |
'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
|
| 31 |
'enable_dynamic_urls': True,
|
| 32 |
'enable_file_upload': True,
|
| 33 |
'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
|
| 34 |
'language': 'Italian',
|
| 35 |
'enable_tts': True,
|
| 36 |
+
'tts_model': 'openai/tts-1-hd',
|
| 37 |
+
'tts_voice': 'onyx',
|
| 38 |
'locked': False
|
| 39 |
}
|
| 40 |
|
|
|
|
| 533 |
|
| 534 |
|
| 535 |
def generate_tts(text: str, max_retries: int = 2) -> Tuple[Optional[Tuple[int, np.ndarray]], str]:
|
| 536 |
+
"""Generate TTS audio using OpenAI's TTS API through OpenRouter"""
|
| 537 |
if not ENABLE_TTS or not text:
|
| 538 |
return None, "TTS disabled or no text provided"
|
| 539 |
|
| 540 |
+
api_key = os.getenv(API_KEY_VAR)
|
| 541 |
+
if not api_key:
|
| 542 |
+
return None, f"⚠️ {API_KEY_VAR} not configured for TTS"
|
| 543 |
|
| 544 |
# Limit text length for TTS
|
| 545 |
+
text = text[:1000] # OpenAI supports up to 4096 chars but let's be reasonable
|
| 546 |
|
| 547 |
+
# OpenAI TTS models and voices
|
| 548 |
+
model = TTS_MODEL if TTS_MODEL.startswith("openai/") else "openai/tts-1"
|
| 549 |
+
voice = TTS_VOICE if TTS_VOICE in ["alloy", "echo", "fable", "onyx", "nova", "shimmer"] else "alloy"
|
| 550 |
|
| 551 |
for attempt in range(max_retries):
|
| 552 |
try:
|
| 553 |
+
headers = {
|
| 554 |
+
"Authorization": f"Bearer {api_key}",
|
| 555 |
+
"HTTP-Referer": "https://huggingface.co",
|
| 556 |
+
"X-Title": SPACE_NAME,
|
| 557 |
+
"Content-Type": "application/json"
|
| 558 |
+
}
|
| 559 |
|
| 560 |
+
# OpenRouter endpoint for OpenAI TTS
|
| 561 |
+
api_url = "https://openrouter.ai/api/v1/audio/speech"
|
| 562 |
+
|
| 563 |
+
payload = {
|
| 564 |
+
"model": model,
|
| 565 |
+
"input": text,
|
| 566 |
+
"voice": voice,
|
| 567 |
+
"response_format": "mp3", # Can be mp3, opus, aac, flac
|
| 568 |
+
"speed": 1.0 # 0.25 to 4.0
|
| 569 |
+
}
|
| 570 |
|
| 571 |
response = requests.post(
|
| 572 |
api_url,
|
| 573 |
headers=headers,
|
| 574 |
json=payload,
|
| 575 |
+
timeout=30
|
| 576 |
)
|
| 577 |
|
| 578 |
if response.status_code == 200:
|
| 579 |
+
# OpenAI returns MP3 audio data
|
| 580 |
+
# Convert to format Gradio expects
|
| 581 |
+
try:
|
| 582 |
+
# Save temporarily and load with a library that can read MP3
|
| 583 |
+
import tempfile
|
| 584 |
+
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp_file:
|
| 585 |
+
tmp_file.write(response.content)
|
| 586 |
+
tmp_path = tmp_file.name
|
| 587 |
+
|
| 588 |
+
# For now, return the file path - Gradio can handle MP3 files
|
| 589 |
+
return tmp_path, "✅ Audio generated successfully"
|
| 590 |
+
except Exception as e:
|
| 591 |
+
return None, f"❌ Error processing audio: {str(e)}"
|
| 592 |
|
| 593 |
else:
|
| 594 |
try:
|
| 595 |
+
error_msg = response.json().get('error', {}).get('message', 'Unknown error')
|
| 596 |
except:
|
| 597 |
error_msg = response.text if response.text else 'Unknown error'
|
| 598 |
return None, f"❌ API Error ({response.status_code}): {error_msg}"
|
|
|
|
| 727 |
if not last_message:
|
| 728 |
return None, gr.update(visible=False), gr.update(value="⚠️ No message to read", visible=True)
|
| 729 |
|
| 730 |
+
audio_file, status_msg = generate_tts(last_message)
|
| 731 |
|
| 732 |
+
if audio_file:
|
| 733 |
return (
|
| 734 |
+
audio_file, # File path for Gradio to play
|
| 735 |
gr.update(visible=True),
|
| 736 |
gr.update(value=status_msg, visible=True)
|
| 737 |
)
|
|
|
|
| 1007 |
info="Enable text-to-speech for assistant responses"
|
| 1008 |
)
|
| 1009 |
edit_tts_model = gr.Dropdown(
|
| 1010 |
+
label="TTS Model",
|
| 1011 |
choices=[
|
| 1012 |
+
"openai/tts-1",
|
| 1013 |
+
"openai/tts-1-hd"
|
|
|
|
|
|
|
| 1014 |
],
|
| 1015 |
+
value=config.get('tts_model', 'openai/tts-1'),
|
| 1016 |
allow_custom_value=True
|
| 1017 |
)
|
| 1018 |
edit_tts_voice = gr.Dropdown(
|
| 1019 |
label="Voice",
|
| 1020 |
+
choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
|
| 1021 |
+
value=config.get('tts_voice', 'alloy'),
|
| 1022 |
+
info="alloy: neutral, echo: male, fable: british male, onyx: deep male, nova: female, shimmer: female"
|
| 1023 |
)
|
| 1024 |
|
| 1025 |
# Configuration actions
|
|
|
|
| 1109 |
DEFAULT_CONFIG['enable_dynamic_urls'],
|
| 1110 |
DEFAULT_CONFIG['enable_file_upload'],
|
| 1111 |
DEFAULT_CONFIG.get('enable_tts', False),
|
| 1112 |
+
DEFAULT_CONFIG.get('tts_model', 'openai/tts-1'),
|
| 1113 |
+
DEFAULT_CONFIG.get('tts_voice', 'alloy'),
|
| 1114 |
"✅ Reset to default configuration"
|
| 1115 |
)
|
| 1116 |
else:
|
config.json
CHANGED
|
@@ -21,7 +21,7 @@
|
|
| 21 |
"enable_dynamic_urls": true,
|
| 22 |
"enable_file_upload": true,
|
| 23 |
"enable_tts": true,
|
| 24 |
-
"tts_model": "
|
| 25 |
-
"tts_voice": "
|
| 26 |
-
"theme": "
|
| 27 |
}
|
|
|
|
| 21 |
"enable_dynamic_urls": true,
|
| 22 |
"enable_file_upload": true,
|
| 23 |
"enable_tts": true,
|
| 24 |
+
"tts_model": "openai/tts-1-hd",
|
| 25 |
+
"tts_voice": "onyx",
|
| 26 |
+
"theme": "Base"
|
| 27 |
}
|