# Autism_QA/main_basic_interface.py
import os
import sys
import re
from dotenv import load_dotenv
import gradio as gr
from audio_utils import GeminiHandler
import google.genai as genai
from fastrtc import WebRTC, get_cloudflare_turn_credentials_async
from gradio.utils import get_space
from utils import process_query
from logger.custom_logger import CustomLoggerTracker
# Import enhanced functions from specific_utils
from gradio_utils import (
    get_all_document_choices,
    enhanced_document_upload_handler,
    process_text_with_audio_support,
    process_audio_only_response,
    process_both_text_and_audio_response,
    create_document_info_panel,
)
# Setup
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
load_dotenv()
# Initialize the Gemini client
gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
custom_log = CustomLoggerTracker()
logger = custom_log.get_logger("main")
logger.info("Logger initialized for main module")
def parse_score_safely(score_str):
"""Safely parse hallucination score, handling extra characters."""
if not score_str:
return 0
# Extract just the number from strings like "4**" or "Score: 4"
numbers = re.findall(r'\d+', str(score_str))
if numbers:
return int(numbers[0])
return 0
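# Illustrative behavior of the parser above (not exhaustive):
#   parse_score_safely("4**")       -> 4
#   parse_score_safely("Score: 4")  -> 4
#   parse_score_safely("")          -> 0
#   parse_score_safely(None)        -> 0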
def process_text_only(user_input, audio_input, chat_history):
"""Process input and generate only text response using enhanced functions."""
try:
# Use enhanced text processing with audio support
new_history, cleared_input, status_msg, stats_display = process_text_with_audio_support(
user_input, audio_input, chat_history
)
return new_history, status_msg, cleared_input, None
except Exception as e:
logger.error(f"Error in enhanced text processing: {e}")
return chat_history, f"Status: Error - {str(e)}", "", None
def process_audio_only(user_input, audio_input, voice_dropdown, chat_history):
"""Process input and generate only audio response using enhanced functions."""
try:
# Use enhanced audio processing
audio_response, cleared_input, status_msg, stats_display = process_audio_only_response(
user_input, audio_input, voice_dropdown, chat_history
)
return audio_response, status_msg, cleared_input, None
except Exception as e:
logger.error(f"Error in enhanced audio processing: {e}")
return None, f"Status: Error - {str(e)}", "", None
def process_both_text_and_audio(text_input, audio_input, voice_dropdown, chat_history):
"""Process input and generate both text and audio responses using enhanced functions."""
try:
# Use enhanced combined processing
new_history, audio_response, cleared_input, status_msg, stats_display = process_both_text_and_audio_response(
text_input, audio_input, voice_dropdown, chat_history
)
return new_history, audio_response, status_msg, cleared_input, None
except Exception as e:
logger.error(f"Error in enhanced combined processing: {e}")
return chat_history, None, f"Status: Error - {str(e)}", "", None
def toggle_user_doc_visibility(selected_type):
"""Toggle visibility of user document options."""
return gr.update(visible=(selected_type == "User-Specific Document"))
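# Note: this helper is not currently wired to any component. A typical (hypothetical)
# hookup would be doc_type.change(toggle_user_doc_visibility, doc_type, <target component>)
# inside the Blocks context below.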
def validate_environment() -> bool:
"""Validate required environment variables are present."""
required_env_vars = [
"GEMINI_API_KEY",
"SILICONFLOW_API_KEY",
"SILICONFLOW_CHAT_URL"
]
missing_vars = []
for var in required_env_vars:
if not os.getenv(var):
missing_vars.append(var)
if missing_vars:
logger.warning(f"Missing environment variables: {', '.join(missing_vars)}")
return False
logger.info("All required environment variables are present")
return True
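# A minimal .env sketch matching the checks above (values are placeholders):
#   GEMINI_API_KEY=<your-gemini-key>
#   SILICONFLOW_API_KEY=<your-siliconflow-key>
#   SILICONFLOW_CHAT_URL=<chat-completions-endpoint>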
def clear_chat():
"""Clear chat history."""
return [], None, "Status: Chat cleared."
# Gradio Interface Configuration
image_path = "assets/Compumacy-Logo-Trans2.png"
# Dark Mode Professional Theme with Custom CSS
dark_theme_css = """
/* Dark Mode Theme */
.gradio-container {
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f1419 100%) !important;
color: #e0e6ed !important;
border-radius: 20px !important;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3) !important;
max-width: 1200px !important;
margin: auto;
}
/* Header styling */
.gradio-container h1 {
color: #ffffff !important;
text-shadow: 0 2px 4px rgba(0, 0, 0, 0.3) !important;
}
/* Logo container */
.logo-container {
background: rgba(255, 255, 255, 0.1) !important;
border-radius: 15px !important;
padding: 10px !important;
backdrop-filter: blur(10px) !important;
border: 1px solid rgba(255, 255, 255, 0.1) !important;
}
/* Chatbot styling */
.chatbot {
background: rgba(255, 255, 255, 0.05) !important;
border: 1px solid rgba(255, 255, 255, 0.1) !important;
border-radius: 15px !important;
}
.chatbot .message {
background: rgba(255, 255, 255, 0.08) !important;
border: 1px solid rgba(255, 255, 255, 0.1) !important;
border-radius: 10px !important;
color: #e0e6ed !important;
}
.chatbot .message.user {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
color: white !important;
}
.chatbot .message.bot {
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
color: white !important;
}
/* Input fields */
.gr-textbox, .gr-dropdown, .gr-file {
background: rgba(255, 255, 255, 0.1) !important;
border: 1px solid rgba(255, 255, 255, 0.2) !important;
border-radius: 10px !important;
color: #e0e6ed !important;
backdrop-filter: blur(5px) !important;
}
.gr-textbox::placeholder {
color: rgba(224, 230, 237, 0.6) !important;
}
/* Buttons */
.gr-button {
border-radius: 10px !important;
border: none !important;
font-weight: 600 !important;
text-transform: uppercase !important;
letter-spacing: 0.5px !important;
transition: all 0.3s ease !important;
backdrop-filter: blur(10px) !important;
}
.text-button {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
color: white !important;
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3) !important;
}
.text-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4) !important;
}
.audio-button {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
color: white !important;
box-shadow: 0 4px 15px rgba(240, 147, 251, 0.3) !important;
}
.audio-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(240, 147, 251, 0.4) !important;
}
.both-button {
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
color: white !important;
box-shadow: 0 4px 15px rgba(79, 172, 254, 0.3) !important;
}
.both-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(79, 172, 254, 0.4) !important;
}
/* Clear button */
.clear-button {
background: linear-gradient(135deg, #ff6b6b 0%, #ee5a52 100%) !important;
color: white !important;
box-shadow: 0 4px 15px rgba(255, 107, 107, 0.3) !important;
}
.clear-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(255, 107, 107, 0.4) !important;
}
/* Labels and text */
label, .gr-form label {
color: #e0e6ed !important;
font-weight: 500 !important;
}
/* Audio component */
.gr-audio {
background: rgba(255, 255, 255, 0.1) !important;
border: 1px solid rgba(255, 255, 255, 0.2) !important;
border-radius: 15px !important;
backdrop-filter: blur(10px) !important;
}
/* Accordion */
.gr-accordion {
background: rgba(255, 255, 255, 0.05) !important;
border: 1px solid rgba(255, 255, 255, 0.1) !important;
border-radius: 15px !important;
}
/* Info boxes */
.info-box {
background: rgba(79, 172, 254, 0.1) !important;
border-left: 4px solid #4facfe !important;
border-radius: 10px !important;
padding: 15px !important;
margin: 10px 0 !important;
backdrop-filter: blur(5px) !important;
}
/* Status output */
.status-output {
background: rgba(255, 255, 255, 0.1) !important;
border: 1px solid rgba(255, 255, 255, 0.2) !important;
border-radius: 10px !important;
color: #e0e6ed !important;
}
/* Hide default footer */
footer {
display: none !important;
}
/* Scrollbar styling */
::-webkit-scrollbar {
width: 8px;
}
::-webkit-scrollbar-track {
background: rgba(255, 255, 255, 0.1);
border-radius: 4px;
}
::-webkit-scrollbar-thumb {
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
border-radius: 4px;
}
::-webkit-scrollbar-thumb:hover {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}
/* Responsive design */
@media (max-width: 768px) {
.gradio-container {
margin: 10px !important;
border-radius: 15px !important;
}
}
"""
with gr.Blocks(
title="Wisal Chatbot - Autism AI Assistant",
theme=gr.themes.Base(
primary_hue="blue",
secondary_hue="purple",
neutral_hue="slate",
font=gr.themes.GoogleFont("Inter")
),
css=dark_theme_css
) as demo:
gr.HTML("""
<div style="text-align: center; padding: 30px 0;">
<div style="background: linear-gradient(135deg, rgba(79, 172, 254, 0.2) 0%, rgba(118, 75, 162, 0.2) 100%);
border-radius: 20px; padding: 20px; margin-bottom: 30px;
border: 1px solid rgba(255, 255, 255, 0.1);">
<h1 style="font-size: 2.5em; font-weight: 700; margin: 0;
background: linear-gradient(135deg, #4facfe 0%, #764ba2 100%);
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
text-shadow: none;">
πŸ€– Wisal: Autism AI Assistant
</h1>
<p style="font-size: 1.2em; margin: 10px 0 0 0; color: #a0aec0; font-weight: 400;">
Your personalized AI assistant designed specifically for individuals with autism
</p>
</div>
</div>
""")
with gr.Row(equal_height=False):
with gr.Column(scale=1, min_width=200):
if os.path.exists(image_path):
gr.Image(
value=image_path,
show_label=False,
container=True,
height=150,
width=150,
elem_classes="logo-container",
show_download_button=False,
show_share_button=False
)
else:
gr.HTML("""
<div class="logo-container" style="height: 150px; display: flex; align-items: center; justify-content: center;">
<div style="font-size: 60px;">πŸ€–</div>
</div>
""")
with gr.Column(scale=4):
gr.HTML("""
<div class="info-box">
<h3 style="margin-top: 0; color: #4facfe;">How to use Wisal:</h3>
<ul style="margin-bottom: 0; color: #e0e6ed;">
<li><strong>πŸ’¬ Text & Audio:</strong> Get both written and spoken responses (recommended)</li>
<li><strong>πŸ“ Text Only:</strong> Get a written response that appears in chat history</li>
<li><strong>🎡 Audio Only:</strong> Get a spoken response without updating chat display</li>
<li><strong>🎀 Voice Input:</strong> Record audio or upload an audio file for questions</li>
</ul>
</div>
""")
# Initialize chat history as empty list
chat_history = gr.State([])
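    # With type='messages' on the Chatbot below, each history entry is a dict like
    # {"role": "user", "content": "..."} or {"role": "assistant", "content": "..."}.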
with gr.Row():
chatbot = gr.Chatbot(
type='messages',
label="πŸ’¬ Conversation with Wisal",
height=500,
avatar_images=(None, image_path if os.path.exists(image_path) else None),
bubble_full_width=False,
show_copy_button=True,
elem_classes="chatbot"
)
with gr.Row():
with gr.Column(scale=1):
audio_output = gr.Audio(
label="πŸ”Š Wisal's Voice Response",
interactive=False,
show_download_button=True,
elem_classes="gr-audio"
)
with gr.Column(scale=1):
status_output = gr.Textbox(
label="πŸ“Š System Status",
interactive=False,
max_lines=2,
elem_classes="status-output"
)
with gr.Row():
user_input = gr.Textbox(
placeholder="Ask me anything about autism...",
label="πŸ“ Your Message",
lines=3,
scale=3,
show_copy_button=True
)
audio_input = gr.Audio(
sources=["microphone", "upload"],
type="filepath",
label="🎀 Voice Input",
scale=2
)
with gr.Row():
voice_dropdown = gr.Dropdown(
label="πŸŽ™οΈ Choose Voice",
choices=["Kore", "Puck", "Zephyr", "Leda", "Fenrir", "Charon", "Orus", "Aoede", "Callirrhoe"],
value="Kore",
scale=2
)
# Separate buttons for different response types
with gr.Row():
text_only_btn = gr.Button(
"πŸ“ Generate Text Only",
variant="secondary",
scale=1,
elem_classes="text-button"
)
audio_only_btn = gr.Button(
"🎡 Generate Audio Only",
variant="secondary",
scale=1,
elem_classes="audio-button"
)
both_btn = gr.Button(
"πŸ’¬ Generate Text & Audio",
variant="primary",
scale=1,
elem_classes="both-button"
)
# Connect the interactions
text_only_btn.click(
fn=process_text_only,
inputs=[user_input, audio_input, chat_history],
outputs=[chatbot, status_output, user_input, audio_input]
)
audio_only_btn.click(
fn=process_audio_only,
inputs=[user_input, audio_input, voice_dropdown, chat_history],
outputs=[audio_output, status_output, user_input, audio_input]
)
both_btn.click(
fn=process_both_text_and_audio,
inputs=[user_input, audio_input, voice_dropdown, chat_history],
outputs=[chatbot, audio_output, status_output, user_input, audio_input]
)
# Keep the original submit functionality for the text input (defaulting to both)
user_input.submit(
fn=process_both_text_and_audio,
inputs=[user_input, audio_input, voice_dropdown, chat_history],
outputs=[chatbot, audio_output, status_output, user_input, audio_input]
)
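    # All three handlers return a cleared text value and None for the audio widget,
    # so user_input and audio_input reset after each successful turn.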
gr.HTML("<div style='margin: 30px 0;'></div>")
with gr.Accordion("πŸ”§ Advanced Options (Live Chat & Document Q&A)", open=False):
with gr.Row():
with gr.Column():
try:
webrtc2 = WebRTC(
label="🎀 Live Voice Chat",
modality="audio",
mode="send-receive",
elem_id="audio-source",
rtc_configuration=get_cloudflare_turn_credentials_async,
icon="https://www.gstatic.com/lamda/images/gemini_favicon.png"
)
webrtc2.stream(
GeminiHandler(),
inputs=[webrtc2],
outputs=[webrtc2],
time_limit=180 if get_space() else None,
concurrency_limit=2 if get_space() else None
)
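                # On a hosted Space (get_space() is True), streaming sessions are
                # capped at 3 minutes and 2 concurrent users; locally there is no cap.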
except Exception as e:
logger.warning(f"WebRTC setup failed: {e}")
gr.HTML("""
<div class="info-box" style="border-left-color: #f5576c;">
<strong>⚠️ Live Chat temporarily unavailable</strong>
<p>Please use the text and audio inputs above instead.</p>
</div>
""")
with gr.Row():
with gr.Column():
# Enhanced document upload with all 4 options
doc_file = gr.File(
label="πŸ“Ž Upload Document (PDF, DOCX, TXT)",
file_types=[".pdf", ".docx", ".txt"]
)
                # Dropdown with the four supported document types
doc_type = gr.Dropdown(
label="πŸ“„ Document Type",
choices=get_all_document_choices(),
value="user_specific",
elem_classes="gr-dropdown"
)
# Optional query field for immediate Q&A
doc_query = gr.Textbox(
label="πŸ’­ Optional: Ask about this document",
placeholder="What does this document say about...",
lines=2,
elem_classes="gr-textbox"
)
# Upload button
upload_btn = gr.Button(
"πŸ“€ Upload & Process",
variant="primary",
elem_classes="both-button"
)
# Upload status display
upload_status = gr.Textbox(
label="πŸ“Š Upload Status",
interactive=False,
lines=4,
elem_classes="status-output"
)
with gr.Column():
# Document info panel
doc_info = gr.HTML(create_document_info_panel())
# Connect upload button to enhanced handler
upload_btn.click(
fn=enhanced_document_upload_handler,
inputs=[doc_file, doc_type, doc_query],
outputs=[upload_status]
)
with gr.Row():
clear_btn = gr.Button(
"πŸ—‘οΈ Clear Chat",
variant="stop",
elem_classes="clear-button"
)
clear_btn.click(
fn=clear_chat,
outputs=[chatbot, audio_output, status_output]
)
# Add usage guide at the bottom
gr.HTML("""
<div class="info-box" style="margin-top: 30px;">
<h3 style="margin-top: 0; color: #4facfe;">πŸ’‘ Usage Guide:</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 20px;">
<div>
<h4 style="color: #667eea;">πŸ“ Text Only Mode</h4>
<p>Perfect for quick questions when you want to read the response and save it in chat history.</p>
</div>
<div>
<h4 style="color: #f5576c;">🎡 Audio Only Mode</h4>
<p>Great for hands-free interaction when you want to listen to responses without cluttering the chat.</p>
</div>
<div>
<h4 style="color: #4facfe;">πŸ’¬ Text & Audio Mode</h4>
<p>Best of both worlds - see and hear responses, perfect for learning and accessibility.</p>
</div>
</div>
</div>
""")
# Temporary smoke test for the retrieval pipeline.
def test_pipeline_response():
"""Test the pipeline response"""
try:
response = process_query("What is autism?")
print(f"Response type: {type(response)}")
print(f"Response length: {len(str(response))}")
print(f"Response preview: {str(response)[:200]}...")
return True
except Exception as e:
print(f"Test failed: {e}")
return False
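# Example manual invocation (assumes this file is importable as main_basic_interface,
# and that importing it is acceptable, since the Blocks UI is built at import time):
#   python -c "import main_basic_interface as m; m.test_pipeline_response()"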
def main():
"""Main entry point for the application."""
logger.info("Starting Wisal application...")
# Validate environment before starting
if not validate_environment():
logger.error("Environment validation failed. Please check your .env file.")
return
try:
        # The Gemini client is initialized at import time (see gemini_client above).
# Launch the application
demo.launch(
server_port=8080,
server_name="0.0.0.0", # Allow external connections
share=False, # Set to True if you want to create a public link
favicon_path=image_path if os.path.exists(image_path) else None,
show_error=True
)
    except Exception as e:
        logger.error(f"Failed to start application: {e}")


if __name__ == "__main__":
    main()