# Autism_QA/main_basic_interface.py
import os
import sys
import re
from dotenv import load_dotenv
import gradio as gr
from audio_utils import GeminiHandler
import google.genai as genai
from fastrtc import WebRTC, get_cloudflare_turn_credentials_async
from gradio.utils import get_space
from utils import process_query
from logger.custom_logger import CustomLoggerTracker
# Import enhanced functions from specific_utils
from gradio_utils import (
    get_all_document_choices,
    enhanced_document_upload_handler,
    process_text_with_audio_support,
    process_audio_only_response,
    process_both_text_and_audio_response,
    create_document_info_panel,
)
# Setup
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
load_dotenv()
# Initialize the Gemini client
gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
custom_log = CustomLoggerTracker()
logger = custom_log.get_logger("main")
logger.info("Logger initialized for main module")
def parse_score_safely(score_str):
"""Safely parse hallucination score, handling extra characters."""
if not score_str:
return 0
# Extract just the number from strings like "4**" or "Score: 4"
numbers = re.findall(r'\d+', str(score_str))
if numbers:
return int(numbers[0])
return 0
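# Illustrative behavior of the parser above (not exhaustive):
#   parse_score_safely("4**")       -> 4
#   parse_score_safely("Score: 4")  -> 4
#   parse_score_safely("")          -> 0
#   parse_score_safely(None)        -> 0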
def process_text_only(user_input, audio_input, chat_history):
"""Process input and generate only text response using enhanced functions."""
try:
# Use enhanced text processing with audio support
new_history, cleared_input, status_msg, stats_display = process_text_with_audio_support(
user_input, audio_input, chat_history
)
return new_history, status_msg, cleared_input, None
except Exception as e:
logger.error(f"Error in enhanced text processing: {e}")
return chat_history, f"Status: Error - {str(e)}", "", None
def process_audio_only(user_input, audio_input, voice_dropdown, chat_history):
"""Process input and generate only audio response using enhanced functions."""
try:
# Use enhanced audio processing
audio_response, cleared_input, status_msg, stats_display = process_audio_only_response(
user_input, audio_input, voice_dropdown, chat_history
)
return audio_response, status_msg, cleared_input, None
except Exception as e:
logger.error(f"Error in enhanced audio processing: {e}")
return None, f"Status: Error - {str(e)}", "", None
def process_both_text_and_audio(text_input, audio_input, voice_dropdown, chat_history):
"""Process input and generate both text and audio responses using enhanced functions."""
try:
# Use enhanced combined processing
new_history, audio_response, cleared_input, status_msg, stats_display = process_both_text_and_audio_response(
text_input, audio_input, voice_dropdown, chat_history
)
return new_history, audio_response, status_msg, cleared_input, None
except Exception as e:
logger.error(f"Error in enhanced combined processing: {e}")
return chat_history, None, f"Status: Error - {str(e)}", "", None
def toggle_user_doc_visibility(selected_type):
"""Toggle visibility of user document options."""
return gr.update(visible=(selected_type == "User-Specific Document"))
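# Note: this helper is not currently wired to any component. A typical (hypothetical)
# hookup would be doc_type.change(toggle_user_doc_visibility, doc_type, <target component>)
# inside the Blocks context below.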
def validate_environment() -> bool:
"""Validate required environment variables are present."""
required_env_vars = [
"GEMINI_API_KEY",
"SILICONFLOW_API_KEY",
"SILICONFLOW_CHAT_URL"
]
missing_vars = []
for var in required_env_vars:
if not os.getenv(var):
missing_vars.append(var)
if missing_vars:
logger.warning(f"Missing environment variables: {', '.join(missing_vars)}")
return False
logger.info("All required environment variables are present")
return True
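# A minimal .env sketch matching the checks above (values are placeholders):
#   GEMINI_API_KEY=<your-gemini-key>
#   SILICONFLOW_API_KEY=<your-siliconflow-key>
#   SILICONFLOW_CHAT_URL=<chat-completions-endpoint>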
def clear_chat():
"""Clear chat history."""
return [], None, "Status: Chat cleared."
# Gradio Interface Configuration
image_path = "assets/Compumacy-Logo-Trans2.png"
# Dark Mode Professional Theme with Custom CSS
dark_theme_css = """
/* Dark Mode Theme */
.gradio-container {
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f1419 100%) !important;
color: #e0e6ed !important;
border-radius: 20px !important;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3) !important;
max-width: 1200px !important;
margin: auto;
}
/* Header styling */
.gradio-container h1 {
color: #ffffff !important;
text-shadow: 0 2px 4px rgba(0, 0, 0, 0.3) !important;
}
/* Logo container */
.logo-container {
background: rgba(255, 255, 255, 0.1) !important;
border-radius: 15px !important;
padding: 10px !important;
backdrop-filter: blur(10px) !important;
border: 1px solid rgba(255, 255, 255, 0.1) !important;
}
/* Chatbot styling */
.chatbot {
background: rgba(255, 255, 255, 0.05) !important;
border: 1px solid rgba(255, 255, 255, 0.1) !important;
border-radius: 15px !important;
}
.chatbot .message {
background: rgba(255, 255, 255, 0.08) !important;
border: 1px solid rgba(255, 255, 255, 0.1) !important;
border-radius: 10px !important;
color: #e0e6ed !important;
}
.chatbot .message.user {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
color: white !important;
}
.chatbot .message.bot {
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
color: white !important;
}
/* Input fields */
.gr-textbox, .gr-dropdown, .gr-file {
background: rgba(255, 255, 255, 0.1) !important;
border: 1px solid rgba(255, 255, 255, 0.2) !important;
border-radius: 10px !important;
color: #e0e6ed !important;
backdrop-filter: blur(5px) !important;
}
.gr-textbox::placeholder {
color: rgba(224, 230, 237, 0.6) !important;
}
/* Buttons */
.gr-button {
border-radius: 10px !important;
border: none !important;
font-weight: 600 !important;
text-transform: uppercase !important;
letter-spacing: 0.5px !important;
transition: all 0.3s ease !important;
backdrop-filter: blur(10px) !important;
}
.text-button {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
color: white !important;
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3) !important;
}
.text-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4) !important;
}
.audio-button {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
color: white !important;
box-shadow: 0 4px 15px rgba(240, 147, 251, 0.3) !important;
}
.audio-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(240, 147, 251, 0.4) !important;
}
.both-button {
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
color: white !important;
box-shadow: 0 4px 15px rgba(79, 172, 254, 0.3) !important;
}
.both-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(79, 172, 254, 0.4) !important;
}
/* Clear button */
.clear-button {
background: linear-gradient(135deg, #ff6b6b 0%, #ee5a52 100%) !important;
color: white !important;
box-shadow: 0 4px 15px rgba(255, 107, 107, 0.3) !important;
}
.clear-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(255, 107, 107, 0.4) !important;
}
/* Labels and text */
label, .gr-form label {
color: #e0e6ed !important;
font-weight: 500 !important;
}
/* Audio component */
.gr-audio {
background: rgba(255, 255, 255, 0.1) !important;
border: 1px solid rgba(255, 255, 255, 0.2) !important;
border-radius: 15px !important;
backdrop-filter: blur(10px) !important;
}
/* Accordion */
.gr-accordion {
background: rgba(255, 255, 255, 0.05) !important;
border: 1px solid rgba(255, 255, 255, 0.1) !important;
border-radius: 15px !important;
}
/* Info boxes */
.info-box {
background: rgba(79, 172, 254, 0.1) !important;
border-left: 4px solid #4facfe !important;
border-radius: 10px !important;
padding: 15px !important;
margin: 10px 0 !important;
backdrop-filter: blur(5px) !important;
}
/* Status output */
.status-output {
background: rgba(255, 255, 255, 0.1) !important;
border: 1px solid rgba(255, 255, 255, 0.2) !important;
border-radius: 10px !important;
color: #e0e6ed !important;
}
/* Hide default footer */
footer {
display: none !important;
}
/* Scrollbar styling */
::-webkit-scrollbar {
width: 8px;
}
::-webkit-scrollbar-track {
background: rgba(255, 255, 255, 0.1);
border-radius: 4px;
}
::-webkit-scrollbar-thumb {
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
border-radius: 4px;
}
::-webkit-scrollbar-thumb:hover {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}
/* Responsive design */
@media (max-width: 768px) {
.gradio-container {
margin: 10px !important;
border-radius: 15px !important;
}
}
"""
with gr.Blocks(
title="Wisal Chatbot - Autism AI Assistant",
theme=gr.themes.Base(
primary_hue="blue",
secondary_hue="purple",
neutral_hue="slate",
font=gr.themes.GoogleFont("Inter")
),
css=dark_theme_css
) as demo:
gr.HTML("""
<div style="text-align: center; padding: 30px 0;">
<div style="background: linear-gradient(135deg, rgba(79, 172, 254, 0.2) 0%, rgba(118, 75, 162, 0.2) 100%);
border-radius: 20px; padding: 20px; margin-bottom: 30px;
border: 1px solid rgba(255, 255, 255, 0.1);">
<h1 style="font-size: 2.5em; font-weight: 700; margin: 0;
background: linear-gradient(135deg, #4facfe 0%, #764ba2 100%);
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
text-shadow: none;">
πŸ€– Wisal: Autism AI Assistant
</h1>
<p style="font-size: 1.2em; margin: 10px 0 0 0; color: #a0aec0; font-weight: 400;">
Your personalized AI assistant designed specifically for individuals with autism
</p>
</div>
</div>
""")
with gr.Row(equal_height=False):
with gr.Column(scale=1, min_width=200):
if os.path.exists(image_path):
gr.Image(
value=image_path,
show_label=False,
container=True,
height=150,
width=150,
elem_classes="logo-container",
show_download_button=False,
show_share_button=False
)
else:
gr.HTML("""
<div class="logo-container" style="height: 150px; display: flex; align-items: center; justify-content: center;">
<div style="font-size: 60px;">πŸ€–</div>
</div>
""")
with gr.Column(scale=4):
gr.HTML("""
<div class="info-box">
<h3 style="margin-top: 0; color: #4facfe;">How to use Wisal:</h3>
<ul style="margin-bottom: 0; color: #e0e6ed;">
<li><strong>πŸ’¬ Text & Audio:</strong> Get both written and spoken responses (recommended)</li>
<li><strong>πŸ“ Text Only:</strong> Get a written response that appears in chat history</li>
<li><strong>🎡 Audio Only:</strong> Get a spoken response without updating chat display</li>
<li><strong>🎀 Voice Input:</strong> Record audio or upload an audio file for questions</li>
</ul>
</div>
""")
# Initialize chat history as empty list
chat_history = gr.State([])
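    # With type='messages' on the Chatbot below, each history entry is a dict like
    # {"role": "user", "content": "..."} or {"role": "assistant", "content": "..."}.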
with gr.Row():
chatbot = gr.Chatbot(
type='messages',
label="πŸ’¬ Conversation with Wisal",
height=500,
avatar_images=(None, image_path if os.path.exists(image_path) else None),
bubble_full_width=False,
show_copy_button=True,
elem_classes="chatbot"
)
with gr.Row():
with gr.Column(scale=1):
audio_output = gr.Audio(
label="πŸ”Š Wisal's Voice Response",
interactive=False,
show_download_button=True,
elem_classes="gr-audio"
)
with gr.Column(scale=1):
status_output = gr.Textbox(
label="πŸ“Š System Status",
interactive=False,
max_lines=2,
elem_classes="status-output"
)
with gr.Row():
user_input = gr.Textbox(
placeholder="Ask me anything about autism...",
label="πŸ“ Your Message",
lines=3,
scale=3,
show_copy_button=True
)
audio_input = gr.Audio(
sources=["microphone", "upload"],
type="filepath",
label="🎀 Voice Input",
scale=2
)
with gr.Row():
voice_dropdown = gr.Dropdown(
label="πŸŽ™οΈ Choose Voice",
choices=["Kore", "Puck", "Zephyr", "Leda", "Fenrir", "Charon", "Orus", "Aoede", "Callirrhoe"],
value="Kore",
scale=2
)
# Separate buttons for different response types
with gr.Row():
text_only_btn = gr.Button(
"πŸ“ Generate Text Only",
variant="secondary",
scale=1,
elem_classes="text-button"
)
audio_only_btn = gr.Button(
"🎡 Generate Audio Only",
variant="secondary",
scale=1,
elem_classes="audio-button"
)
both_btn = gr.Button(
"πŸ’¬ Generate Text & Audio",
variant="primary",
scale=1,
elem_classes="both-button"
)
# Connect the interactions
text_only_btn.click(
fn=process_text_only,
inputs=[user_input, audio_input, chat_history],
outputs=[chatbot, status_output, user_input, audio_input]
)
audio_only_btn.click(
fn=process_audio_only,
inputs=[user_input, audio_input, voice_dropdown, chat_history],
outputs=[audio_output, status_output, user_input, audio_input]
)
both_btn.click(
fn=process_both_text_and_audio,
inputs=[user_input, audio_input, voice_dropdown, chat_history],
outputs=[chatbot, audio_output, status_output, user_input, audio_input]
)
# Keep the original submit functionality for the text input (defaulting to both)
user_input.submit(
fn=process_both_text_and_audio,
inputs=[user_input, audio_input, voice_dropdown, chat_history],
outputs=[chatbot, audio_output, status_output, user_input, audio_input]
)
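    # All three handlers return a cleared text value and None for the audio widget,
    # so user_input and audio_input reset after each successful turn.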
gr.HTML("<div style='margin: 30px 0;'></div>")
with gr.Accordion("πŸ”§ Advanced Options (Live Chat & Document Q&A)", open=False):
with gr.Row():
with gr.Column():
try:
webrtc2 = WebRTC(
label="🎀 Live Voice Chat",
modality="audio",
mode="send-receive",
elem_id="audio-source",
rtc_configuration=get_cloudflare_turn_credentials_async,
icon="https://www.gstatic.com/lamda/images/gemini_favicon.png"
)
webrtc2.stream(
GeminiHandler(),
inputs=[webrtc2],
outputs=[webrtc2],
time_limit=180 if get_space() else None,
concurrency_limit=2 if get_space() else None
)
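                # On a hosted Space (get_space() is True), streaming sessions are
                # capped at 3 minutes and 2 concurrent users; locally there is no cap.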
except Exception as e:
logger.warning(f"WebRTC setup failed: {e}")
gr.HTML("""
<div class="info-box" style="border-left-color: #f5576c;">
<strong>⚠️ Live Chat temporarily unavailable</strong>
<p>Please use the text and audio inputs above instead.</p>
</div>
""")
with gr.Row():
with gr.Column():
# Enhanced document upload with all 4 options
doc_file = gr.File(
label="πŸ“Ž Upload Document (PDF, DOCX, TXT)",
file_types=[".pdf", ".docx", ".txt"]
)
                # Dropdown with the four supported document types
doc_type = gr.Dropdown(
label="πŸ“„ Document Type",
choices=get_all_document_choices(),
value="user_specific",
elem_classes="gr-dropdown"
)
# Optional query field for immediate Q&A
doc_query = gr.Textbox(
label="πŸ’­ Optional: Ask about this document",
placeholder="What does this document say about...",
lines=2,
elem_classes="gr-textbox"
)
# Upload button
upload_btn = gr.Button(
"πŸ“€ Upload & Process",
variant="primary",
elem_classes="both-button"
)
# Upload status display
upload_status = gr.Textbox(
label="πŸ“Š Upload Status",
interactive=False,
lines=4,
elem_classes="status-output"
)
with gr.Column():
# Document info panel
doc_info = gr.HTML(create_document_info_panel())
# Connect upload button to enhanced handler
upload_btn.click(
fn=enhanced_document_upload_handler,
inputs=[doc_file, doc_type, doc_query],
outputs=[upload_status]
)
with gr.Row():
clear_btn = gr.Button(
"πŸ—‘οΈ Clear Chat",
variant="stop",
elem_classes="clear-button"
)
clear_btn.click(
fn=clear_chat,
outputs=[chatbot, audio_output, status_output]
)
# Add usage guide at the bottom
gr.HTML("""
<div class="info-box" style="margin-top: 30px;">
<h3 style="margin-top: 0; color: #4facfe;">πŸ’‘ Usage Guide:</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 20px;">
<div>
<h4 style="color: #667eea;">πŸ“ Text Only Mode</h4>
<p>Perfect for quick questions when you want to read the response and save it in chat history.</p>
</div>
<div>
<h4 style="color: #f5576c;">🎡 Audio Only Mode</h4>
<p>Great for hands-free interaction when you want to listen to responses without cluttering the chat.</p>
</div>
<div>
<h4 style="color: #4facfe;">πŸ’¬ Text & Audio Mode</h4>
<p>Best of both worlds - see and hear responses, perfect for learning and accessibility.</p>
</div>
</div>
</div>
""")
# Temporary smoke test for the retrieval pipeline.
def test_pipeline_response():
"""Test the pipeline response"""
try:
response = process_query("What is autism?")
print(f"Response type: {type(response)}")
print(f"Response length: {len(str(response))}")
print(f"Response preview: {str(response)[:200]}...")
return True
except Exception as e:
print(f"Test failed: {e}")
return False
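# Example manual invocation (assumes this file is importable as main_basic_interface,
# and that importing it is acceptable, since the Blocks UI is built at import time):
#   python -c "import main_basic_interface as m; m.test_pipeline_response()"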
def main():
"""Main entry point for the application."""
logger.info("Starting Wisal application...")
# Validate environment before starting
if not validate_environment():
logger.error("Environment validation failed. Please check your .env file.")
return
try:
        # The Gemini client is initialized at import time (see gemini_client above).
# Launch the application
demo.launch(
server_port=8080,
server_name="0.0.0.0", # Allow external connections
share=False, # Set to True if you want to create a public link
favicon_path=image_path if os.path.exists(image_path) else None,
show_error=True
)
    except Exception as e:
        logger.error(f"Failed to start application: {e}")


if __name__ == "__main__":
    main()