๐ค Empathetic AI Companion
Your intelligent partner for emotional support and meaningful conversations
from datasets import load_dataset from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, Wav2Vec2ForCTC, Wav2Vec2Processor from sentence_transformers import SentenceTransformer import numpy as np import random import faiss import json import logging import re import streamlit as st from datetime import datetime import os import torch import librosa from gtts import gTTS import tempfile import io import base64 import time # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # ============================ # AUDIO PROCESSING UTILITIES # ============================ class AudioProcessor: def __init__(self): """Initialize audio processing components""" try: # Load Wav2Vec2 model for speech-to-text self.stt_processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h") self.stt_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h") logger.info("โ STT model loaded successfully") except Exception as e: logger.error(f"โ Error loading STT model: {e}") self.stt_processor = None self.stt_model = None def speech_to_text_from_bytes(self, audio_bytes): """Convert speech to text from audio bytes""" if not self.stt_processor or not self.stt_model: return "STT model not available" try: # Create temporary file from bytes with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file: tmp_file.write(audio_bytes) tmp_file_path = tmp_file.name # Load and preprocess audio audio_input, sr = librosa.load(tmp_file_path, sr=16000) # Clean up temp file os.unlink(tmp_file_path) # Check if audio is silent if np.max(np.abs(audio_input)) < 0.01: return "No speech detected. Please speak louder." # Process audio input_values = self.stt_processor(audio_input, return_tensors="pt", sampling_rate=16000).input_values # Perform inference with torch.no_grad(): logits = self.stt_model(input_values).logits # Decode transcription predicted_ids = torch.argmax(logits, dim=-1) transcription = self.stt_processor.batch_decode(predicted_ids)[0] return transcription.strip() if transcription.strip() else "Could not transcribe audio" except Exception as e: logger.error(f"Error in speech-to-text: {e}") return f"Error processing audio: {str(e)}" def text_to_speech(self, text, lang='en'): """Convert text to speech using gTTS""" try: # Create TTS object tts = gTTS(text=text, lang=lang, slow=False) # Save to temporary file with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file: tts.save(tmp_file.name) return tmp_file.name except Exception as e: logger.error(f"Error in text-to-speech: {e}") return None # ============================ # DATA PREPARATION # ============================ def prepare_dataset(): """Load and prepare the emotion dataset with error handling""" try: print("๐ Loading emotion dataset...") # Load the dataset ds = load_dataset("cardiffnlp/tweet_eval", "emotion") # Define emotion labels (matching the dataset) emotion_labels = ["anger", "joy", "optimism", "sadness"] def clean_text(text): """Clean and preprocess text""" text = text.lower() text = re.sub(r"http\S+", "", text) # remove URLs text = re.sub(r"[^\w\s]", "", text) # remove special characters text = re.sub(r"\d+", "", text) # remove numbers text = re.sub(r"\s+", " ", text) # normalize whitespace return text.strip() # Sample and prepare training data train_data = ds['train'] train_sample = random.sample(list(train_data), min(1000, len(train_data))) # Convert to RAG format rag_json = [] for row in train_sample: cleaned_text = clean_text(row['text']) if len(cleaned_text) > 10: # Filter out very short texts rag_json.append({ "text": cleaned_text, "emotion": emotion_labels[row['label']], "original_text": row['text'] }) print(f"Dataset prepared with {len(rag_json)} samples") return rag_json except Exception as e: print(f"Warning: Could not load dataset: {e}") # Return minimal fallback dataset return [ {"text": "feeling happy and excited", "emotion": "joy"}, {"text": "really angry and frustrated", "emotion": "anger"}, {"text": "sad and lonely today", "emotion": "sadness"}, {"text": "optimistic about the future", "emotion": "optimism"} ] # ============================ # FIXED EMOTION DETECTION MODEL # ============================ class EmotionDetector: def __init__(self): # Try multiple emotion models in order of preference self.model_options = [ "j-hartmann/emotion-english-distilroberta-base", "cardiffnlp/twitter-roberta-base-emotion-latest", "nateraw/bert-base-uncased-emotion", "michellejieli/emotion_text_classifier" ] self.model = None self.tokenizer = None self.classifier = None # Try loading models in order for model_name in self.model_options: try: st.info(f"๐ Trying to load {model_name}...") # Force download and load with specific parameters self.tokenizer = AutoTokenizer.from_pretrained( model_name, force_download=False, resume_download=True ) # Load model with specific device mapping to avoid meta tensor issues self.model = AutoModelForSequenceClassification.from_pretrained( model_name, force_download=False, resume_download=True, device_map=None, # Don't use device_map torch_dtype=torch.float32, # Specify dtype explicitly low_cpu_mem_usage=False # Disable low_cpu_mem_usage ) # Move to CPU explicitly if needed if torch.cuda.is_available(): self.model = self.model.to('cpu') self.classifier = pipeline( "text-classification", model=self.model, tokenizer=self.tokenizer, return_all_scores=False, device=-1 # Force CPU usage ) st.success(f"โ Successfully loaded {model_name}") break except Exception as e: st.warning(f"โ ๏ธ Failed to load {model_name}: {str(e)}") continue # Fallback to simple rule-based detection if all models fail if self.classifier is None: st.warning("โ ๏ธ All emotion models failed. Using rule-based fallback.") self.use_fallback = True else: self.use_fallback = False def detect_emotion_fallback(self, text): """Simple rule-based emotion detection as fallback""" text_lower = text.lower() # Define keyword patterns for emotions emotion_keywords = { 'joy': ['happy', 'joy', 'excited', 'thrilled', 'wonderful', 'amazing', 'great', 'fantastic', 'love', 'awesome'], 'anger': ['angry', 'mad', 'furious', 'annoyed', 'frustrated', 'irritated', 'hate', 'terrible', 'awful'], 'sadness': ['sad', 'depressed', 'upset', 'down', 'lonely', 'miserable', 'disappointed', 'heartbroken'], 'optimism': ['hope', 'optimistic', 'positive', 'confident', 'believe', 'future', 'better', 'improve'] } # Count keyword matches emotion_scores = {} for emotion, keywords in emotion_keywords.items(): score = sum(1 for keyword in keywords if keyword in text_lower) emotion_scores[emotion] = score # Get emotion with highest score if max(emotion_scores.values()) > 0: detected_emotion = max(emotion_scores, key=emotion_scores.get) confidence = min(emotion_scores[detected_emotion] * 0.3 + 0.4, 0.9) # Scale confidence else: detected_emotion = 'optimism' # Default confidence = 0.5 return detected_emotion, confidence def detect_emotion(self, text): """Detect emotion from text with fallback""" if self.use_fallback or not text.strip(): return self.detect_emotion_fallback(text) try: result = self.classifier(text) emotion = result[0]['label'].lower() confidence = result[0]['score'] # Map model outputs to our emotion categories emotion_mapping = { 'anger': 'anger', 'disgust': 'sadness', 'neutral': 'optimism', 'joy': 'joy', 'love': 'joy', 'happiness': 'joy', 'sadness': 'sadness', 'fear': 'sadness', 'surprise': 'optimism', 'optimism': 'optimism', # Additional mappings for different model outputs 'positive': 'joy', 'negative': 'sadness', 'admiration': 'joy', 'amusement': 'joy', 'annoyance': 'anger', 'approval': 'optimism', 'caring': 'joy', 'confusion': 'sadness', 'curiosity': 'optimism', 'desire': 'optimism', 'disappointment': 'sadness', 'disapproval': 'anger', 'embarrassment': 'sadness', 'excitement': 'joy', 'gratitude': 'joy', 'grief': 'sadness', 'nervousness': 'sadness', 'pride': 'joy', 'realization': 'optimism', 'relief': 'joy', 'remorse': 'sadness' } mapped_emotion = emotion_mapping.get(emotion, 'optimism') return mapped_emotion, confidence except Exception as e: logger.error(f"Error in emotion detection: {e}") # Fall back to rule-based detection return self.detect_emotion_fallback(text) # ============================ # LIGHTWEIGHT EMOTION DETECTOR (ALTERNATIVE) # ============================ class LightweightEmotionDetector: """A simple, reliable emotion detector that doesn't rely on heavy models""" def __init__(self): # Enhanced keyword-based emotion detection self.emotion_patterns = { 'joy': { 'keywords': ['happy', 'joy', 'joyful', 'excited', 'thrilled', 'wonderful', 'amazing', 'great', 'fantastic', 'love', 'awesome', 'brilliant', 'perfect', 'delighted', 'cheerful', 'elated', 'glad', 'pleased'], 'phrases': ['feel good', 'so happy', 'really excited', 'love it', 'makes me happy', 'feeling great'] }, 'anger': { 'keywords': ['angry', 'mad', 'furious', 'annoyed', 'frustrated', 'irritated', 'hate', 'terrible', 'awful', 'disgusting', 'outraged', 'livid', 'enraged', 'pissed', 'infuriated', 'resentful'], 'phrases': ['so angry', 'really mad', 'hate it', 'makes me angry', 'fed up', 'sick of'] }, 'sadness': { 'keywords': ['sad', 'depressed', 'upset', 'down', 'lonely', 'miserable', 'disappointed', 'heartbroken', 'devastated', 'hopeless', 'melancholy', 'sorrowful', 'dejected', 'despondent', 'gloomy'], 'phrases': ['feel sad', 'so down', 'really upset', 'makes me sad', 'feeling low', 'broken hearted'] }, 'optimism': { 'keywords': ['hope', 'hopeful', 'optimistic', 'positive', 'confident', 'believe', 'future', 'better', 'improve', 'progress', 'opportunity', 'potential', 'bright', 'promising', 'encouraging'], 'phrases': ['looking forward', 'things will get better', 'positive about', 'have hope', 'bright future'] } } def detect_emotion(self, text): """Detect emotion using enhanced pattern matching""" if not text.strip(): return 'optimism', 0.5 text_lower = text.lower() emotion_scores = {emotion: 0 for emotion in self.emotion_patterns.keys()} # Score based on keywords and phrases for emotion, patterns in self.emotion_patterns.items(): # Keyword matching for keyword in patterns['keywords']: if keyword in text_lower: emotion_scores[emotion] += 1 # Phrase matching (higher weight) for phrase in patterns['phrases']: if phrase in text_lower: emotion_scores[emotion] += 2 # Intensity modifiers intensifiers = ['very', 'really', 'extremely', 'so', 'absolutely', 'totally', 'completely'] intensity_boost = sum(1 for word in intensifiers if word in text_lower) * 0.5 # Get the emotion with highest score if max(emotion_scores.values()) > 0: detected_emotion = max(emotion_scores, key=emotion_scores.get) base_confidence = min(emotion_scores[detected_emotion] * 0.2 + 0.5, 0.95) confidence = min(base_confidence + intensity_boost * 0.1, 0.98) else: detected_emotion = 'optimism' # Default to optimism confidence = 0.6 return detected_emotion, confidence # ============================ # RAG SYSTEM WITH FAISS # ============================ class RAGSystem: """ Retrieval-Augmented Generation (RAG) system for selecting text templates based on user input and detected emotion. """ def __init__(self, rag_data): self.rag_data = rag_data self.texts = [entry['text'] for entry in rag_data] if len(self.texts) == 0: st.warning("โ ๏ธ No RAG data available. Using simple responses.") self.embed_model = None self.embeddings = None self.index = None return try: # Initialize embedding model self.embed_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2') # Create embeddings self.embeddings = self.embed_model.encode( self.texts, convert_to_numpy=True, show_progress_bar=False ) # Create FAISS index dimension = self.embeddings.shape[1] self.index = faiss.IndexFlatL2(dimension) self.index.add(self.embeddings) except Exception as e: st.warning(f"โ ๏ธ Could not initialize RAG system: {e}") self.embed_model = None self.embeddings = None self.index = None def retrieve_templates(self, user_input, detected_emotion, top_k=3): """Retrieve relevant templates based on emotion and similarity""" if not self.embed_model or not self.index: return [] try: # Filter by emotion first emotion_filtered_indices = [ i for i, entry in enumerate(self.rag_data) if entry['emotion'] == detected_emotion ] if not emotion_filtered_indices: emotion_filtered_indices = list(range(len(self.rag_data))) # Get filtered embeddings filtered_embeddings = self.embeddings[emotion_filtered_indices] filtered_texts = [self.texts[i] for i in emotion_filtered_indices] # Create temporary index for filtered data temp_index = faiss.IndexFlatL2(filtered_embeddings.shape[1]) temp_index.add(filtered_embeddings) # Search for similar templates user_embedding = self.embed_model.encode([user_input], convert_to_numpy=True) distances, indices = temp_index.search( user_embedding, min(top_k, len(filtered_texts)) ) # Top templates top_templates = [filtered_texts[i] for i in indices[0]] return top_templates except Exception as e: logger.error(f"Error in template retrieval: {e}") return [] # ============================ # RESPONSE GENERATOR # ============================ class ResponseGenerator: def __init__(self, emotion_detector, rag_system): self.emotion_detector = emotion_detector self.rag_system = rag_system # Empathetic response templates by emotion self.response_templates = { 'anger': [ "I can understand why you're feeling frustrated. It's completely valid to feel this way.", "Your anger is understandable. Sometimes situations can be really challenging.", "I hear that you're upset, and that's okay. These feelings are important." ], 'sadness': [ "I'm sorry you're going through a difficult time. Your feelings are valid.", "It sounds like you're dealing with something really tough right now.", "I can sense your sadness, and I want you to know that it's okay to feel this way." ], 'joy': [ "I'm so happy to hear about your positive experience! That's wonderful.", "Your joy is contagious! It's great to hear such positive news.", "I love hearing about things that make you happy. That sounds amazing!" ], 'optimism': [ "Your positive outlook is inspiring. That's a great way to look at things.", "I appreciate your hopeful perspective. That's really encouraging.", "It's wonderful to hear your optimistic thoughts. Keep that positive energy!" ], 'neutral': [ "Thanks for sharing that. I hear you.", "I understand. Let's continue exploring this topic together.", "I appreciate you telling me that. Let's keep going." ] } def generate_response(self, user_input, top_k=3): """Generate empathetic response using RAG and few-shot prompting""" try: # Step 1: Detect emotion detected_emotion, confidence = self.emotion_detector.detect_emotion(user_input) # Step 2: Retrieve relevant templates (if RAG is available) templates = [] if self.rag_system and self.rag_system.embed_model: templates = self.rag_system.retrieve_templates( user_input, detected_emotion, top_k=top_k ) # Step 3: Create response using templates and emotion base_responses = self.response_templates.get( detected_emotion, self.response_templates['optimism'] ) # Combine base response with context from templates selected_base = random.choice(base_responses) # Create contextual response if templates: context_template = random.choice(templates) # Enhanced response generation response = f"{selected_base} I can relate to what you're sharing - {context_template[:80]}. Remember that your feelings are important and valid." else: response = selected_base # Add disclaimer disclaimer = "\n\nโ ๏ธ This is an automated response. For serious emotional concerns, please consult a mental health professional." return response + disclaimer, detected_emotion, confidence except Exception as e: error_msg = f"I apologize, but I encountered an error: {str(e)}" disclaimer = "\n\nโ ๏ธ This is an automated response. Please consult a professional if needed." return error_msg + disclaimer, 'neutral', 0.0 # ============================ # SIMPLE RESPONSE GENERATOR (FALLBACK) # ============================ class SimpleResponseGenerator: """Simplified response generator that works without RAG""" def __init__(self, emotion_detector): self.emotion_detector = emotion_detector # Enhanced response templates self.response_templates = { 'anger': [ "I can understand why you're feeling frustrated. It's completely valid to feel this way. Sometimes situations can be really challenging, and it's important to acknowledge these feelings.", "Your anger is understandable. When things don't go as expected, it's natural to feel upset. Would you like to talk about what's causing these feelings?", "I hear that you're upset, and that's okay. These feelings are important and deserve attention. Take a moment to breathe if you need it." ], 'sadness': [ "I'm sorry you're going through a difficult time. Your feelings are valid, and it's okay to feel sad sometimes. Remember that this feeling will pass.", "It sounds like you're dealing with something really tough right now. I want you to know that it's perfectly normal to feel this way, and you're not alone.", "I can sense your sadness, and I want you to know that it's okay to feel this way. Sometimes life presents us with challenges that naturally make us feel down." ], 'joy': [ "I'm so happy to hear about your positive experience! That's wonderful, and your joy is really uplifting. It's great when life gives us these beautiful moments.", "Your joy is contagious! It's amazing to hear such positive news. These happy moments are precious and worth celebrating.", "I love hearing about things that make you happy. That sounds absolutely amazing! Your enthusiasm is really inspiring." ], 'optimism': [ "Your positive outlook is truly inspiring. That's such a great way to look at things, and your hopefulness is really encouraging.", "I appreciate your hopeful perspective. That kind of optimism can make such a difference, not just for you but for others around you too.", "It's wonderful to hear your optimistic thoughts. Keep that positive energy flowing - it's a powerful force for good!" ] } def generate_response(self, user_input, top_k=3): """Generate response without RAG system""" try: # Detect emotion detected_emotion, confidence = self.emotion_detector.detect_emotion(user_input) # Get appropriate response template templates = self.response_templates.get(detected_emotion, self.response_templates['optimism']) selected_response = random.choice(templates) # Add personalized touch based on input length and content if len(user_input) > 100: selected_response += " I can see you've shared quite a bit with me, and I appreciate your openness." elif any(word in user_input.lower() for word in ['help', 'advice', 'what should']): selected_response += " If you'd like to talk more about this, I'm here to listen." # Add disclaimer disclaimer = "\n\nโ ๏ธ This is an automated response. For serious emotional concerns, please consult a mental health professional." return selected_response + disclaimer, detected_emotion, confidence except Exception as e: error_msg = f"I apologize, but I encountered an error: {str(e)}" disclaimer = "\n\nโ ๏ธ This is an automated response. Please consult a professional if needed." return error_msg + disclaimer, 'optimism', 0.0 # ============================ # STREAMLIT APP # ============================ def main(): # Page config with better settings st.set_page_config( page_title="Empathetic AI Companion", page_icon="๐ค", layout="wide", initial_sidebar_state="expanded" ) # CSS with modern design st.markdown(""" """, unsafe_allow_html=True) # Enhanced Header with animation st.markdown("""
Your intelligent partner for emotional support and meaningful conversations