""" Intent Classifier for Hybrid RAG + FSM Chatbot Detects user intent to route between scenario flows and RAG queries """ from typing import Dict, Optional, List import re class IntentClassifier: """ Classify user intent using keyword matching Routes to either: - Scenario flows (scripted conversations) - RAG queries (knowledge retrieval) """ def __init__(self, scenarios_dir: str = "scenarios"): """ Initialize with auto-loading triggers from scenario JSON files Args: scenarios_dir: Directory containing scenario JSON files """ # Auto-load scenario patterns from JSON files self.scenario_patterns = self._load_scenario_patterns(scenarios_dir) # General question patterns (RAG) self.general_patterns = [ # Location "ở đâu", "địa điểm", "location", "where", "chỗ nào", "tổ chức tại", # Time "mấy giờ", "khi nào", "when", "time", "bao giờ", "thời gian", "ngày nào", # Info "thông tin", "info", "information", "chi tiết", "details", "về", # Parking "đậu xe", "parking", "gửi xe", # Contact "liên hệ", "contact", "số điện thoại", # Events/content - NEW (Bug fix #3) "sự kiện", "event", "đâu", "show nào", "line-up", "lineup", "performer" ] def _load_scenario_patterns(self, scenarios_dir: str) -> dict: """ Auto-load triggers from all scenario JSON files Returns: {"scenario_id": ["trigger1", "trigger2", ...]} """ import json import os patterns = {} if not os.path.exists(scenarios_dir): print(f"⚠ Scenarios directory not found: {scenarios_dir}") return patterns for filename in os.listdir(scenarios_dir): if filename.endswith('.json'): filepath = os.path.join(scenarios_dir, filename) try: with open(filepath, 'r', encoding='utf-8') as f: scenario = json.load(f) scenario_id = scenario.get('scenario_id') triggers = scenario.get('triggers', []) if scenario_id and triggers: patterns[scenario_id] = triggers print(f"✓ Loaded triggers for: {scenario_id} ({len(triggers)} patterns)") except Exception as e: print(f"⚠ Error loading {filename}: {e}") return patterns def classify( self, message: str, conversation_state: Optional[Dict] = None ) -> str: """ Classify user intent with IMPROVED mid-scenario detection (Bug fix #3) Returns: - "scenario:{scenario_id}" - Trigger new scenario - "scenario:continue" - Continue active scenario - "rag:general" - General RAG query (no active scenario) - "rag:with_resume" - RAG query mid-scenario (then resume) """ message_lower = message.lower().strip() # Check if user is in active scenario active_scenario = conversation_state.get('active_scenario') if conversation_state else None if active_scenario: # User is in a scenario - check if this is off-topic or continuation # IMPROVED: Detect off-topic questions better # Check for question words + patterns question_indicators = ["?", "đâu", "gì", "sao", "where", "what", "how", "when"] has_question = any(q in message_lower for q in question_indicators) # Check if matches general patterns matches_general = self._matches_any_pattern(message_lower, self.general_patterns) # Short messages with questions are likely off-topic word_count = len(message_lower.split()) is_short_question = word_count <= 4 and has_question # Decision logic if matches_general or is_short_question: # User asking off-topic question → RAG with resume print(f"🔀 Off-topic detected: '{message}' → rag:with_resume") return "rag:with_resume" else: # Normal scenario continuation return "scenario:continue" # Not in scenario - check for scenario triggers for scenario_id, patterns in self.scenario_patterns.items(): for pattern in patterns: if pattern.lower() in message_lower: return f"scenario:{scenario_id}" # No scenario match - general RAG query return "rag:general" def _matches_any_pattern(self, message: str, patterns: List[str]) -> bool: """ Check if message matches any pattern in list """ for pattern in patterns: # Simple substring match if pattern in message: return True # Word boundary check if re.search(rf'\b{re.escape(pattern)}\b', message, re.IGNORECASE): return True return False def get_scenario_type(self, intent: str) -> Optional[str]: """ Extract scenario type from intent string Args: intent: "scenario:price_inquiry" or "scenario:continue" Returns: "price_inquiry" or None """ if not intent.startswith("scenario:"): return None parts = intent.split(":", 1) if len(parts) < 2: return None scenario_type = parts[1] if scenario_type == "continue": return None return scenario_type def add_scenario_pattern(self, scenario_id: str, patterns: List[str]): """ Dynamically add new scenario patterns """ if scenario_id in self.scenario_patterns: self.scenario_patterns[scenario_id].extend(patterns) else: self.scenario_patterns[scenario_id] = patterns def add_general_pattern(self, patterns: List[str]): """ Dynamically add new general question patterns """ self.general_patterns.extend(patterns)