Spaces:

minhvtt
/

ChatbotRAG

Running

App Files Files Community

minhvtt committed 7 days ago

Commit

2ecdea6

verified ·

1 Parent(s): 75033ed

Upload 20 files

Browse files

Files changed (3) hide show

hybrid_chat_endpoint.py +23 -10
intent_classifier.py +39 -59
scenario_engine.py +17 -5

hybrid_chat_endpoint.py CHANGED Viewed

@@ -208,9 +208,9 @@ async def handle_rag_with_resume(
 ):
     """
     Handle RAG query mid-scenario
-    Answer question then remind user to continue scenario
     """
-    # Query RAG
     context_used = []
     if request.use_rag:
         query_embedding = embedding_service.encode_text(request.message)
@@ -222,18 +222,31 @@ async def handle_rag_with_resume(
         )
         context_used = results
-    # Build simple RAG response
-    rag_response = await simple_rag_response(
-        request.message,
-        context_used,
-        request.system_message
-    )
     # Add resume hint
-    last_scenario_msg = f"\n\n---\nVậy nha! Quay lại câu hỏi trước, bạn đã quyết định chưa? ^^"
     return {
-        "response": rag_response + last_scenario_msg,
         "mode": "rag_with_resume",
         "scenario_active": True,
         "context_used": context_used

 ):
     """
     Handle RAG query mid-scenario
+    Answer question properly, then remind user to continue scenario
     """
+    # Query RAG with proper search
     context_used = []
     if request.use_rag:
         query_embedding = embedding_service.encode_text(request.message)
         )
         context_used = results
+    # Build REAL RAG response (not placeholder)
+    if context_used and len(context_used) > 0:
+        # Format top results nicely
+        top_result = context_used[0]
+        text = top_result['metadata'].get('text', '')
+        # Extract most relevant snippet (first 300 chars)
+        if text:
+            rag_response = text[:300].strip()
+            if len(text) > 300:
+                rag_response += "..."
+        else:
+            rag_response = "Tôi tìm thấy thông tin nhưng không thể hiển thị chi tiết."
+        # If multiple results, add count
+        if len(context_used) > 1:
+            rag_response += f"\n\n(Tìm thấy {len(context_used)} kết quả liên quan)"
+    else:
+        rag_response = "Xin lỗi, tôi không tìm thấy thông tin về câu hỏi này trong tài liệu."
     # Add resume hint
+    resume_hint = "\n\n---\n💬 Vậy nha! Quay lại câu hỏi trước, bạn đã quyết định chưa?"
     return {
+        "response": rag_response + resume_hint,
         "mode": "rag_with_resume",
         "scenario_active": True,
         "context_used": context_used

intent_classifier.py CHANGED Viewed

@@ -42,7 +42,11 @@ class IntentClassifier:
             "đậu xe", "parking", "gửi xe",
             # Contact
-            "liên hệ", "contact", "số điện thoại"
         ]
     def _load_scenario_patterns(self, scenarios_dir: str) -> dict:
@@ -84,63 +88,62 @@ class IntentClassifier:
         conversation_state: Optional[Dict] = None
     ) -> str:
         """
-        Classify user intent
-        Args:
-            message: User message
-            conversation_state: Current conversation state (optional)
-                {
-                    "active_scenario": "price_inquiry" | null,
-                    "scenario_step": 3,
-                    "scenario_data": {...}
-                }
         Returns:
-            Intent string:
-            - "scenario:<scenario_id>" - Start new scenario
-            - "scenario:continue" - Continue current scenario
-            - "rag:general" - General RAG query
-            - "rag:with_resume" - RAG query but resume scenario after
         """
         message_lower = message.lower().strip()
-        state = conversation_state or {}
-        # Check if in active scenario
-        in_scenario = state.get("active_scenario") is not None
-        if in_scenario:
-            # User is mid-scenario
-            # Check if message is off-topic question
-            if self._is_general_question(message_lower):
                 return "rag:with_resume"
             else:
-                # Continue scenario (user answering scenario question)
                 return "scenario:continue"
-        # Not in scenario - check for new scenario triggers
         for scenario_id, patterns in self.scenario_patterns.items():
-            if self._matches_any_pattern(message_lower, patterns):
-                return f"scenario:{scenario_id}"
-        # Default: general RAG query
         return "rag:general"
-    def _is_general_question(self, message: str) -> bool:
-        """
-        Check if message is a general question (should use RAG)
-        """
-        return self._matches_any_pattern(message, self.general_patterns)
     def _matches_any_pattern(self, message: str, patterns: List[str]) -> bool:
         """
         Check if message matches any pattern in list
         """
         for pattern in patterns:
-            # Simple substring match (case insensitive already done)
             if pattern in message:
                 return True
-            # Check word boundary
             if re.search(rf'\b{re.escape(pattern)}\b', message, re.IGNORECASE):
                 return True
@@ -183,26 +186,3 @@ class IntentClassifier:
         Dynamically add new general question patterns
         """
         self.general_patterns.extend(patterns)
-# Example usage
-if __name__ == "__main__":
-    classifier = IntentClassifier()
-    # Test cases
-    test_cases = [
-        ("giá vé bao nhiêu?", None),
-        ("sự kiện ở đâu?", None),
-        ("đặt vé cho tôi", None),
-        ("A show", {"active_scenario": "price_inquiry", "scenario_step": 1}),
-        ("sự kiện mấy giờ?", {"active_scenario": "price_inquiry", "scenario_step": 3}),
-    ]
-    print("Intent Classification Test:")
-    print("-" * 50)
-    for message, state in test_cases:
-        intent = classifier.classify(message, state)
-        print(f"Message: {message}")
-        print(f"State: {state}")
-        print(f"Intent: {intent}")
-        print()

             "đậu xe", "parking", "gửi xe",
             # Contact
+            "liên hệ", "contact", "số điện thoại",
+            # Events/content - NEW (Bug fix #3)
+            "sự kiện", "event", "đâu", "show nào",
+            "line-up", "lineup", "performer"
         ]
     def _load_scenario_patterns(self, scenarios_dir: str) -> dict:
         conversation_state: Optional[Dict] = None
     ) -> str:
         """
+        Classify user intent with IMPROVED mid-scenario detection (Bug fix #3)
         Returns:
+            - "scenario:{scenario_id}" - Trigger new scenario
+            - "scenario:continue" - Continue active scenario
+            - "rag:general" - General RAG query (no active scenario)
+            - "rag:with_resume" - RAG query mid-scenario (then resume)
         """
         message_lower = message.lower().strip()
+        # Check if user is in active scenario
+        active_scenario = conversation_state.get('active_scenario') if conversation_state else None
+        if active_scenario:
+            # User is in a scenario - check if this is off-topic or continuation
+            # IMPROVED: Detect off-topic questions better
+            # Check for question words + patterns
+            question_indicators = ["?", "đâu", "gì", "sao", "where", "what", "how", "when"]
+            has_question = any(q in message_lower for q in question_indicators)
+            # Check if matches general patterns
+            matches_general = self._matches_any_pattern(message_lower, self.general_patterns)
+            # Short messages with questions are likely off-topic
+            word_count = len(message_lower.split())
+            is_short_question = word_count <= 4 and has_question
+            # Decision logic
+            if matches_general or is_short_question:
+                # User asking off-topic question → RAG with resume
+                print(f"🔀 Off-topic detected: '{message}' → rag:with_resume")
                 return "rag:with_resume"
             else:
+                # Normal scenario continuation
                 return "scenario:continue"
+        # Not in scenario - check for scenario triggers
         for scenario_id, patterns in self.scenario_patterns.items():
+            for pattern in patterns:
+                if pattern.lower() in message_lower:
+                    return f"scenario:{scenario_id}"
+        # No scenario match - general RAG query
         return "rag:general"
     def _matches_any_pattern(self, message: str, patterns: List[str]) -> bool:
         """
         Check if message matches any pattern in list
         """
         for pattern in patterns:
+            # Simple substring match
             if pattern in message:
                 return True
+            # Word boundary check
             if re.search(rf'\b{re.escape(pattern)}\b', message, re.IGNORECASE):
                 return True
         Dynamically add new general question patterns
         """
         self.general_patterns.extend(patterns)

scenario_engine.py CHANGED Viewed

@@ -248,10 +248,12 @@ class ScenarioEngine:
         3. Merged template vars
         """
         # Layer 1: Base data (initial + user inputs)
         template_data = {
-            'event_name': scenario_data.get('event_name', 'sự kiện này'),
-            'mood': scenario_data.get('mood', ''),
-            'interest': scenario_data.get('interest', ''),
             **scenario_data  # Include all scenario data
         }
@@ -278,10 +280,20 @@ class ScenarioEngine:
                 return step_config['bot_message_template'].format(**template_data)
             except KeyError as e:
                 print(f"⚠ Template var missing: {e}")
-                # Fallback to message without placeholders
-                return step_config.get('bot_message', step_config['bot_message_template'])
         return step_config.get('bot_message', '')
     def _execute_rag_query(self, query: str, rag_service: Any) -> str:
         """

         3. Merged template vars
         """
         # Layer 1: Base data (initial + user inputs)
+        # Map common template vars from scenario_data
         template_data = {
+            'event_name': scenario_data.get('event_name', scenario_data.get('step_1_input', 'sự kiện này')),
+            'mood': scenario_data.get('mood', scenario_data.get('step_1_input', '')),
+            'interest': scenario_data.get('interest', scenario_data.get('step_1_input', '')),
+            'interest_tag': scenario_data.get('interest_tag', scenario_data.get('step_1_input', '')),
             **scenario_data  # Include all scenario data
         }
                 return step_config['bot_message_template'].format(**template_data)
             except KeyError as e:
                 print(f"⚠ Template var missing: {e}")
+                print(f"📋 Available vars: {list(template_data.keys())}")
+                # Fallback: replace missing vars with placeholder
+                import re
+                message = step_config['bot_message_template']
+                # Find all {var} patterns
+                missing_vars = re.findall(r'\{(\w+)\}', message)
+                for var in missing_vars:
+                    if var not in template_data:
+                        template_data[var] = f"[{var}]"
+                        print(f"⚠ Adding placeholder for: {var}")
+                return message.format(**template_data)
         return step_config.get('bot_message', '')
     def _execute_rag_query(self, query: str, rag_service: Any) -> str:
         """