Spaces:
Running
Running
Upload 20 files
Browse files- hybrid_chat_endpoint.py +23 -10
- intent_classifier.py +39 -59
- scenario_engine.py +17 -5
hybrid_chat_endpoint.py
CHANGED
|
@@ -208,9 +208,9 @@ async def handle_rag_with_resume(
|
|
| 208 |
):
|
| 209 |
"""
|
| 210 |
Handle RAG query mid-scenario
|
| 211 |
-
Answer question then remind user to continue scenario
|
| 212 |
"""
|
| 213 |
-
# Query RAG
|
| 214 |
context_used = []
|
| 215 |
if request.use_rag:
|
| 216 |
query_embedding = embedding_service.encode_text(request.message)
|
|
@@ -222,18 +222,31 @@ async def handle_rag_with_resume(
|
|
| 222 |
)
|
| 223 |
context_used = results
|
| 224 |
|
| 225 |
-
# Build
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
context_used
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
# Add resume hint
|
| 233 |
-
|
| 234 |
|
| 235 |
return {
|
| 236 |
-
"response": rag_response +
|
| 237 |
"mode": "rag_with_resume",
|
| 238 |
"scenario_active": True,
|
| 239 |
"context_used": context_used
|
|
|
|
| 208 |
):
|
| 209 |
"""
|
| 210 |
Handle RAG query mid-scenario
|
| 211 |
+
Answer question properly, then remind user to continue scenario
|
| 212 |
"""
|
| 213 |
+
# Query RAG with proper search
|
| 214 |
context_used = []
|
| 215 |
if request.use_rag:
|
| 216 |
query_embedding = embedding_service.encode_text(request.message)
|
|
|
|
| 222 |
)
|
| 223 |
context_used = results
|
| 224 |
|
| 225 |
+
# Build REAL RAG response (not placeholder)
|
| 226 |
+
if context_used and len(context_used) > 0:
|
| 227 |
+
# Format top results nicely
|
| 228 |
+
top_result = context_used[0]
|
| 229 |
+
text = top_result['metadata'].get('text', '')
|
| 230 |
+
|
| 231 |
+
# Extract most relevant snippet (first 300 chars)
|
| 232 |
+
if text:
|
| 233 |
+
rag_response = text[:300].strip()
|
| 234 |
+
if len(text) > 300:
|
| 235 |
+
rag_response += "..."
|
| 236 |
+
else:
|
| 237 |
+
rag_response = "Tôi tìm thấy thông tin nhưng không thể hiển thị chi tiết."
|
| 238 |
+
|
| 239 |
+
# If multiple results, add count
|
| 240 |
+
if len(context_used) > 1:
|
| 241 |
+
rag_response += f"\n\n(Tìm thấy {len(context_used)} kết quả liên quan)"
|
| 242 |
+
else:
|
| 243 |
+
rag_response = "Xin lỗi, tôi không tìm thấy thông tin về câu hỏi này trong tài liệu."
|
| 244 |
|
| 245 |
# Add resume hint
|
| 246 |
+
resume_hint = "\n\n---\n💬 Vậy nha! Quay lại câu hỏi trước, bạn đã quyết định chưa?"
|
| 247 |
|
| 248 |
return {
|
| 249 |
+
"response": rag_response + resume_hint,
|
| 250 |
"mode": "rag_with_resume",
|
| 251 |
"scenario_active": True,
|
| 252 |
"context_used": context_used
|
intent_classifier.py
CHANGED
|
@@ -42,7 +42,11 @@ class IntentClassifier:
|
|
| 42 |
"đậu xe", "parking", "gửi xe",
|
| 43 |
|
| 44 |
# Contact
|
| 45 |
-
"liên hệ", "contact", "số điện thoại"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
]
|
| 47 |
|
| 48 |
def _load_scenario_patterns(self, scenarios_dir: str) -> dict:
|
|
@@ -84,63 +88,62 @@ class IntentClassifier:
|
|
| 84 |
conversation_state: Optional[Dict] = None
|
| 85 |
) -> str:
|
| 86 |
"""
|
| 87 |
-
Classify user intent
|
| 88 |
-
|
| 89 |
-
Args:
|
| 90 |
-
message: User message
|
| 91 |
-
conversation_state: Current conversation state (optional)
|
| 92 |
-
{
|
| 93 |
-
"active_scenario": "price_inquiry" | null,
|
| 94 |
-
"scenario_step": 3,
|
| 95 |
-
"scenario_data": {...}
|
| 96 |
-
}
|
| 97 |
|
| 98 |
Returns:
|
| 99 |
-
|
| 100 |
-
- "scenario
|
| 101 |
-
- "
|
| 102 |
-
- "rag:
|
| 103 |
-
- "rag:with_resume" - RAG query but resume scenario after
|
| 104 |
"""
|
| 105 |
message_lower = message.lower().strip()
|
| 106 |
-
state = conversation_state or {}
|
| 107 |
|
| 108 |
-
# Check if in active scenario
|
| 109 |
-
|
| 110 |
|
| 111 |
-
if
|
| 112 |
-
# User is
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
return "rag:with_resume"
|
| 116 |
else:
|
| 117 |
-
#
|
| 118 |
return "scenario:continue"
|
| 119 |
|
| 120 |
-
# Not in scenario - check for
|
| 121 |
for scenario_id, patterns in self.scenario_patterns.items():
|
| 122 |
-
|
| 123 |
-
|
|
|
|
| 124 |
|
| 125 |
-
#
|
| 126 |
return "rag:general"
|
| 127 |
|
| 128 |
-
def _is_general_question(self, message: str) -> bool:
|
| 129 |
-
"""
|
| 130 |
-
Check if message is a general question (should use RAG)
|
| 131 |
-
"""
|
| 132 |
-
return self._matches_any_pattern(message, self.general_patterns)
|
| 133 |
-
|
| 134 |
def _matches_any_pattern(self, message: str, patterns: List[str]) -> bool:
|
| 135 |
"""
|
| 136 |
Check if message matches any pattern in list
|
| 137 |
"""
|
| 138 |
for pattern in patterns:
|
| 139 |
-
# Simple substring match
|
| 140 |
if pattern in message:
|
| 141 |
return True
|
| 142 |
|
| 143 |
-
#
|
| 144 |
if re.search(rf'\b{re.escape(pattern)}\b', message, re.IGNORECASE):
|
| 145 |
return True
|
| 146 |
|
|
@@ -183,26 +186,3 @@ class IntentClassifier:
|
|
| 183 |
Dynamically add new general question patterns
|
| 184 |
"""
|
| 185 |
self.general_patterns.extend(patterns)
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
# Example usage
|
| 189 |
-
if __name__ == "__main__":
|
| 190 |
-
classifier = IntentClassifier()
|
| 191 |
-
|
| 192 |
-
# Test cases
|
| 193 |
-
test_cases = [
|
| 194 |
-
("giá vé bao nhiêu?", None),
|
| 195 |
-
("sự kiện ở đâu?", None),
|
| 196 |
-
("đặt vé cho tôi", None),
|
| 197 |
-
("A show", {"active_scenario": "price_inquiry", "scenario_step": 1}),
|
| 198 |
-
("sự kiện mấy giờ?", {"active_scenario": "price_inquiry", "scenario_step": 3}),
|
| 199 |
-
]
|
| 200 |
-
|
| 201 |
-
print("Intent Classification Test:")
|
| 202 |
-
print("-" * 50)
|
| 203 |
-
for message, state in test_cases:
|
| 204 |
-
intent = classifier.classify(message, state)
|
| 205 |
-
print(f"Message: {message}")
|
| 206 |
-
print(f"State: {state}")
|
| 207 |
-
print(f"Intent: {intent}")
|
| 208 |
-
print()
|
|
|
|
| 42 |
"đậu xe", "parking", "gửi xe",
|
| 43 |
|
| 44 |
# Contact
|
| 45 |
+
"liên hệ", "contact", "số điện thoại",
|
| 46 |
+
|
| 47 |
+
# Events/content - NEW (Bug fix #3)
|
| 48 |
+
"sự kiện", "event", "đâu", "show nào",
|
| 49 |
+
"line-up", "lineup", "performer"
|
| 50 |
]
|
| 51 |
|
| 52 |
def _load_scenario_patterns(self, scenarios_dir: str) -> dict:
|
|
|
|
| 88 |
conversation_state: Optional[Dict] = None
|
| 89 |
) -> str:
|
| 90 |
"""
|
| 91 |
+
Classify user intent with IMPROVED mid-scenario detection (Bug fix #3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
Returns:
|
| 94 |
+
- "scenario:{scenario_id}" - Trigger new scenario
|
| 95 |
+
- "scenario:continue" - Continue active scenario
|
| 96 |
+
- "rag:general" - General RAG query (no active scenario)
|
| 97 |
+
- "rag:with_resume" - RAG query mid-scenario (then resume)
|
|
|
|
| 98 |
"""
|
| 99 |
message_lower = message.lower().strip()
|
|
|
|
| 100 |
|
| 101 |
+
# Check if user is in active scenario
|
| 102 |
+
active_scenario = conversation_state.get('active_scenario') if conversation_state else None
|
| 103 |
|
| 104 |
+
if active_scenario:
|
| 105 |
+
# User is in a scenario - check if this is off-topic or continuation
|
| 106 |
+
|
| 107 |
+
# IMPROVED: Detect off-topic questions better
|
| 108 |
+
# Check for question words + patterns
|
| 109 |
+
question_indicators = ["?", "đâu", "gì", "sao", "where", "what", "how", "when"]
|
| 110 |
+
has_question = any(q in message_lower for q in question_indicators)
|
| 111 |
+
|
| 112 |
+
# Check if matches general patterns
|
| 113 |
+
matches_general = self._matches_any_pattern(message_lower, self.general_patterns)
|
| 114 |
+
|
| 115 |
+
# Short messages with questions are likely off-topic
|
| 116 |
+
word_count = len(message_lower.split())
|
| 117 |
+
is_short_question = word_count <= 4 and has_question
|
| 118 |
+
|
| 119 |
+
# Decision logic
|
| 120 |
+
if matches_general or is_short_question:
|
| 121 |
+
# User asking off-topic question → RAG with resume
|
| 122 |
+
print(f"🔀 Off-topic detected: '{message}' → rag:with_resume")
|
| 123 |
return "rag:with_resume"
|
| 124 |
else:
|
| 125 |
+
# Normal scenario continuation
|
| 126 |
return "scenario:continue"
|
| 127 |
|
| 128 |
+
# Not in scenario - check for scenario triggers
|
| 129 |
for scenario_id, patterns in self.scenario_patterns.items():
|
| 130 |
+
for pattern in patterns:
|
| 131 |
+
if pattern.lower() in message_lower:
|
| 132 |
+
return f"scenario:{scenario_id}"
|
| 133 |
|
| 134 |
+
# No scenario match - general RAG query
|
| 135 |
return "rag:general"
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
def _matches_any_pattern(self, message: str, patterns: List[str]) -> bool:
|
| 138 |
"""
|
| 139 |
Check if message matches any pattern in list
|
| 140 |
"""
|
| 141 |
for pattern in patterns:
|
| 142 |
+
# Simple substring match
|
| 143 |
if pattern in message:
|
| 144 |
return True
|
| 145 |
|
| 146 |
+
# Word boundary check
|
| 147 |
if re.search(rf'\b{re.escape(pattern)}\b', message, re.IGNORECASE):
|
| 148 |
return True
|
| 149 |
|
|
|
|
| 186 |
Dynamically add new general question patterns
|
| 187 |
"""
|
| 188 |
self.general_patterns.extend(patterns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scenario_engine.py
CHANGED
|
@@ -248,10 +248,12 @@ class ScenarioEngine:
|
|
| 248 |
3. Merged template vars
|
| 249 |
"""
|
| 250 |
# Layer 1: Base data (initial + user inputs)
|
|
|
|
| 251 |
template_data = {
|
| 252 |
-
'event_name': scenario_data.get('event_name', 'sự kiện này'),
|
| 253 |
-
'mood': scenario_data.get('mood', ''),
|
| 254 |
-
'interest': scenario_data.get('interest', ''),
|
|
|
|
| 255 |
**scenario_data # Include all scenario data
|
| 256 |
}
|
| 257 |
|
|
@@ -278,10 +280,20 @@ class ScenarioEngine:
|
|
| 278 |
return step_config['bot_message_template'].format(**template_data)
|
| 279 |
except KeyError as e:
|
| 280 |
print(f"⚠ Template var missing: {e}")
|
| 281 |
-
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
return step_config.get('bot_message', '')
|
|
|
|
| 285 |
|
| 286 |
def _execute_rag_query(self, query: str, rag_service: Any) -> str:
|
| 287 |
"""
|
|
|
|
| 248 |
3. Merged template vars
|
| 249 |
"""
|
| 250 |
# Layer 1: Base data (initial + user inputs)
|
| 251 |
+
# Map common template vars from scenario_data
|
| 252 |
template_data = {
|
| 253 |
+
'event_name': scenario_data.get('event_name', scenario_data.get('step_1_input', 'sự kiện này')),
|
| 254 |
+
'mood': scenario_data.get('mood', scenario_data.get('step_1_input', '')),
|
| 255 |
+
'interest': scenario_data.get('interest', scenario_data.get('step_1_input', '')),
|
| 256 |
+
'interest_tag': scenario_data.get('interest_tag', scenario_data.get('step_1_input', '')),
|
| 257 |
**scenario_data # Include all scenario data
|
| 258 |
}
|
| 259 |
|
|
|
|
| 280 |
return step_config['bot_message_template'].format(**template_data)
|
| 281 |
except KeyError as e:
|
| 282 |
print(f"⚠ Template var missing: {e}")
|
| 283 |
+
print(f"📋 Available vars: {list(template_data.keys())}")
|
| 284 |
+
# Fallback: replace missing vars with placeholder
|
| 285 |
+
import re
|
| 286 |
+
message = step_config['bot_message_template']
|
| 287 |
+
# Find all {var} patterns
|
| 288 |
+
missing_vars = re.findall(r'\{(\w+)\}', message)
|
| 289 |
+
for var in missing_vars:
|
| 290 |
+
if var not in template_data:
|
| 291 |
+
template_data[var] = f"[{var}]"
|
| 292 |
+
print(f"⚠ Adding placeholder for: {var}")
|
| 293 |
+
return message.format(**template_data)
|
| 294 |
|
| 295 |
return step_config.get('bot_message', '')
|
| 296 |
+
|
| 297 |
|
| 298 |
def _execute_rag_query(self, query: str, rag_service: Any) -> str:
|
| 299 |
"""
|