minhvtt committed on
Commit
2ecdea6
·
verified ·
1 Parent(s): 75033ed

Upload 20 files

Browse files
Files changed (3) hide show
  1. hybrid_chat_endpoint.py +23 -10
  2. intent_classifier.py +39 -59
  3. scenario_engine.py +17 -5
hybrid_chat_endpoint.py CHANGED
@@ -208,9 +208,9 @@ async def handle_rag_with_resume(
208
  ):
209
  """
210
  Handle RAG query mid-scenario
211
- Answer question then remind user to continue scenario
212
  """
213
- # Query RAG
214
  context_used = []
215
  if request.use_rag:
216
  query_embedding = embedding_service.encode_text(request.message)
@@ -222,18 +222,31 @@ async def handle_rag_with_resume(
222
  )
223
  context_used = results
224
 
225
- # Build simple RAG response
226
- rag_response = await simple_rag_response(
227
- request.message,
228
- context_used,
229
- request.system_message
230
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
  # Add resume hint
233
- last_scenario_msg = f"\n\n---\nVậy nha! Quay lại câu hỏi trước, bạn đã quyết định chưa? ^^"
234
 
235
  return {
236
- "response": rag_response + last_scenario_msg,
237
  "mode": "rag_with_resume",
238
  "scenario_active": True,
239
  "context_used": context_used
 
208
  ):
209
  """
210
  Handle RAG query mid-scenario
211
+ Answer question properly, then remind user to continue scenario
212
  """
213
+ # Query RAG with proper search
214
  context_used = []
215
  if request.use_rag:
216
  query_embedding = embedding_service.encode_text(request.message)
 
222
  )
223
  context_used = results
224
 
225
+ # Build REAL RAG response (not placeholder)
226
+ if context_used and len(context_used) > 0:
227
+ # Format top results nicely
228
+ top_result = context_used[0]
229
+ text = top_result['metadata'].get('text', '')
230
+
231
+ # Extract most relevant snippet (first 300 chars)
232
+ if text:
233
+ rag_response = text[:300].strip()
234
+ if len(text) > 300:
235
+ rag_response += "..."
236
+ else:
237
+ rag_response = "Tôi tìm thấy thông tin nhưng không thể hiển thị chi tiết."
238
+
239
+ # If multiple results, add count
240
+ if len(context_used) > 1:
241
+ rag_response += f"\n\n(Tìm thấy {len(context_used)} kết quả liên quan)"
242
+ else:
243
+ rag_response = "Xin lỗi, tôi không tìm thấy thông tin về câu hỏi này trong tài liệu."
244
 
245
  # Add resume hint
246
+ resume_hint = "\n\n---\n💬 Vậy nha! Quay lại câu hỏi trước, bạn đã quyết định chưa?"
247
 
248
  return {
249
+ "response": rag_response + resume_hint,
250
  "mode": "rag_with_resume",
251
  "scenario_active": True,
252
  "context_used": context_used
intent_classifier.py CHANGED
@@ -42,7 +42,11 @@ class IntentClassifier:
42
  "đậu xe", "parking", "gửi xe",
43
 
44
  # Contact
45
- "liên hệ", "contact", "số điện thoại"
 
 
 
 
46
  ]
47
 
48
  def _load_scenario_patterns(self, scenarios_dir: str) -> dict:
@@ -84,63 +88,62 @@ class IntentClassifier:
84
  conversation_state: Optional[Dict] = None
85
  ) -> str:
86
  """
87
- Classify user intent
88
-
89
- Args:
90
- message: User message
91
- conversation_state: Current conversation state (optional)
92
- {
93
- "active_scenario": "price_inquiry" | null,
94
- "scenario_step": 3,
95
- "scenario_data": {...}
96
- }
97
 
98
  Returns:
99
- Intent string:
100
- - "scenario:<scenario_id>" - Start new scenario
101
- - "scenario:continue" - Continue current scenario
102
- - "rag:general" - General RAG query
103
- - "rag:with_resume" - RAG query but resume scenario after
104
  """
105
  message_lower = message.lower().strip()
106
- state = conversation_state or {}
107
 
108
- # Check if in active scenario
109
- in_scenario = state.get("active_scenario") is not None
110
 
111
- if in_scenario:
112
- # User is mid-scenario
113
- # Check if message is off-topic question
114
- if self._is_general_question(message_lower):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  return "rag:with_resume"
116
  else:
117
- # Continue scenario (user answering scenario question)
118
  return "scenario:continue"
119
 
120
- # Not in scenario - check for new scenario triggers
121
  for scenario_id, patterns in self.scenario_patterns.items():
122
- if self._matches_any_pattern(message_lower, patterns):
123
- return f"scenario:{scenario_id}"
 
124
 
125
- # Default: general RAG query
126
  return "rag:general"
127
 
128
- def _is_general_question(self, message: str) -> bool:
129
- """
130
- Check if message is a general question (should use RAG)
131
- """
132
- return self._matches_any_pattern(message, self.general_patterns)
133
-
134
  def _matches_any_pattern(self, message: str, patterns: List[str]) -> bool:
135
  """
136
  Check if message matches any pattern in list
137
  """
138
  for pattern in patterns:
139
- # Simple substring match (case insensitive already done)
140
  if pattern in message:
141
  return True
142
 
143
- # Check word boundary
144
  if re.search(rf'\b{re.escape(pattern)}\b', message, re.IGNORECASE):
145
  return True
146
 
@@ -183,26 +186,3 @@ class IntentClassifier:
183
  Dynamically add new general question patterns
184
  """
185
  self.general_patterns.extend(patterns)
186
-
187
-
188
- # Example usage
189
- if __name__ == "__main__":
190
- classifier = IntentClassifier()
191
-
192
- # Test cases
193
- test_cases = [
194
- ("giá vé bao nhiêu?", None),
195
- ("sự kiện ở đâu?", None),
196
- ("đặt vé cho tôi", None),
197
- ("A show", {"active_scenario": "price_inquiry", "scenario_step": 1}),
198
- ("sự kiện mấy giờ?", {"active_scenario": "price_inquiry", "scenario_step": 3}),
199
- ]
200
-
201
- print("Intent Classification Test:")
202
- print("-" * 50)
203
- for message, state in test_cases:
204
- intent = classifier.classify(message, state)
205
- print(f"Message: {message}")
206
- print(f"State: {state}")
207
- print(f"Intent: {intent}")
208
- print()
 
42
  "đậu xe", "parking", "gửi xe",
43
 
44
  # Contact
45
+ "liên hệ", "contact", "số điện thoại",
46
+
47
+ # Events/content - NEW (Bug fix #3)
48
+ "sự kiện", "event", "đâu", "show nào",
49
+ "line-up", "lineup", "performer"
50
  ]
51
 
52
  def _load_scenario_patterns(self, scenarios_dir: str) -> dict:
 
88
  conversation_state: Optional[Dict] = None
89
  ) -> str:
90
  """
91
+ Classify user intent with IMPROVED mid-scenario detection (Bug fix #3)
 
 
 
 
 
 
 
 
 
92
 
93
  Returns:
94
+ - "scenario:{scenario_id}" - Trigger new scenario
95
+ - "scenario:continue" - Continue active scenario
96
+ - "rag:general" - General RAG query (no active scenario)
97
+ - "rag:with_resume" - RAG query mid-scenario (then resume)
 
98
  """
99
  message_lower = message.lower().strip()
 
100
 
101
+ # Check if user is in active scenario
102
+ active_scenario = conversation_state.get('active_scenario') if conversation_state else None
103
 
104
+ if active_scenario:
105
+ # User is in a scenario - check if this is off-topic or continuation
106
+
107
+ # IMPROVED: Detect off-topic questions better
108
+ # Check for question words + patterns
109
+ question_indicators = ["?", "đâu", "gì", "sao", "where", "what", "how", "when"]
110
+ has_question = any(q in message_lower for q in question_indicators)
111
+
112
+ # Check if matches general patterns
113
+ matches_general = self._matches_any_pattern(message_lower, self.general_patterns)
114
+
115
+ # Short messages with questions are likely off-topic
116
+ word_count = len(message_lower.split())
117
+ is_short_question = word_count <= 4 and has_question
118
+
119
+ # Decision logic
120
+ if matches_general or is_short_question:
121
+ # User asking off-topic question → RAG with resume
122
+ print(f"🔀 Off-topic detected: '{message}' → rag:with_resume")
123
  return "rag:with_resume"
124
  else:
125
+ # Normal scenario continuation
126
  return "scenario:continue"
127
 
128
+ # Not in scenario - check for scenario triggers
129
  for scenario_id, patterns in self.scenario_patterns.items():
130
+ for pattern in patterns:
131
+ if pattern.lower() in message_lower:
132
+ return f"scenario:{scenario_id}"
133
 
134
+ # No scenario match - general RAG query
135
  return "rag:general"
136
 
 
 
 
 
 
 
137
  def _matches_any_pattern(self, message: str, patterns: List[str]) -> bool:
138
  """
139
  Check if message matches any pattern in list
140
  """
141
  for pattern in patterns:
142
+ # Simple substring match
143
  if pattern in message:
144
  return True
145
 
146
+ # Word boundary check
147
  if re.search(rf'\b{re.escape(pattern)}\b', message, re.IGNORECASE):
148
  return True
149
 
 
186
  Dynamically add new general question patterns
187
  """
188
  self.general_patterns.extend(patterns)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scenario_engine.py CHANGED
@@ -248,10 +248,12 @@ class ScenarioEngine:
248
  3. Merged template vars
249
  """
250
  # Layer 1: Base data (initial + user inputs)
 
251
  template_data = {
252
- 'event_name': scenario_data.get('event_name', 'sự kiện này'),
253
- 'mood': scenario_data.get('mood', ''),
254
- 'interest': scenario_data.get('interest', ''),
 
255
  **scenario_data # Include all scenario data
256
  }
257
 
@@ -278,10 +280,20 @@ class ScenarioEngine:
278
  return step_config['bot_message_template'].format(**template_data)
279
  except KeyError as e:
280
  print(f"⚠ Template var missing: {e}")
281
- # Fallback to message without placeholders
282
- return step_config.get('bot_message', step_config['bot_message_template'])
 
 
 
 
 
 
 
 
 
283
 
284
  return step_config.get('bot_message', '')
 
285
 
286
  def _execute_rag_query(self, query: str, rag_service: Any) -> str:
287
  """
 
248
  3. Merged template vars
249
  """
250
  # Layer 1: Base data (initial + user inputs)
251
+ # Map common template vars from scenario_data
252
  template_data = {
253
+ 'event_name': scenario_data.get('event_name', scenario_data.get('step_1_input', 'sự kiện này')),
254
+ 'mood': scenario_data.get('mood', scenario_data.get('step_1_input', '')),
255
+ 'interest': scenario_data.get('interest', scenario_data.get('step_1_input', '')),
256
+ 'interest_tag': scenario_data.get('interest_tag', scenario_data.get('step_1_input', '')),
257
  **scenario_data # Include all scenario data
258
  }
259
 
 
280
  return step_config['bot_message_template'].format(**template_data)
281
  except KeyError as e:
282
  print(f"⚠ Template var missing: {e}")
283
+ print(f"📋 Available vars: {list(template_data.keys())}")
284
+ # Fallback: replace missing vars with placeholder
285
+ import re
286
+ message = step_config['bot_message_template']
287
+ # Find all {var} patterns
288
+ missing_vars = re.findall(r'\{(\w+)\}', message)
289
+ for var in missing_vars:
290
+ if var not in template_data:
291
+ template_data[var] = f"[{var}]"
292
+ print(f"⚠ Adding placeholder for: {var}")
293
+ return message.format(**template_data)
294
 
295
  return step_config.get('bot_message', '')
296
+
297
 
298
  def _execute_rag_query(self, query: str, rag_service: Any) -> str:
299
  """