minhvtt committed on
Commit 75033ed · verified · 1 Parent(s): 856023d

Upload 26 files
chat_endpoint.py CHANGED
@@ -1,282 +1,283 @@
"""
Chat endpoint with multi-turn conversation + function calling
"""
from fastapi import HTTPException
from datetime import datetime
from huggingface_hub import InferenceClient
from typing import Dict, List
import json


async def chat_endpoint(
    request,  # ChatRequest
    conversation_service,
    tools_service,
    advanced_rag,
    embedding_service,
    qdrant_service,
    chat_history_collection,
    hf_token
):
    """
    Multi-turn conversational chatbot with RAG + function calling

    Flow:
    1. Session management - create or load an existing session
    2. RAG search - retrieve context if enabled
    3. Build messages with conversation history + tools prompt
    4. LLM generation - may trigger tool calls
    5. Execute tools if needed
    6. Final LLM response with tool results
    7. Save to conversation history
    """
    try:
        # ===== 1. SESSION MANAGEMENT =====
        session_id = request.session_id
        if not session_id:
            # Create new session (server-side)
            session_id = conversation_service.create_session(
-               metadata={"user_agent": "api", "created_via": "chat_endpoint"}
+               metadata={"user_agent": "api", "created_via": "chat_endpoint"},
+               user_id=request.user_id  # NEW: Pass user_id from request
            )
-           print(f"Created new session: {session_id}")
+           print(f"Created new session: {session_id} for user: {request.user_id or 'anonymous'}")
        else:
            # Validate existing session
            if not conversation_service.session_exists(session_id):
                raise HTTPException(
                    status_code=404,
                    detail=f"Session {session_id} not found. It may have expired."
                )

        # Load conversation history
        conversation_history = conversation_service.get_conversation_history(session_id)

        # ===== 2. RAG SEARCH =====
        context_used = []
        rag_stats = None
        context_text = ""

        if request.use_rag:
            if request.use_advanced_rag:
                # Use Advanced RAG Pipeline
                hf_client = None
                if request.hf_token or hf_token:
                    hf_client = InferenceClient(token=request.hf_token or hf_token)

                documents, stats = advanced_rag.hybrid_rag_pipeline(
                    query=request.message,
                    top_k=request.top_k,
                    score_threshold=request.score_threshold,
                    use_reranking=request.use_reranking,
                    use_compression=request.use_compression,
                    use_query_expansion=request.use_query_expansion,
                    max_context_tokens=500,
                    hf_client=hf_client
                )

                # Convert to dict format
                context_used = [
                    {
                        "id": doc.id,
                        "confidence": doc.confidence,
                        "metadata": doc.metadata
                    }
                    for doc in documents
                ]
                rag_stats = stats

                # Format context
                context_text = advanced_rag.format_context_for_llm(documents)
            else:
                # Basic RAG
                query_embedding = embedding_service.encode_text(request.message)
                results = qdrant_service.search(
                    query_embedding=query_embedding,
                    limit=request.top_k,
                    score_threshold=request.score_threshold
                )
                context_used = results

                context_text = "\n\nRelevant Context:\n"
                for i, doc in enumerate(context_used, 1):
                    doc_text = doc["metadata"].get("text", "")
                    if not doc_text:
                        doc_text = " ".join(doc["metadata"].get("texts", []))
                    confidence = doc["confidence"]
                    context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"

        # ===== 3. BUILD MESSAGES WITH TOOLS PROMPT =====
        messages = []

        # System message with RAG context + tools instruction
        if request.use_rag and context_used:
            if request.use_advanced_rag:
                base_prompt = advanced_rag.build_rag_prompt(
                    query="",  # The query goes in the user message
                    context=context_text,
                    system_message=request.system_message
                )
            else:
                base_prompt = f"""{request.system_message}

{context_text}

HƯỚNG DẪN:
- Sử dụng thông tin từ context trên để trả lời câu hỏi.
- Trả lời tự nhiên, thân thiện, không copy nguyên văn.
- Nếu tìm thấy sự kiện, hãy tóm tắt các thông tin quan trọng nhất.
"""
        else:
            base_prompt = request.system_message

        # Add tools instruction if enabled
        if request.enable_tools:
            tools_prompt = tools_service.get_tools_prompt()
            system_message_with_tools = f"{base_prompt}\n\n{tools_prompt}"
        else:
            system_message_with_tools = base_prompt

        # Start messages with the system prompt
        messages.append({"role": "system", "content": system_message_with_tools})

        # Add conversation history (past turns)
        messages.extend(conversation_history)

        # Add current user message
        messages.append({"role": "user", "content": request.message})

        # ===== 4. LLM GENERATION =====
        token = request.hf_token or hf_token
        tool_calls_made = []

        if not token:
            response = f"""[LLM Response Placeholder]

Context retrieved: {len(context_used)} documents
User question: {request.message}
Session: {session_id}

To enable actual LLM generation:
1. Set HUGGINGFACE_TOKEN environment variable, OR
2. Pass hf_token in request body
"""
        else:
            try:
                client = InferenceClient(
                    token=token,
                    model="openai/gpt-oss-20b"  # Or another model
                )

                # First LLM call
                first_response = ""
                try:
                    for msg in client.chat_completion(
                        messages,
                        max_tokens=request.max_tokens,
                        stream=True,
                        temperature=request.temperature,
                        top_p=request.top_p,
                    ):
                        choices = msg.choices
                        if len(choices) and choices[0].delta.content:
                            first_response += choices[0].delta.content
                except Exception as e:
                    # The HF API raises an error when the LLM returns JSON (a tool call);
                    # extract the "failed_generation" payload from the error
                    error_str = str(e)
                    if "tool_use_failed" in error_str and "failed_generation" in error_str:
                        # Parse the error dict to get the actual JSON response
                        import ast
                        try:
                            error_dict = ast.literal_eval(error_str)
                            first_response = error_dict.get("failed_generation", "")
                        except (ValueError, SyntaxError):
                            # Fallback: extract the JSON from the string
                            import re
                            match = re.search(r"'failed_generation': '({.*?})'", error_str)
                            if match:
                                first_response = match.group(1)
                            else:
                                raise e
                    else:
                        raise e

                # ===== 5. PARSE & EXECUTE TOOLS =====
                if request.enable_tools:
                    tool_result = await tools_service.parse_and_execute(first_response)

                    if tool_result:
                        # Tool was called!
                        tool_calls_made.append(tool_result)

                        # Add tool result to messages
                        messages.append({"role": "assistant", "content": first_response})
                        messages.append({
                            "role": "user",
                            "content": f"TOOL RESULT:\n{json.dumps(tool_result['result'], ensure_ascii=False, indent=2)}\n\nHãy dùng thông tin này để trả lời câu hỏi của user."
                        })

                        # Second LLM call with tool results
                        final_response = ""
                        for msg in client.chat_completion(
                            messages,
                            max_tokens=request.max_tokens,
                            stream=True,
                            temperature=request.temperature,
                            top_p=request.top_p,
                        ):
                            choices = msg.choices
                            if len(choices) and choices[0].delta.content:
                                final_response += choices[0].delta.content

                        response = final_response
                    else:
                        # No tool call, use first response
                        response = first_response
                else:
                    response = first_response

            except Exception as e:
                response = f"Error generating response with LLM: {str(e)}\n\nContext was retrieved successfully, but LLM generation failed."

        # ===== 6. SAVE TO CONVERSATION HISTORY =====
        conversation_service.add_message(
            session_id,
            "user",
            request.message
        )
        conversation_service.add_message(
            session_id,
            "assistant",
            response,
            metadata={
                "rag_stats": rag_stats,
                "tool_calls": tool_calls_made,
                "context_count": len(context_used)
            }
        )

        # Also save to legacy chat_history collection
        chat_data = {
            "session_id": session_id,
            "user_message": request.message,
            "assistant_response": response,
            "context_used": context_used,
            "tool_calls": tool_calls_made,
            "timestamp": datetime.utcnow()
        }
        chat_history_collection.insert_one(chat_data)

        # ===== 7. RETURN RESPONSE =====
        return {
            "response": response,
            "context_used": context_used,
            "timestamp": datetime.utcnow().isoformat(),
            "rag_stats": rag_stats,
            "session_id": session_id,
            "tool_calls": tool_calls_made if tool_calls_made else None
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
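The multi-turn contract is easiest to see from the client side. A minimal sketch, assuming the API is served at `http://localhost:8000` (the host and port are assumptions, not part of this commit):

```python
# Two-turn conversation against /chat; the backend creates the session on turn 1.
import requests

BASE = "http://localhost:8000"  # assumed local deployment

# Turn 1: no session_id, so the backend creates one and returns it
r1 = requests.post(f"{BASE}/chat", json={"message": "Tìm sự kiện hòa nhạc"}).json()
session_id = r1["session_id"]
print(r1["response"])

# Turn 2: reuse session_id so the bot keeps the conversation context
r2 = requests.post(
    f"{BASE}/chat",
    json={"message": "Ngày tổ chức chính xác?", "session_id": session_id},
).json()
print(r2["response"], r2.get("tool_calls"))
```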
conversation_service.py CHANGED
@@ -29,6 +29,7 @@ class ConversationService:
         """Create necessary indexes"""
         try:
             self.collection.create_index("session_id", unique=True)
+            self.collection.create_index("user_id")  # NEW: Index for user filtering
             # Auto-delete sessions after 7 days of inactivity
             self.collection.create_index(
                 "updated_at",
@@ -38,10 +39,14 @@ class ConversationService:
         except Exception as e:
             print(f"Conversation indexes already exist or error: {e}")

-    def create_session(self, metadata: Optional[Dict] = None) -> str:
+    def create_session(self, metadata: Optional[Dict] = None, user_id: Optional[str] = None) -> str:
         """
         Create new conversation session

+        Args:
+            metadata: Additional metadata
+            user_id: User identifier (optional)
+
         Returns:
             session_id (UUID string)
         """
@@ -49,7 +54,9 @@ class ConversationService:

         self.collection.insert_one({
             "session_id": session_id,
+            "user_id": user_id,  # NEW: Store user_id
             "messages": [],
+            "scenario_state": None,  # NEW: Scenario state
             "metadata": metadata or {},
             "created_at": datetime.utcnow(),
             "updated_at": datetime.utcnow()
@@ -146,7 +153,7 @@ class ConversationService:
         """
         session = self.collection.find_one(
             {"session_id": session_id},
-            {"_id": 0, "session_id": 1, "created_at": 1, "updated_at": 1, "metadata": 1}
+            {"_id": 0, "session_id": 1, "user_id": 1, "created_at": 1, "updated_at": 1, "metadata": 1}
         )
         return session
@@ -182,3 +189,120 @@ class ConversationService:
             return msg["content"]

         return None
+
+    def list_sessions(
+        self,
+        limit: int = 50,
+        skip: int = 0,
+        sort_by: str = "updated_at",
+        descending: bool = True,
+        user_id: Optional[str] = None  # NEW: Filter by user
+    ) -> List[Dict]:
+        """
+        List all conversation sessions
+
+        Args:
+            limit: Maximum number of sessions to return
+            skip: Number of sessions to skip (for pagination)
+            sort_by: Field to sort by (created_at, updated_at)
+            descending: Sort in descending order
+            user_id: Filter sessions by user_id (optional)
+
+        Returns:
+            List of session summaries
+        """
+        sort_order = -1 if descending else 1
+
+        # Build query filter
+        query = {}
+        if user_id:
+            query["user_id"] = user_id
+
+        sessions = self.collection.find(
+            query,  # Use query filter
+            {"_id": 0, "session_id": 1, "user_id": 1, "created_at": 1, "updated_at": 1, "metadata": 1}
+        ).sort(sort_by, sort_order).skip(skip).limit(limit)
+
+        result = []
+        for session in sessions:
+            # Count messages
+            message_count = len(
+                self.collection.find_one({"session_id": session["session_id"]}, {"messages": 1})
+                .get("messages", [])
+            )
+
+            result.append({
+                "session_id": session["session_id"],
+                "user_id": session.get("user_id"),  # NEW: Include user_id
+                "created_at": session["created_at"],
+                "updated_at": session["updated_at"],
+                "message_count": message_count,
+                "metadata": session.get("metadata", {})
+            })
+
+        return result
+
+    def count_sessions(self, user_id: Optional[str] = None) -> int:
+        """
+        Get total number of sessions
+
+        Args:
+            user_id: Filter count by user_id (optional)
+        """
+        query = {}
+        if user_id:
+            query["user_id"] = user_id
+        return self.collection.count_documents(query)
+
+    # ===== Scenario State Management =====
+
+    def get_scenario_state(self, session_id: str) -> Optional[Dict]:
+        """
+        Get current scenario state for session
+
+        Returns:
+            {
+                "active_scenario": "price_inquiry",
+                "scenario_step": 3,
+                "scenario_data": {...},
+                "last_activity": "..."
+            }
+            or None if no active scenario
+        """
+        session = self.collection.find_one({"session_id": session_id})
+        if not session:
+            return None
+        return session.get("scenario_state")
+
+    def set_scenario_state(self, session_id: str, state: Dict):
+        """
+        Set scenario state for session
+
+        Args:
+            session_id: Session ID
+            state: Scenario state dict
+        """
+        self.collection.update_one(
+            {"session_id": session_id},
+            {
+                "$set": {
+                    "scenario_state": state,
+                    "updated_at": datetime.utcnow()
+                }
+            },
+            upsert=True
+        )
+
+    def clear_scenario(self, session_id: str):
+        """
+        Clear scenario state (end scenario)
+        """
+        self.collection.update_one(
+            {"session_id": session_id},
+            {
+                "$set": {
+                    "scenario_state": None,
+                    "updated_at": datetime.utcnow()
+                }
+            }
+        )
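For reference, the scenario-state round trip these methods enable, as a minimal sketch; the `ConversationService(collection)` constructor call and the MongoDB URI are assumptions, since neither appears in this diff:

```python
# Round-trip a scenario state through MongoDB (fresh local database assumed).
from pymongo import MongoClient
from conversation_service import ConversationService

db = MongoClient("mongodb://localhost:27017")["chatbot"]   # assumed URI/db name
svc = ConversationService(db["conversations"])             # constructor signature assumed

sid = svc.create_session(metadata={"created_via": "demo"}, user_id="user-42")

svc.set_scenario_state(sid, {
    "active_scenario": "price_inquiry",
    "scenario_step": 1,
    "scenario_data": {},
})
print(svc.get_scenario_state(sid))            # -> the dict set above
print(svc.count_sessions(user_id="user-42"))  # -> 1 on a fresh collection
svc.clear_scenario(sid)                       # scenario_state back to None
```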
hybrid_chat_endpoint.py ADDED
@@ -0,0 +1,285 @@
"""
Hybrid Chat Endpoint: RAG + Scenario FSM
Routes between scripted scenarios and knowledge retrieval
"""
from fastapi import HTTPException
from datetime import datetime
from typing import Dict, Any
import json


async def hybrid_chat_endpoint(
    request,  # ChatRequest
    conversation_service,
    intent_classifier,
    scenario_engine,
    tools_service,
    advanced_rag,
    embedding_service,
    qdrant_service,
    chat_history_collection,
    hf_token,
    lead_storage  # NEW: For saving customer leads
):
    """
    Hybrid conversational chatbot: Scenario FSM + RAG

    Flow:
    1. Load session & scenario state
    2. Classify intent (scenario vs RAG)
    3. Route:
       - Scenario: Execute FSM flow
       - RAG: Knowledge retrieval
       - RAG+Resume: Answer question then resume scenario
    4. Save state & history
    """
    try:
        # ===== SESSION MANAGEMENT =====
        session_id = request.session_id
        if not session_id:
            session_id = conversation_service.create_session(
                metadata={"user_agent": "api", "created_via": "hybrid_chat"},
                user_id=request.user_id
            )
            print(f"✓ Created session: {session_id} (user: {request.user_id or 'anon'})")
        else:
            if not conversation_service.session_exists(session_id):
                raise HTTPException(404, detail=f"Session {session_id} not found")

        # ===== LOAD SCENARIO STATE =====
        scenario_state = conversation_service.get_scenario_state(session_id) or {}

        # ===== INTENT CLASSIFICATION =====
        intent = intent_classifier.classify(request.message, scenario_state)
        print(f"🎯 Intent: {intent}")

        # ===== ROUTING =====
        if intent.startswith("scenario:"):
            # Route to scenario engine
            response_data = await handle_scenario(
                intent,
                request.message,
                session_id,
                scenario_state,
                scenario_engine,
                conversation_service,
                advanced_rag,
                lead_storage  # NEW: Pass for action handling
            )

        elif intent == "rag:with_resume":
            # Answer the question but keep the scenario active
            response_data = await handle_rag_with_resume(
                request,
                session_id,
                scenario_state,
                advanced_rag,
                embedding_service,
                qdrant_service,
                conversation_service
            )

        else:  # rag:general
            # Pure RAG query
            response_data = await handle_pure_rag(
                request,
                session_id,
                advanced_rag,
                embedding_service,
                qdrant_service,
                tools_service,
                chat_history_collection,
                hf_token,
                conversation_service
            )

        # ===== SAVE HISTORY =====
        conversation_service.add_message(
            session_id,
            "user",
            request.message,
            metadata={"intent": intent}
        )

        conversation_service.add_message(
            session_id,
            "assistant",
            response_data["response"],
            metadata={
                "mode": response_data.get("mode", "unknown"),
                "context_used": response_data.get("context_used", [])[:3]  # Limit size
            }
        )

        return {
            "response": response_data["response"],
            "session_id": session_id,
            "mode": response_data.get("mode"),
            "scenario_active": response_data.get("scenario_active", False),
            "timestamp": datetime.utcnow().isoformat()
        }

    except Exception as e:
        print(f"❌ Error in hybrid_chat: {str(e)}")
        raise HTTPException(500, detail=f"Chat error: {str(e)}")


async def handle_scenario(
    intent,
    user_message,
    session_id,
    scenario_state,
    scenario_engine,
    conversation_service,
    advanced_rag,
    lead_storage=None
):
    """Handle scenario-based conversation"""

    if intent == "scenario:continue":
        # Continue existing scenario
        result = scenario_engine.next_step(
            scenario_id=scenario_state["active_scenario"],
            current_step=scenario_state["scenario_step"],
            user_input=user_message,
            scenario_data=scenario_state.get("scenario_data", {}),
            rag_service=advanced_rag
        )
    else:
        # Start new scenario
        scenario_type = intent.split(":", 1)[1]
        result = scenario_engine.start_scenario(scenario_type)

    # Update scenario state
    if result.get("end_scenario"):
        conversation_service.clear_scenario(session_id)
        scenario_active = False
    else:
        conversation_service.set_scenario_state(session_id, result["new_state"])
        scenario_active = True

    # Execute action if any
    if result.get("action") and lead_storage:
        action = result['action']
        scenario_data = result.get('new_state', {}).get('scenario_data', scenario_state.get('scenario_data', {}))

        if action == "send_pdf_email":
            # Save lead with email
            lead_storage.save_lead(
                event_name=scenario_data.get('step_1_input', 'Unknown Event'),
                email=scenario_data.get('step_5_input'),  # Email from step 5
                interests={
                    "group": scenario_data.get('group_size'),
                    "wants_pdf": True
                },
                session_id=session_id
            )
            print("📧 Lead saved: email sent (saved to DB)")

        elif action == "save_lead_phone":
            # Save lead with phone
            lead_storage.save_lead(
                event_name=scenario_data.get('step_1_input', 'Unknown Event'),
                email=scenario_data.get('step_5_input'),
                phone=scenario_data.get('step_8_input'),  # Phone from step 8
                interests={
                    "group": scenario_data.get('group_size'),
                    "wants_reminder": True
                },
                session_id=session_id
            )
            print("📱 Lead saved: SMS reminder (saved to DB)")

    return {
        "response": result["message"],
        "mode": "scenario",
        "scenario_active": scenario_active
    }


async def handle_rag_with_resume(
    request,
    session_id,
    scenario_state,
    advanced_rag,
    embedding_service,
    qdrant_service,
    conversation_service
):
    """
    Handle a RAG query mid-scenario:
    answer the question, then remind the user to continue the scenario
    """
    # Query RAG
    context_used = []
    if request.use_rag:
        query_embedding = embedding_service.encode_text(request.message)
        results = qdrant_service.search(
            query_embedding=query_embedding,
            limit=request.top_k,
            score_threshold=request.score_threshold,
            ef=256
        )
        context_used = results

    # Build simple RAG response
    rag_response = await simple_rag_response(
        request.message,
        context_used,
        request.system_message
    )

    # Add resume hint
    last_scenario_msg = "\n\n---\nVậy nha! Quay lại câu hỏi trước, bạn đã quyết định chưa? ^^"

    return {
        "response": rag_response + last_scenario_msg,
        "mode": "rag_with_resume",
        "scenario_active": True,
        "context_used": context_used
    }


async def handle_pure_rag(
    request,
    session_id,
    advanced_rag,
    embedding_service,
    qdrant_service,
    tools_service,
    chat_history_collection,
    hf_token,
    conversation_service
):
    """
    Handle a pure RAG query (fall back to the existing logic)
    """
    # Import existing chat_endpoint logic
    from chat_endpoint import chat_endpoint

    # Call existing endpoint
    result = await chat_endpoint(
        request,
        conversation_service,
        tools_service,
        advanced_rag,
        embedding_service,
        qdrant_service,
        chat_history_collection,
        hf_token
    )

    return {
        "response": result["response"],
        "mode": "rag",
        "context_used": result.get("context_used", [])
    }


async def simple_rag_response(message, context, system_message):
    """Simple RAG response without an LLM (for quick answers)"""
    if context:
        # Return top context
        top = context[0]
        return f"{top['metadata'].get('text', 'Không tìm thấy thông tin.')}"
    return "Xin lỗi, tôi không tìm thấy thông tin về điều này."
hybrid_chat_stream.py ADDED
@@ -0,0 +1,207 @@
"""
Hybrid Chat Streaming Endpoint
Real-time SSE streaming for scenarios + RAG
"""
from typing import AsyncGenerator
import asyncio
from datetime import datetime

from stream_utils import (
    format_sse, stream_text_slowly,
    EVENT_STATUS, EVENT_TOKEN, EVENT_DONE, EVENT_ERROR, EVENT_METADATA
)


async def hybrid_chat_stream(
    request,
    conversation_service,
    intent_classifier,
    scenario_engine,
    advanced_rag,
    embedding_service,
    qdrant_service,
    hf_token,
    lead_storage
) -> AsyncGenerator[str, None]:
    """
    Stream chat responses in real-time (SSE format)

    Yields SSE events:
    - status: "Đang suy nghĩ...", "Đang tìm kiếm..."
    - token: Individual text chunks
    - metadata: Context, session info
    - done: Completion signal
    - error: Error messages
    """
    try:
        # === SESSION MANAGEMENT ===
        session_id = request.session_id
        if not session_id:
            session_id = conversation_service.create_session(
                metadata={"user_agent": "api", "created_via": "stream"},
                user_id=request.user_id
            )
        yield format_sse(EVENT_METADATA, {"session_id": session_id})

        # === INTENT CLASSIFICATION ===
        yield format_sse(EVENT_STATUS, "Đang phân tích câu hỏi...")

        scenario_state = conversation_service.get_scenario_state(session_id) or {}
        intent = intent_classifier.classify(request.message, scenario_state)

        # === ROUTING ===
        if intent.startswith("scenario:"):
            # Scenario flow with simulated streaming
            async for sse_event in handle_scenario_stream(
                intent, request.message, session_id,
                scenario_state, scenario_engine, conversation_service, lead_storage
            ):
                yield sse_event

        elif intent == "rag:with_resume":
            # Quick RAG answer + resume scenario
            yield format_sse(EVENT_STATUS, "Đang tra cứu...")
            async for sse_event in handle_rag_stream(
                request, advanced_rag, embedding_service, qdrant_service
            ):
                yield sse_event

            # Resume hint
            async for chunk in stream_text_slowly(
                "\n\n---\nVậy nha! Quay lại câu hỏi trước nhé ^^",
                chars_per_chunk=5,
                delay_ms=15
            ):
                yield chunk

        else:  # Pure RAG
            yield format_sse(EVENT_STATUS, "Đang tìm kiếm trong tài liệu...")
            async for sse_event in handle_rag_stream(
                request, advanced_rag, embedding_service, qdrant_service
            ):
                yield sse_event

        # === SAVE HISTORY ===
        # Note: the full response is saved after streaming completes;
        # this requires buffering on the server side

        # === DONE ===
        yield format_sse(EVENT_DONE, {
            "session_id": session_id,
            "timestamp": datetime.utcnow().isoformat()
        })

    except Exception as e:
        yield format_sse(EVENT_ERROR, str(e))


async def handle_scenario_stream(
    intent, user_message, session_id,
    scenario_state, scenario_engine, conversation_service, lead_storage
) -> AsyncGenerator[str, None]:
    """
    Handle a scenario with a simulated typing effect
    """
    # Get scenario response (sync)
    if intent == "scenario:continue":
        result = scenario_engine.next_step(
            scenario_id=scenario_state["active_scenario"],
            current_step=scenario_state["scenario_step"],
            user_input=user_message,
            scenario_data=scenario_state.get("scenario_data", {})
        )
    else:
        scenario_type = intent.split(":", 1)[1]
        result = scenario_engine.start_scenario(scenario_type)

    # Update state
    if result.get("end_scenario"):
        conversation_service.clear_scenario(session_id)
    elif result.get("new_state"):
        conversation_service.set_scenario_state(session_id, result["new_state"])

    # Execute actions
    if result.get("action") and lead_storage:
        action = result['action']
        scenario_data = result.get('new_state', {}).get('scenario_data', {})

        if action == "send_pdf_email":
            lead_storage.save_lead(
                event_name=scenario_data.get('step_1_input', 'Unknown'),
                email=scenario_data.get('step_5_input'),
                interests={"group": scenario_data.get('group_size'), "wants_pdf": True},
                session_id=session_id
            )
        elif action == "save_lead_phone":
            lead_storage.save_lead(
                event_name=scenario_data.get('step_1_input', 'Unknown'),
                email=scenario_data.get('step_5_input'),
                phone=scenario_data.get('step_8_input'),
                interests={"group": scenario_data.get('group_size'), "wants_reminder": True},
                session_id=session_id
            )

    # Stream response with typing effect
    response_text = result["message"]
    async for chunk in stream_text_slowly(
        response_text,
        chars_per_chunk=4,  # Faster for scenarios
        delay_ms=15
    ):
        yield chunk

    yield format_sse(EVENT_METADATA, {
        "mode": "scenario",
        "scenario_active": not result.get("end_scenario")
    })


async def handle_rag_stream(
    request, advanced_rag, embedding_service, qdrant_service
) -> AsyncGenerator[str, None]:
    """
    Handle RAG with real LLM streaming
    """
    # RAG search (sync part)
    context_used = []
    if request.use_rag:
        query_embedding = embedding_service.encode_text(request.message)
        results = qdrant_service.search(
            query_embedding=query_embedding,
            limit=request.top_k,
            score_threshold=request.score_threshold,
            ef=256
        )
        context_used = results

    # Build context
    if context_used:
        context_str = "\n\n".join([
            f"[{i+1}] {r['metadata'].get('text', '')[:500]}"
            for i, r in enumerate(context_used[:3])
        ])
    else:
        context_str = "Không tìm thấy thông tin liên quan."

    # Simple response (for now - real LLM streaming can be integrated later)
    if context_used:
        response_text = f"Dựa trên tài liệu, {context_used[0]['metadata'].get('text', '')[:300]}..."
    else:
        response_text = "Xin lỗi, tôi không tìm thấy thông tin về câu hỏi này."

    # Simulate streaming (to be replaced with real HF streaming)
    async for chunk in stream_text_slowly(
        response_text,
        chars_per_chunk=3,
        delay_ms=20
    ):
        yield chunk

    yield format_sse(EVENT_METADATA, {
        "mode": "rag",
        "context_count": len(context_used)
    })


# TODO: Implement real HF InferenceClient streaming
# This requires updating advanced_rag.py to support stream=True
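`stream_utils` itself is not part of this diff. A minimal sketch of implementations that would satisfy the imports above (`format_sse`, `stream_text_slowly`, and the event-name constants); this is an assumption about the missing module, not its actual contents:

```python
# stream_utils.py - assumed sketch; the real file is not included in this commit view.
import asyncio
import json
from typing import AsyncGenerator

EVENT_STATUS = "status"
EVENT_TOKEN = "token"
EVENT_DONE = "done"
EVENT_ERROR = "error"
EVENT_METADATA = "metadata"


def format_sse(event: str, data) -> str:
    """Format one Server-Sent Event block: 'event: <name>\\ndata: <payload>\\n\\n'."""
    payload = data if isinstance(data, str) else json.dumps(data, ensure_ascii=False)
    return f"event: {event}\ndata: {payload}\n\n"


async def stream_text_slowly(
    text: str, chars_per_chunk: int = 3, delay_ms: int = 20
) -> AsyncGenerator[str, None]:
    """Yield token events a few characters at a time to simulate typing."""
    for i in range(0, len(text), chars_per_chunk):
        yield format_sse(EVENT_TOKEN, text[i:i + chars_per_chunk])
        await asyncio.sleep(delay_ms / 1000)
```

This shape is consistent with how the handlers use it: chunks are yielded straight into the `StreamingResponse`, so they must already be SSE-formatted `token` events.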
intent_classifier.py ADDED
@@ -0,0 +1,208 @@
"""
Intent Classifier for Hybrid RAG + FSM Chatbot
Detects user intent to route between scenario flows and RAG queries
"""
from typing import Dict, Optional, List
import re


class IntentClassifier:
    """
    Classify user intent using keyword matching
    Routes to either:
    - Scenario flows (scripted conversations)
    - RAG queries (knowledge retrieval)
    """

    def __init__(self, scenarios_dir: str = "scenarios"):
        """
        Initialize with auto-loading triggers from scenario JSON files

        Args:
            scenarios_dir: Directory containing scenario JSON files
        """
        # Auto-load scenario patterns from JSON files
        self.scenario_patterns = self._load_scenario_patterns(scenarios_dir)

        # General question patterns (RAG)
        self.general_patterns = [
            # Location
            "ở đâu", "địa điểm", "location", "where",
            "chỗ nào", "tổ chức tại",

            # Time
            "mấy giờ", "khi nào", "when", "time",
            "bao giờ", "thời gian", "ngày nào",

            # Info
            "thông tin", "info", "information",
            "chi tiết", "details", "về",

            # Parking
            "đậu xe", "parking", "gửi xe",

            # Contact
            "liên hệ", "contact", "số điện thoại"
        ]

    def _load_scenario_patterns(self, scenarios_dir: str) -> dict:
        """
        Auto-load triggers from all scenario JSON files

        Returns:
            {"scenario_id": ["trigger1", "trigger2", ...]}
        """
        import json
        import os

        patterns = {}

        if not os.path.exists(scenarios_dir):
            print(f"⚠ Scenarios directory not found: {scenarios_dir}")
            return patterns

        for filename in os.listdir(scenarios_dir):
            if filename.endswith('.json'):
                filepath = os.path.join(scenarios_dir, filename)
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        scenario = json.load(f)
                        scenario_id = scenario.get('scenario_id')
                        triggers = scenario.get('triggers', [])

                        if scenario_id and triggers:
                            patterns[scenario_id] = triggers
                            print(f"✓ Loaded triggers for: {scenario_id} ({len(triggers)} patterns)")
                except Exception as e:
                    print(f"⚠ Error loading {filename}: {e}")

        return patterns

    def classify(
        self,
        message: str,
        conversation_state: Optional[Dict] = None
    ) -> str:
        """
        Classify user intent

        Args:
            message: User message
            conversation_state: Current conversation state (optional)
                {
                    "active_scenario": "price_inquiry" | null,
                    "scenario_step": 3,
                    "scenario_data": {...}
                }

        Returns:
            Intent string:
            - "scenario:<scenario_id>" - Start new scenario
            - "scenario:continue" - Continue current scenario
            - "rag:general" - General RAG query
            - "rag:with_resume" - RAG query but resume scenario after
        """
        message_lower = message.lower().strip()
        state = conversation_state or {}

        # Check if in an active scenario
        in_scenario = state.get("active_scenario") is not None

        if in_scenario:
            # User is mid-scenario:
            # check whether the message is an off-topic question
            if self._is_general_question(message_lower):
                return "rag:with_resume"
            else:
                # Continue scenario (user is answering the scenario question)
                return "scenario:continue"

        # Not in a scenario - check for new scenario triggers
        for scenario_id, patterns in self.scenario_patterns.items():
            if self._matches_any_pattern(message_lower, patterns):
                return f"scenario:{scenario_id}"

        # Default: general RAG query
        return "rag:general"

    def _is_general_question(self, message: str) -> bool:
        """
        Check if the message is a general question (should use RAG)
        """
        return self._matches_any_pattern(message, self.general_patterns)

    def _matches_any_pattern(self, message: str, patterns: List[str]) -> bool:
        """
        Check if the message matches any pattern in the list
        """
        for pattern in patterns:
            # Simple substring match (message is already lower-cased)
            if pattern in message:
                return True

            # Check word boundary
            if re.search(rf'\b{re.escape(pattern)}\b', message, re.IGNORECASE):
                return True

        return False

    def get_scenario_type(self, intent: str) -> Optional[str]:
        """
        Extract scenario type from an intent string

        Args:
            intent: "scenario:price_inquiry" or "scenario:continue"

        Returns:
            "price_inquiry" or None
        """
        if not intent.startswith("scenario:"):
            return None

        parts = intent.split(":", 1)
        if len(parts) < 2:
            return None

        scenario_type = parts[1]
        if scenario_type == "continue":
            return None

        return scenario_type

    def add_scenario_pattern(self, scenario_id: str, patterns: List[str]):
        """
        Dynamically add new scenario patterns
        """
        if scenario_id in self.scenario_patterns:
            self.scenario_patterns[scenario_id].extend(patterns)
        else:
            self.scenario_patterns[scenario_id] = patterns

    def add_general_pattern(self, patterns: List[str]):
        """
        Dynamically add new general question patterns
        """
        self.general_patterns.extend(patterns)


# Example usage
if __name__ == "__main__":
    classifier = IntentClassifier()

    # Test cases
    test_cases = [
        ("giá vé bao nhiêu?", None),
        ("sự kiện ở đâu?", None),
        ("đặt vé cho tôi", None),
        ("A show", {"active_scenario": "price_inquiry", "scenario_step": 1}),
        ("sự kiện mấy giờ?", {"active_scenario": "price_inquiry", "scenario_step": 3}),
    ]

    print("Intent Classification Test:")
    print("-" * 50)
    for message, state in test_cases:
        intent = classifier.classify(message, state)
        print(f"Message: {message}")
        print(f"State: {state}")
        print(f"Intent: {intent}")
        print()
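`_load_scenario_patterns` only requires each `scenarios/*.json` file to carry `scenario_id` and `triggers`; everything else in a scenario file belongs to `ScenarioEngine`. A sketch that writes such a file and exercises the classifier — the specific trigger strings are assumptions:

```python
# Create an example scenario file that IntentClassifier can pick up.
import json
import os

from intent_classifier import IntentClassifier

os.makedirs("scenarios", exist_ok=True)
price_inquiry = {
    "scenario_id": "price_inquiry",
    "triggers": ["giá vé", "bao nhiêu tiền", "ticket price", "đặt vé"],  # assumed triggers
}
with open("scenarios/price_inquiry.json", "w", encoding="utf-8") as f:
    json.dump(price_inquiry, f, ensure_ascii=False, indent=2)

classifier = IntentClassifier(scenarios_dir="scenarios")
print(classifier.classify("giá vé bao nhiêu?"))  # -> "scenario:price_inquiry"
```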
lead_storage_service.py ADDED
@@ -0,0 +1,90 @@
"""
Lead Storage Service
Saves customer leads collected during scenario conversations
"""
from typing import Dict, Optional
from datetime import datetime
from pymongo.collection import Collection


class LeadStorageService:
    """
    Store customer leads from scenario interactions
    """

    def __init__(self, leads_collection: Collection):
        self.collection = leads_collection
        self._ensure_indexes()

    def _ensure_indexes(self):
        """Create indexes for the leads collection"""
        try:
            self.collection.create_index("email")
            self.collection.create_index("phone")
            self.collection.create_index("created_at")
            print("✓ Leads indexes created")
        except Exception as e:
            print(f"Leads indexes already exist: {e}")

    def save_lead(
        self,
        event_name: str,
        email: Optional[str] = None,
        phone: Optional[str] = None,
        interests: Optional[Dict] = None,
        session_id: Optional[str] = None,
        user_id: Optional[str] = None
    ) -> str:
        """
        Save a customer lead

        Args:
            event_name: Event they're interested in
            email: Customer email
            phone: Customer phone
            interests: Additional data (group_size, etc.)
            session_id: Conversation session
            user_id: User ID if authenticated

        Returns:
            Lead ID
        """
        lead = {
            "event_name": event_name,
            "email": email,
            "phone": phone,
            "interests": interests or {},
            "session_id": session_id,
            "user_id": user_id,
            "source": "chatbot_scenario",
            "created_at": datetime.utcnow(),
            "status": "new"
        }

        result = self.collection.insert_one(lead)
        lead_id = str(result.inserted_id)

        print(f"💾 Saved lead: {lead_id} | Event: {event_name} | Email: {email} | Phone: {phone}")

        return lead_id

    def get_leads(
        self,
        event_name: Optional[str] = None,
        limit: int = 50,
        skip: int = 0
    ):
        """Get leads with optional filtering"""
        query = {}
        if event_name:
            query["event_name"] = event_name

        leads = self.collection.find(query).sort("created_at", -1).skip(skip).limit(limit)
        return list(leads)

    def count_leads(self, event_name: Optional[str] = None) -> int:
        """Count total leads"""
        query = {}
        if event_name:
            query["event_name"] = event_name
        return self.collection.count_documents(query)
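Wiring sketch for the service against a local MongoDB; the constructor matches the class above, while the URI and database name are assumptions:

```python
# Save and query leads through LeadStorageService (fresh local database assumed).
from pymongo import MongoClient
from lead_storage_service import LeadStorageService

client = MongoClient("mongodb://localhost:27017")  # assumed URI
leads = LeadStorageService(client["chatbot"]["leads"])

lead_id = leads.save_lead(
    event_name="Show A",
    email="khach@example.com",
    interests={"group": "4", "wants_pdf": True},
    session_id="abc-123",
)
print(leads.count_leads(event_name="Show A"))             # -> 1 on a fresh collection
print(leads.get_leads(event_name="Show A")[0]["status"])  # -> "new"
```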
main.py CHANGED
@@ -1,5 +1,5 @@
1
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException
2
- from fastapi.responses import JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
  from typing import Optional, List, Dict
@@ -19,6 +19,11 @@ from pdf_parser import PDFIndexer
19
  from multimodal_pdf_parser import MultimodalPDFIndexer
20
  from conversation_service import ConversationService
21
  from tools_service import ToolsService
 
 
 
 
 
22
 
23
  # Initialize FastAPI app
24
  app = FastAPI(
@@ -107,6 +112,17 @@ print("✓ Conversation Service initialized")
107
  tools_service = ToolsService(base_url="https://www.festavenue.site")
108
  print("✓ Tools Service initialized (Function Calling enabled)")
109
 
 
 
 
 
 
 
 
 
 
 
 
110
  print("✓ Services initialized successfully")
111
 
112
 
@@ -135,6 +151,7 @@ class IndexResponse(BaseModel):
135
  class ChatRequest(BaseModel):
136
  message: str
137
  session_id: Optional[str] = None # Multi-turn conversation
 
138
  use_rag: bool = True
139
  top_k: int = 3
140
  system_message: Optional[str] = """Bạn là trợ lý AI chuyên biệt cho hệ thống quản lý sự kiện và bán vé.
@@ -680,59 +697,182 @@ async def get_stats():
680
  # ChatbotRAG Endpoints
681
  # ============================================
682
 
 
 
 
683
  @app.post("/chat", response_model=ChatResponse)
684
  async def chat(request: ChatRequest):
685
  """
686
- Multi-turn conversational chatbot với RAG + Function Calling
687
 
688
  Features:
689
- - ✅ Server-side session management (tự động tạo session_id)
690
- - ✅ Conversation history tracking
691
- - ✅ RAG context retrieval
692
- - ✅ Function calling (gọi API khi cần thông tin chi tiết)
 
 
693
 
694
  Flow:
695
- 1. Request đầu tiên: Không cần session_id BE tạo mới
696
- 2. Request tiếp theo: Gửi session_id từ response trước → BE nhớ context
 
 
697
 
698
- Example:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699
  ```
700
- # Lần 1
701
- POST /chat { "message": "Tìm sự kiện hòa nhạc" }
702
- Response: { "session_id": "abc-123", "response": "..." }
703
 
704
- # Lần 2 (follow-up)
705
- POST /chat { "message": "Ngày tổ chức chính xác?", "session_id": "abc-123" }
706
- Response: { "session_id": "abc-123", "response": "..." } # Bot hiểu context
707
  ```
 
 
 
 
 
708
 
709
- Body Parameters:
710
- - message: User message (required)
711
- - session_id: Session ID cho multi-turn (optional, tự tạo nếu không có)
712
- - use_rag: Enable RAG retrieval (default: true)
713
- - enable_tools: Enable function calling (default: true)
714
- - top_k: Number of documents (default: 3)
715
- - temperature: LLM temperature (default: 0.7)
716
 
717
- Returns:
718
- - response: AI generated response
719
- - session_id: Session identifier (TRẢ VỀ trong mọi trường hợp)
720
- - context_used: Retrieved context documents
721
- - tool_calls: API calls made (if any)
722
- - timestamp: Response timestamp
723
- """
724
- # Import chat endpoint logic
725
- from chat_endpoint import chat_endpoint
726
 
727
- return await chat_endpoint(
 
 
 
 
 
 
 
 
 
 
728
  request=request,
729
  conversation_service=conversation_service,
 
 
730
  tools_service=tools_service,
731
  advanced_rag=advanced_rag,
732
  embedding_service=embedding_service,
733
  qdrant_service=qdrant_service,
734
  chat_history_collection=chat_history_collection,
735
- hf_token=hf_token
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
736
  )
737
 
738
 
@@ -775,6 +915,207 @@ async def get_conversation_history(session_id: str, include_metadata: bool = Fal
775
  }
776
 
777
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
778
  @app.post("/chat/clear-session")
779
  async def clear_chat_session(session_id: str):
780
  """
@@ -892,6 +1233,91 @@ async def add_document(request: AddDocumentRequest):
892
  raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
893
 
894
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
895
  @app.post("/rag/search", response_model=List[SearchResponse])
896
  async def rag_search(
897
  query: str = Form(...),
 
1
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException
2
+ from fastapi.responses import JSONResponse, StreamingResponse # Add StreamingResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
  from typing import Optional, List, Dict
 
19
  from multimodal_pdf_parser import MultimodalPDFIndexer
20
  from conversation_service import ConversationService
21
  from tools_service import ToolsService
22
+ from intent_classifier import IntentClassifier # NEW
23
+ from scenario_engine import ScenarioEngine # NEW
24
+ from lead_storage_service import LeadStorageService # NEW
25
+ from hybrid_chat_endpoint import hybrid_chat_endpoint # NEW
26
+ from hybrid_chat_stream import hybrid_chat_stream # NEW: Streaming
27
 
28
  # Initialize FastAPI app
29
  app = FastAPI(
 
112
  tools_service = ToolsService(base_url="https://www.festavenue.site")
113
  print("✓ Tools Service initialized (Function Calling enabled)")
114
 
115
+ # Initialize Hybrid Chat Components
116
+ intent_classifier = IntentClassifier()
117
+ print("✓ Intent Classifier initialized")
118
+
119
+ scenario_engine = ScenarioEngine(scenarios_dir="scenarios")
120
+ print("✓ Scenario Engine initialized")
121
+
122
+ leads_collection = db["leads"]
123
+ lead_storage = LeadStorageService(leads_collection)
124
+ print("✓ Lead Storage Service initialized")
125
+
126
  print("✓ Services initialized successfully")
127
 
128
 
 
151
  class ChatRequest(BaseModel):
152
  message: str
153
  session_id: Optional[str] = None # Multi-turn conversation
154
+ user_id: Optional[str] = None # User identifier for session tracking
155
  use_rag: bool = True
156
  top_k: int = 3
157
  system_message: Optional[str] = """Bạn là trợ lý AI chuyên biệt cho hệ thống quản lý sự kiện và bán vé.
 
697
  # ChatbotRAG Endpoints
  # ============================================

+ # Import chat endpoint logic
+ from hybrid_chat_endpoint import hybrid_chat_endpoint
+
  @app.post("/chat", response_model=ChatResponse)
  async def chat(request: ChatRequest):
      """
+     Hybrid Conversational Chatbot: Scenario FSM + RAG

      Features:
+     - ✅ Scenario-based flows (ticket pricing, scripted booking)
+     - ✅ RAG knowledge retrieval (PDF, documents)
+     - ✅ Mid-scenario RAG interruption (answer off-topic questions)
+     - ✅ Lead collection (email, phone → MongoDB)
+     - ✅ Multi-turn conversations with state management
+     - ✅ Function calling (external API integration)

      Flow:
+     1. User message → Intent classification
+     2. Route to: Scenario FSM OR RAG OR Hybrid
+     3. Execute flow + save state
+     4. Save conversation history

+     Example 1 - Start Price Inquiry Scenario:
+     ```
+     POST /chat
+     {
+         "message": "giá vé bao nhiêu?",
+         "use_rag": true
+     }
+
+     Response:
+     {
+         "response": "Hello 👋 Bạn muốn xem giá của show nào để mình báo đúng nè?",
+         "session_id": "abc-123",
+         "mode": "scenario",
+         "scenario_active": true
+     }
      ```

+     Example 2 - Continue Scenario:
      ```
+     POST /chat
+     {
+         "message": "Show A",
+         "session_id": "abc-123"
+     }

+     Response:
+     {
+         "response": "Bạn đi 1 mình hay đi nhóm...",
+         "mode": "scenario",
+         "scenario_active": true
+     }
+     ```

+     Example 3 - Mid-scenario RAG Question:
+     ```
+     POST /chat
+     {
+         "message": "sự kiện mấy giờ?",
+         "session_id": "abc-123"
+     }
+     # Bot answers from RAG, then resumes the scenario
+     ```

+     Example 4 - Pure RAG Query:
+     ```
+     POST /chat
+     {
+         "message": "địa điểm sự kiện ở đâu?",
+         "use_rag": true
+     }
+     # Normal RAG response (does not trigger a scenario)
+     ```
+     """
+     return await hybrid_chat_endpoint(
          request=request,
          conversation_service=conversation_service,
+         intent_classifier=intent_classifier,
+         scenario_engine=scenario_engine,
          tools_service=tools_service,
          advanced_rag=advanced_rag,
          embedding_service=embedding_service,
          qdrant_service=qdrant_service,
          chat_history_collection=chat_history_collection,
+         hf_token=hf_token,
+         lead_storage=lead_storage
+     )
+
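For reference, the request/response shapes documented above translate into a client like this: a minimal sketch, assuming the API is served on http://localhost:8000 (host and port are not specified in this repo).

```python
# Hypothetical two-turn client for POST /chat; the base URL is an assumption.
import httpx

BASE = "http://localhost:8000"  # assumed local dev server

with httpx.Client(base_url=BASE, timeout=30.0) as client:
    # Turn 1: no session_id, so the server creates a session and starts the scenario
    r1 = client.post("/chat", json={"message": "giá vé bao nhiêu?", "use_rag": True})
    r1.raise_for_status()
    data = r1.json()
    print("bot:", data["response"])

    # Turn 2: reuse session_id so the FSM continues from the saved step
    r2 = client.post("/chat", json={"message": "Show A", "session_id": data["session_id"]})
    print("bot:", r2.json()["response"])
```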
+ @app.post("/chat/stream")
791
+ async def chat_stream(request: ChatRequest):
792
+ """
793
+ Streaming Chat Endpoint (SSE - Server-Sent Events)
794
+
795
+ Real-time token-by-token response display
796
+
797
+ Features:
798
+ - ✅ Real-time "typing" effect
799
+ - ✅ Status updates (thinking, searching)
800
+ - ✅ Scenario: Simulated streaming (smooth typing)
801
+ - ✅ RAG: Real LLM streaming
802
+ - ✅ HTTP/2 compatible
803
+
804
+ Event Types:
805
+ - status: Bot status ("Đang suy nghĩ...", "Đang tìm kiếm...")
806
+ - token: Text chunks
807
+ - metadata: Session ID, context info
808
+ - done: Completion signal
809
+ - error: Error messages
810
+
811
+ Example - JavaScript Client:
812
+ ```javascript
813
+ const response = await fetch('/chat/stream', {
814
+ method: 'POST',
815
+ headers: { 'Content-Type': 'application/json' },
816
+ body: JSON.stringify({
817
+ message: "giá vé bao nhiêu?",
818
+ use_rag: true
819
+ })
820
+ });
821
+
822
+ const reader = response.body.getReader();
823
+ const decoder = new TextDecoder();
824
+
825
+ while (true) {
826
+ const {done, value} = await reader.read();
827
+ if (done) break;
828
+
829
+ const chunk = decoder.decode(value);
830
+ const lines = chunk.split('\n\n');
831
+
832
+ for (const line of lines) {
833
+ if (line.startsWith('event: token')) {
834
+ const data = line.split('data: ')[1];
835
+ displayToken(data); // Append to UI
836
+ }
837
+ else if (line.startsWith('event: done')) {
838
+ console.log('Stream complete');
839
+ }
840
+ }
841
+ }
842
+ ```
843
+
844
+ Example - EventSource (simpler but less control):
845
+ ```javascript
846
+ // Note: EventSource doesn't support POST, need to use fetch
847
+ const eventSource = new EventSource('/chat/stream?message=hello');
848
+
849
+ eventSource.addEventListener('token', (e) => {
850
+ displayToken(e.data);
851
+ });
852
+
853
+ eventSource.addEventListener('done', (e) => {
854
+ eventSource.close();
855
+ });
856
+ ```
857
+ """
858
+ return StreamingResponse(
859
+ hybrid_chat_stream(
860
+ request=request,
861
+ conversation_service=conversation_service,
862
+ intent_classifier=intent_classifier,
863
+ scenario_engine=scenario_engine,
864
+ advanced_rag=advanced_rag,
865
+ embedding_service=embedding_service,
866
+ qdrant_service=qdrant_service,
867
+ hf_token=hf_token,
868
+ lead_storage=lead_storage
869
+ ),
870
+ media_type="text/event-stream",
871
+ headers={
872
+ "Cache-Control": "no-cache",
873
+ "Connection": "keep-alive",
874
+ "X-Accel-Buffering": "no" # Disable nginx buffering
875
+ }
876
  )
877
 
878
 
 
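The same stream can be consumed from Python; a rough sketch with httpx (the localhost URL is an assumption, and only the token/done events are handled here).

```python
# Hypothetical Python counterpart to the JavaScript client above.
import httpx

with httpx.Client(timeout=None) as client:
    with client.stream(
        "POST",
        "http://localhost:8000/chat/stream",  # assumed host/port
        json={"message": "giá vé bao nhiêu?", "use_rag": True},
    ) as response:
        event = None
        for line in response.iter_lines():
            # SSE frames arrive as "event: <type>" followed by "data: <payload>"
            if line.startswith("event: "):
                event = line[len("event: "):]
            elif line.startswith("data: "):
                data = line[len("data: "):]
                if event == "token":
                    print(data, end="", flush=True)  # typing effect
                elif event == "done":
                    print("\n[stream complete]")
```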
  }

+ @app.get("/chat/sessions")
+ async def list_sessions(
+     limit: int = 50,
+     skip: int = 0,
+     sort_by: str = "updated_at",
+     user_id: Optional[str] = None  # NEW: Filter by user
+ ):
+     """
+     List all conversation sessions
+
+     Query Parameters:
+         limit: Maximum sessions to return (default: 50, max: 100)
+         skip: Number of sessions to skip for pagination (default: 0)
+         sort_by: Field to sort by - 'created_at' or 'updated_at' (default: updated_at)
+         user_id: Filter sessions by user_id (optional)
+
+     Returns:
+         List of sessions with metadata and message counts
+
+     Examples:
+     ```
+     GET /chat/sessions                      # All sessions
+     GET /chat/sessions?user_id=user_123     # Only user_123's sessions
+     GET /chat/sessions?limit=20&skip=0&sort_by=updated_at
+     ```
+     """
+     # Validate limit
+     if limit > 100:
+         limit = 100
+     if limit < 1:
+         limit = 1
+
+     # Validate sort_by
+     if sort_by not in ["created_at", "updated_at"]:
+         raise HTTPException(
+             status_code=400,
+             detail="sort_by must be 'created_at' or 'updated_at'"
+         )
+
+     sessions = conversation_service.list_sessions(
+         limit=limit,
+         skip=skip,
+         sort_by=sort_by,
+         descending=True,
+         user_id=user_id  # NEW: Pass user_id filter
+     )
+
+     total_sessions = conversation_service.count_sessions(user_id=user_id)  # NEW: Count with filter
+
+     return {
+         "total": total_sessions,
+         "limit": limit,
+         "skip": skip,
+         "count": len(sessions),
+         "user_id": user_id,  # NEW: Include filter in response
+         "sessions": sessions
+     }
+
+ @app.get("/scenarios")
978
+ async def list_scenarios():
979
+ """
980
+ Get list of all available scenarios for proactive chat
981
+
982
+ FE use case:
983
+ - Random pick scenario để bắt đầu chat chủ động
984
+ - Hiển thị menu các scenario available
985
+
986
+ Returns:
987
+ List of scenarios with metadata
988
+
989
+ Example:
990
+ ```
991
+ GET /scenarios
992
+
993
+ Response:
994
+ {
995
+ "scenarios": [
996
+ {
997
+ "scenario_id": "price_inquiry",
998
+ "name": "Hỏi giá vé",
999
+ "description": "Tư vấn giá vé và gửi PDF",
1000
+ "triggers": ["giá vé", "bao nhiêu"],
1001
+ "category": "sales"
1002
+ },
1003
+ ...
1004
+ ]
1005
+ }
1006
+ ```
1007
+ """
1008
+ scenarios_list = []
1009
+
1010
+ for scenario_id, scenario_data in scenario_engine.scenarios.items():
1011
+ scenarios_list.append({
1012
+ "scenario_id": scenario_id,
1013
+ "name": scenario_data.get("name", scenario_id),
1014
+ "description": scenario_data.get("description", ""),
1015
+ "triggers": scenario_data.get("triggers", []),
1016
+ "category": scenario_data.get("category", "general"),
1017
+ "priority": scenario_data.get("priority", "normal"),
1018
+ "estimated_duration": scenario_data.get("estimated_duration", "unknown")
1019
+ })
1020
+
1021
+ return {
1022
+ "total": len(scenarios_list),
1023
+ "scenarios": scenarios_list
1024
+ }
1025
+
1026
+
1027
+ @app.post("/scenarios/{scenario_id}/start")
1028
+ async def start_scenario_proactive(
1029
+ scenario_id: str,
1030
+ request_body: Optional[Dict] = None
1031
+ ):
1032
+ """
1033
+ Start a scenario proactively with optional initial data
1034
+
1035
+ Use cases:
1036
+ 1. FE picks random scenario
1037
+ 2. BE triggers scenario based on user action (after purchase, exit intent, etc.)
1038
+ 3. Inject context data (event_name, mood, etc.)
1039
+
1040
+ Example 1 - Simple start:
1041
+ ```
1042
+ POST /scenarios/price_inquiry/start
1043
+ {}
1044
+
1045
+ Response:
1046
+ {
1047
+ "session_id": "abc-123",
1048
+ "message": "Hello 👋 Bạn muốn xem giá..."
1049
+ }
1050
+ ```
1051
+
1052
+ Example 2 - With initial data (post-event feedback):
1053
+ ```
1054
+ POST /scenarios/post_event_feedback/start
1055
+ {
1056
+ "initial_data": {
1057
+ "event_name": "Hòa Nhạc Mùa Xuân",
1058
+ "event_date": "2024-11-29",
1059
+ "event_id": "evt_123"
1060
+ },
1061
+ "session_id": "existing-session", // optional
1062
+ "user_id": "user_456" // optional
1063
+ }
1064
+
1065
+ Response:
1066
+ {
1067
+ "session_id": "abc-123",
1068
+ "message": "Cảm ơn bạn đã tham dự *Hòa Nhạc Mùa Xuân* hôm qua!"
1069
+ }
1070
+ ```
1071
+
1072
+ Example 3 - Mood recommendation:
1073
+ ```
1074
+ POST /scenarios/mood_recommendation/start
1075
+ {
1076
+ "initial_data": {
1077
+ "mood": "chill",
1078
+ "preferred_genre": "acoustic"
1079
+ }
1080
+ }
1081
+ ```
1082
+ """
1083
+ # Parse request body
1084
+ body = request_body or {}
1085
+ initial_data = body.get("initial_data", {})
1086
+ session_id = body.get("session_id")
1087
+ user_id = body.get("user_id")
1088
+
1089
+ # Create or use existing session
1090
+ if not session_id:
1091
+ session_id = conversation_service.create_session(
1092
+ metadata={"started_by": "proactive", "scenario": scenario_id},
1093
+ user_id=user_id
1094
+ )
1095
+
1096
+ # Start scenario with initial data
1097
+ result = scenario_engine.start_scenario(scenario_id, initial_data)
1098
+
1099
+ if result.get("new_state"):
1100
+ conversation_service.set_scenario_state(session_id, result["new_state"])
1101
+
1102
+ # Save bot message to history
1103
+ conversation_service.add_message(
1104
+ session_id,
1105
+ "assistant",
1106
+ result["message"],
1107
+ metadata={"proactive": True, "scenario": scenario_id, "initial_data": initial_data}
1108
+ )
1109
+
1110
+ return {
1111
+ "session_id": session_id,
1112
+ "scenario_id": scenario_id,
1113
+ "message": result["message"],
1114
+ "scenario_active": True,
1115
+ "proactive": True
1116
+ }
1117
+
1118
+
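A sketch of the proactive flow end to end, assuming a local server and that the hybrid /chat endpoint resumes the saved scenario state as described above; the user_id and messages are illustrative.

```python
# Hypothetical client: start post_event_feedback proactively, then continue via /chat.
import httpx

BASE = "http://localhost:8000"  # assumed host/port

with httpx.Client(base_url=BASE, timeout=30.0) as client:
    start = client.post(
        "/scenarios/post_event_feedback/start",
        json={"initial_data": {"event_name": "Hòa Nhạc Mùa Xuân"}, "user_id": "user_456"},
    ).json()
    print("bot:", start["message"])  # greeting rendered from bot_message_template

    # The user's answer flows through the normal /chat endpoint with the same session
    reply = client.post(
        "/chat", json={"message": "9/10, rất vui!", "session_id": start["session_id"]}
    ).json()
    print("bot:", reply["response"])
```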
  @app.post("/chat/clear-session")
  async def clear_chat_session(session_id: str):
      """

          raise HTTPException(status_code=500, detail=f"Error: {str(e)}")

+ @app.post("/documents/upload/pdf")
1237
+ async def upload_pdf(
1238
+ file: UploadFile = File(...),
1239
+ metadata: Optional[str] = Form(None)
1240
+ ):
1241
+ """
1242
+ Upload PDF file and index into knowledge base
1243
+
1244
+ Features:
1245
+ - Extracts text from PDF
1246
+ - Detects image URLs in text/markdown
1247
+ - Chunks content intelligently
1248
+ - Indexes all chunks into Qdrant for RAG
1249
+
1250
+ Args:
1251
+ file: PDF file to upload
1252
+ metadata: Optional JSON string with metadata (title, author, etc.)
1253
+
1254
+ Returns:
1255
+ Success status, document ID, and indexing stats
1256
+
1257
+ Example:
1258
+ ```bash
1259
+ curl -X POST http://localhost:8000/documents/upload/pdf \
1260
+ -F "file=@document.pdf" \
1261
+ -F 'metadata={"title": "User Guide", "category": "documentation"}'
1262
+ ```
1263
+ """
1264
+ try:
1265
+ # Validate file type
1266
+ if not file.filename.endswith('.pdf'):
1267
+ raise HTTPException(
1268
+ status_code=400,
1269
+ detail="Only PDF files are supported"
1270
+ )
1271
+
1272
+ # Read file bytes
1273
+ pdf_bytes = await file.read()
1274
+
1275
+ # Parse metadata if provided
1276
+ import json
1277
+ doc_metadata = {}
1278
+ if metadata:
1279
+ try:
1280
+ doc_metadata = json.loads(metadata)
1281
+ except json.JSONDecodeError:
1282
+ raise HTTPException(
1283
+ status_code=400,
1284
+ detail="Invalid metadata JSON format"
1285
+ )
1286
+
1287
+ # Generate unique document ID
1288
+ from bson import ObjectId
1289
+ document_id = str(ObjectId())
1290
+
1291
+ # Add upload timestamp
1292
+ doc_metadata['uploaded_at'] = datetime.utcnow().isoformat()
1293
+ doc_metadata['original_filename'] = file.filename
1294
+
1295
+ # Index PDF using multimodal parser
1296
+ result = multimodal_pdf_indexer.index_pdf_bytes(
1297
+ pdf_bytes=pdf_bytes,
1298
+ document_id=document_id,
1299
+ filename=file.filename,
1300
+ document_metadata=doc_metadata
1301
+ )
1302
+
1303
+ return {
1304
+ "success": True,
1305
+ "document_id": document_id,
1306
+ "filename": file.filename,
1307
+ "chunks_indexed": result['chunks_indexed'],
1308
+ "images_found": result.get('images_found', 0),
1309
+ "message": f"PDF uploaded and indexed: {result['chunks_indexed']} chunks, {result.get('images_found', 0)} image URLs found"
1310
+ }
1311
+
1312
+ except HTTPException:
1313
+ raise
1314
+ except Exception as e:
1315
+ raise HTTPException(
1316
+ status_code=500,
1317
+ detail=f"Error processing PDF: {str(e)}"
1318
+ )
1319
+
1320
+
1321
  @app.post("/rag/search", response_model=List[SearchResponse])
1322
  async def rag_search(
1323
  query: str = Form(...),
scenario_engine.py ADDED
@@ -0,0 +1,329 @@
+ """
+ Scenario Engine for FSM-based Conversations
+ Executes multi-turn scripted conversations from JSON definitions
+ """
+ import json
+ import os
+ import re
+ from typing import Dict, Optional, List, Any
+ from datetime import datetime
+
+
+ class ScenarioEngine:
+     """
+     Execute scenario-based conversations
+     Load scenarios from JSON and manage the step-by-step flow
+     """
+
+     def __init__(self, scenarios_dir: str = "scenarios"):
+         self.scenarios_dir = scenarios_dir
+         self.scenarios = self._load_scenarios()
+
+     def _load_scenarios(self) -> Dict[str, Dict]:
+         """Load all scenario JSON files"""
+         scenarios = {}
+
+         if not os.path.exists(self.scenarios_dir):
+             print(f"⚠ Scenarios directory not found: {self.scenarios_dir}")
+             return scenarios
+
+         for filename in os.listdir(self.scenarios_dir):
+             if filename.endswith('.json'):
+                 filepath = os.path.join(self.scenarios_dir, filename)
+                 with open(filepath, 'r', encoding='utf-8') as f:
+                     scenario = json.load(f)
+                     scenario_id = scenario.get('scenario_id')
+                     if scenario_id:
+                         scenarios[scenario_id] = scenario
+                         print(f"✓ Loaded scenario: {scenario_id}")
+
+         return scenarios
+
+     def start_scenario(self, scenario_id: str, initial_data: Dict = None) -> Dict[str, Any]:
+         """
+         Start a new scenario with optional initial data
+
+         Args:
+             scenario_id: Scenario to start
+             initial_data: External data to inject (event_name, mood, etc.)
+
+         Returns:
+             {
+                 "message": str,
+                 "new_state": {...},
+                 "end_scenario": bool
+             }
+         """
+         if scenario_id not in self.scenarios:
+             return {
+                 "message": "Xin lỗi, tính năng này đang được cập nhật.",
+                 "new_state": {},
+                 "end_scenario": True
+             }
+
+         scenario = self.scenarios[scenario_id]
+         first_step = scenario['steps'][0]
+
+         # Initialize with external data
+         scenario_data = initial_data.copy() if initial_data else {}
+
+         # Build the first message with the initial data
+         message = self._build_message(first_step, scenario_data, None)
+
+         return {
+             "message": message,
+             "new_state": {
+                 "active_scenario": scenario_id,
+                 "scenario_step": 1,
+                 "scenario_data": scenario_data,
+                 "last_activity": datetime.utcnow().isoformat()
+             },
+             "end_scenario": False
+         }
+
+     def next_step(
+         self,
+         scenario_id: str,
+         current_step: int,
+         user_input: str,
+         scenario_data: Dict,
+         rag_service: Optional[Any] = None
+     ) -> Dict[str, Any]:
+         """
+         Process user input and move to the next step
+
+         Args:
+             scenario_id: Active scenario ID
+             current_step: Current step number
+             user_input: User's message
+             scenario_data: Data collected so far
+             rag_service: Optional RAG service for hybrid queries
+
+         Returns:
+             {
+                 "message": str,
+                 "new_state": {...} | None,
+                 "end_scenario": bool,
+                 "action": str | None
+             }
+         """
+         if scenario_id not in self.scenarios:
+             return {"message": "Error: Scenario not found", "end_scenario": True}
+
+         scenario = self.scenarios[scenario_id]
+         current_step_config = self._get_step(scenario, current_step)
+
+         if not current_step_config:
+             return {"message": "Error: Step not found", "end_scenario": True}
+
+         # Validate input if needed
+         expected_type = current_step_config.get('expected_input_type')
+         if expected_type:
+             validation_error = self._validate_input(user_input, expected_type)
+             if validation_error:
+                 return {
+                     "message": validation_error,
+                     "new_state": None,  # Don't change state
+                     "end_scenario": False
+                 }
+
+         # Handle branching
+         if 'branches' in current_step_config:
+             branch_result = self._handle_branches(
+                 current_step_config['branches'],
+                 user_input,
+                 scenario_data,
+                 default_name=current_step_config.get('default_branch')
+             )
+             next_step_id = branch_result['next_step']
+             scenario_data.update(branch_result.get('save_data', {}))
+         else:
+             next_step_id = current_step_config.get('next_step')
+
+         # Save user input
+         input_field = current_step_config.get('save_as', f'step_{current_step}_input')
+         scenario_data[input_field] = user_input
+
+         # Get next step config
+         next_step_config = self._get_step(scenario, next_step_id)
+         if not next_step_config:
+             return {"message": "Cảm ơn bạn!", "end_scenario": True}
+
+         # Check if the scenario ends
+         if next_step_config.get('end_scenario'):
+             return {
+                 "message": next_step_config['bot_message'],
+                 "new_state": None,
+                 "end_scenario": True,
+                 "action": next_step_config.get('action')
+             }
+
+         # Build the next message
+         message = self._build_message(
+             next_step_config,
+             scenario_data,
+             rag_service
+         )
+
+         return {
+             "message": message,
+             "new_state": {
+                 "active_scenario": scenario_id,
+                 "scenario_step": next_step_id,
+                 "scenario_data": scenario_data,
+                 "last_activity": datetime.utcnow().isoformat()
+             },
+             "end_scenario": False,
+             "action": next_step_config.get('action')
+         }
+
+     def _get_step(self, scenario: Dict, step_id: int) -> Optional[Dict]:
+         """Get a step config by ID"""
+         for step in scenario['steps']:
+             if step['id'] == step_id:
+                 return step
+         return None
+
+     def _validate_input(self, user_input: str, expected_type: str) -> Optional[str]:
+         """
+         Validate user input
+         Returns an error message, or None if valid
+         """
+         if expected_type == 'email':
+             if not re.match(r'^[\w\.-]+@[\w\.-]+\.\w+$', user_input):
+                 return "Email không hợp lệ. Vui lòng nhập lại (vd: ten@email.com)"
+
+         elif expected_type == 'phone':
+             # Simple Vietnamese phone validation
+             clean = re.sub(r'[^\d]', '', user_input)
+             if len(clean) < 9 or len(clean) > 11:
+                 return "Số điện thoại không hợp lệ. Vui lòng nhập lại (10-11 số)"
+
+         return None
+
+     def _handle_branches(
+         self,
+         branches: Dict,
+         user_input: str,
+         scenario_data: Dict,
+         default_name: Optional[str] = None
+     ) -> Dict:
+         """
+         Handle branch logic
+
+         Returns:
+             {"next_step": int, "save_data": {...}}
+         """
+         user_input_lower = user_input.lower().strip()
+
+         for branch_name, branch_config in branches.items():
+             if branch_name == 'default':
+                 continue
+
+             patterns = branch_config.get('patterns', [])
+             for pattern in patterns:
+                 if pattern.lower() in user_input_lower:
+                     return {
+                         "next_step": branch_config['next_step'],
+                         "save_data": branch_config.get('save_data', {})
+                     }
+
+         # Default branch: the scenario JSON stores "default_branch" at step level
+         # (a sibling of "branches"), so the caller passes it in; fall back to the
+         # first branch if nothing is specified
+         if not default_name:
+             default_name = branches.get('default_branch', list(branches.keys())[0])
+         default_branch = branches.get(default_name, list(branches.values())[0])
+
+         return {
+             "next_step": default_branch['next_step'],
+             "save_data": default_branch.get('save_data', {})
+         }
+
+     def _build_message(
+         self,
+         step_config: Dict,
+         scenario_data: Dict,
+         rag_service: Optional[Any]
+     ) -> str:
+         """
+         Build the bot message with 3-layer data resolution:
+         1. scenario_data (initial + user inputs)
+         2. RAG results (if rag_query_template exists)
+         3. Merged template vars
+         """
+         # Layer 1: Base data (initial + user inputs)
+         template_data = {
+             'event_name': scenario_data.get('event_name', 'sự kiện này'),
+             'mood': scenario_data.get('mood', ''),
+             'interest': scenario_data.get('interest', ''),
+             **scenario_data  # Include all scenario data
+         }
+
+         # Layer 2: RAG query (if specified)
+         if 'rag_query_template' in step_config:
+             try:
+                 # Build the query from the template
+                 query = step_config['rag_query_template'].format(**template_data)
+
+                 if rag_service:
+                     # Execute RAG search
+                     results = self._execute_rag_query(query, rag_service)
+                     template_data['rag_results'] = results
+                 else:
+                     # Fallback if no RAG service
+                     template_data['rag_results'] = "(Đang tải thông tin...)"
+             except Exception as e:
+                 print(f"⚠ RAG query error: {e}")
+                 template_data['rag_results'] = ""
+
+         # Layer 3: Build the final message
+         if 'bot_message_template' in step_config:
+             try:
+                 return step_config['bot_message_template'].format(**template_data)
+             except KeyError as e:
+                 print(f"⚠ Template var missing: {e}")
+                 # Fall back to the plain message without placeholders
+                 return step_config.get('bot_message', step_config['bot_message_template'])
+
+         return step_config.get('bot_message', '')
+
+     def _execute_rag_query(self, query: str, rag_service: Any) -> str:
+         """
+         Execute a RAG query and format the results
+
+         Returns a formatted string of the top results
+         """
+         try:
+             # Simple search (we'll integrate with the actual RAG later)
+             # For now, return a placeholder
+             return f"[Kết quả tìm kiếm cho: {query}]\n1. Sự kiện A\n2. Sự kiện B"
+         except Exception as e:
+             print(f"⚠ RAG execution error: {e}")
+             return ""
+
+
+ # Test
+ if __name__ == "__main__":
+     engine = ScenarioEngine()
+
+     print("\nTest: Start price_inquiry scenario")
+     result = engine.start_scenario("price_inquiry")
+     print(f"Bot: {result['message']}")
+     print(f"State: {result['new_state']}")
+
+     print("\nTest: User answers 'Show A'")
+     state = result['new_state']
+     result = engine.next_step(
+         scenario_id=state['active_scenario'],
+         current_step=state['scenario_step'],
+         user_input="Show A",
+         scenario_data=state['scenario_data']
+     )
+     print(f"Bot: {result['message']}")
+
+     print("\nTest: User answers 'nhóm'")
+     state = result['new_state']
+     result = engine.next_step(
+         scenario_id=state['active_scenario'],
+         current_step=state['scenario_step'],
+         user_input="nhóm 5 người",
+         scenario_data=state['scenario_data']
+     )
+     print(f"Bot: {result['message']}")
+     print(f"Data collected: {result['new_state']['scenario_data']}")
scenarios/event_recommendation.json ADDED
@@ -0,0 +1,108 @@
+ {
+   "scenario_id": "event_recommendation",
+   "name": "Gợi ý sự kiện cá nhân hoá",
+   "description": "Gợi ý sự kiện dựa trên sở thích và mood của user",
+   "triggers": ["gợi ý", "event nào hợp", "nên đi show nào"],
+   "steps": [
+     {
+       "id": 1,
+       "bot_message": "Hello! 👋 Bạn muốn tìm sự kiện theo vibe gì nè? Chill – Sôi động – Hài – Workshop?",
+       "expected_input_type": "interest_tag",
+       "save_as": "interest_tag",
+       "next_step": 2
+     },
+     {
+       "id": 2,
+       "bot_message_template": "Mình hiểu rồi! Để mình tìm sự kiện hợp vibe **{interest_tag}** nha",
+       "rag_query_template": "sự kiện phù hợp với {interest_tag}",
+       "next_step": 3
+     },
+     {
+       "id": 3,
+       "bot_message_template": "Đây là 2–3 event hợp với bạn nè:\n{rag_results}\nBạn có muốn xem chi tiết event nào không?",
+       "expected_input_type": "event_name",
+       "save_as": "event_name",
+       "next_step": 4
+     },
+     {
+       "id": 4,
+       "bot_message": "Bạn cần xem: giá – line-up – địa điểm – hay thời gian của sự kiện?",
+       "expected_input_type": "choice",
+       "branches": {
+         "price": {
+           "patterns": ["giá", "price"],
+           "next_step": 5
+         },
+         "lineup": {
+           "patterns": ["lineup", "line-up", "nghệ sĩ"],
+           "next_step": 6
+         },
+         "location": {
+           "patterns": ["địa điểm", "ở đâu", "location"],
+           "next_step": 7
+         },
+         "time": {
+           "patterns": ["thời gian", "khi nào", "date", "time"],
+           "next_step": 8
+         }
+       },
+       "default_branch": "price"
+     },
+     {
+       "id": 5,
+       "bot_message_template": "Giá vé event {event_name} nè:\n{rag_results}",
+       "rag_query_template": "giá vé {event_name}",
+       "next_step": 9
+     },
+     {
+       "id": 6,
+       "bot_message_template": "Lineup / nghệ sĩ của event {event_name} là:\n{rag_results}",
+       "rag_query_template": "lineup {event_name}",
+       "next_step": 9
+     },
+     {
+       "id": 7,
+       "bot_message_template": "Địa điểm tổ chức event {event_name}:\n{rag_results}",
+       "rag_query_template": "địa điểm {event_name}",
+       "next_step": 9
+     },
+     {
+       "id": 8,
+       "bot_message_template": "Thời gian / lịch diễn của event {event_name}:\n{rag_results}",
+       "rag_query_template": "thời gian {event_name}",
+       "next_step": 9
+     },
+     {
+       "id": 9,
+       "bot_message": "Bạn muốn mình lưu event này vào email để bạn theo dõi dễ hơn không?",
+       "expected_input_type": "choice",
+       "branches": {
+         "yes": {
+           "patterns": ["có", "yes", "ok"],
+           "next_step": 10
+         },
+         "no": {
+           "patterns": ["không", "no"],
+           "next_step": 11
+         }
+       },
+       "default_branch": "no"
+     },
+     {
+       "id": 10,
+       "bot_message": "Cho mình xin email để gửi bản tóm tắt event kèm link mua vé?",
+       "expected_input_type": "email",
+       "validation": "email",
+       "action": "send_event_summary_email",
+       "next_step": 12
+     },
+     {
+       "id": 11,
+       "bot_message": "Okie, bạn cần event theo vibe khác không nè? 😄",
+       "end_scenario": true
+     },
+     {
+       "id": 12,
+       "bot_message": "Đã gửi email cho bạn nha! ✨",
+       "end_scenario": true
+     }
+   ]
+ }
scenarios/exit_intent_rescue.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "scenario_id": "exit_intent_rescue",
+   "name": "Giữ chân khi user chuẩn bị thoát",
+   "description": "Kịch bản gửi ưu đãi nhẹ để ngăn user thoát",
+   "triggers": ["exit_intent"],
+   "steps": [
+     {
+       "id": 1,
+       "bot_message": "Khoan đã 😭 Trước khi bạn rời đi… chúng mình sắp có mã giảm 5% cho bất kỳ vé nào. Bạn muốn nhận không?",
+       "expected_input_type": "choice",
+       "branches": {
+         "yes": {
+           "patterns": ["có", "yes", "ok", "muốn", "quan tâm"],
+           "next_step": 2
+         },
+         "no": {
+           "patterns": ["không", "no", "ko", "chưa hợp", "không thích"],
+           "next_step": 3
+         }
+       },
+       "default_branch": "no"
+     },
+     {
+       "id": 2,
+       "bot_message": "Cho mình xin email để gửi mã nhé!",
+       "expected_input_type": "email",
+       "validation": "email",
+       "action": "send_coupon_email",
+       "next_step": 4
+     },
+     {
+       "id": 3,
+       "bot_message": "Okie, nếu cần gì bạn cứ gọi mình nha 💛",
+       "end_scenario": true
+     }
+   ]
+ }
scenarios/mini_survey_lead.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "scenario_id": "mini_survey_lead",
+   "name": "Mini survey thu lead",
+   "description": "Kịch bản khảo sát 3 câu hỏi để thu email nhẹ nhàng",
+   "triggers": ["survey", "khảo sát", "quiz"],
+   "steps": [
+     {
+       "id": 1,
+       "bot_message": "Đi event kiểu gì hợp vibe bạn nhất nè? 😆 (Chọn 1)\n• Chill\n• Sôi động\n• Hài\n• Học hỏi",
+       "expected_input_type": "choice",
+       "save_data": {"preference": "{user_choice}"},
+       "next_step": 2
+     },
+     {
+       "id": 2,
+       "bot_message": "Bạn thường đi event: 1 mình – bạn thân – nhóm?",
+       "expected_input_type": "choice",
+       "save_data": {"group_type": "{user_choice}"},
+       "next_step": 3
+     },
+     {
+       "id": 3,
+       "bot_message": "Bạn thích mức giá nào để thoải mái nhất? (<500k / 500–1tr / >1tr)",
+       "expected_input_type": "choice",
+       "save_data": {"budget": "{user_choice}"},
+       "next_step": 4
+     },
+     {
+       "id": 4,
+       "bot_message": "Doneee! 🎉 Mình có sự kiện này 'Gợi ý sự kiện hợp vibe 2025' tổng hợp theo câu trả lời của bạn. Gửi email để nhận nhé?",
+       "expected_input_type": "email",
+       "validation": "email",
+       "action": "send_survey_pdf",
+       "next_step": 5
+     }
+   ]
+ }
scenarios/mood_recommendation.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "scenario_id": "mood_recommendation",
+   "name": "Gợi ý theo mood",
+   "description": "Gợi ý sự kiện theo tâm trạng hiện tại",
+   "triggers": ["chán", "muốn đi đâu", "gợi ý mood"],
+   "steps": [
+     {
+       "id": 1,
+       "bot_message": "Mood hôm nay của bạn là gì nè? 😊 (Chill / Sôi động / Muốn cười / Muốn học hỏi)",
+       "expected_input_type": "interest_tag",
+       "save_data": {"mood": "{user_choice}"},
+       "save_as": "mood",
+       "next_step": 2
+     },
+     {
+       "id": 2,
+       "bot_message_template": "Để mình tìm event hợp mood **{mood}** của bạn nha 🔍",
+       "rag_query_template": "sự kiện hợp mood {mood}",
+       "next_step": 3
+     },
+     {
+       "id": 3,
+       "bot_message_template": "Có mấy event hợp mood bạn nè:\n{rag_results}\nBạn muốn xem chi tiết event nào?",
+       "expected_input_type": "event_name",
+       "next_step": 4
+     },
+     {
+       "id": 4,
+       "bot_message": "Bạn muốn nhận gợi ý mỗi tuần theo mood không?",
+       "expected_input_type": "choice",
+       "branches": {
+         "yes": {
+           "patterns": ["có", "yes"],
+           "next_step": 5
+         },
+         "no": {
+           "patterns": ["không"],
+           "next_step": 6
+         }
+       }
+     },
+     {
+       "id": 5,
+       "bot_message": "Cho mình email để gửi gợi ý hàng tuần nhé!",
+       "expected_input_type": "email",
+       "validation": "email",
+       "action": "save_mood_subscription",
+       "next_step": 6
+     },
+     {
+       "id": 6,
+       "bot_message": "Có dịp rồi hãy quay lại để xem các sự kiện khác nhé! 😊",
+       "end_scenario": true
+     }
+   ]
+ }
scenarios/post_event_feedback.json ADDED
@@ -0,0 +1,115 @@
+ {
+   "scenario_id": "post_event_feedback",
+   "name": "Hậu sự kiện – Thu thập feedback & nuôi lại lead",
+   "description": "Kịch bản chăm sóc khách sau sự kiện: xin đánh giá, phân loại cảm xúc, gợi ý sự kiện phù hợp và thu lead dài hạn.",
+   "triggers": ["feedback", "đánh giá", "hậu sự kiện", "event review", "review", "đi sự kiện xong"],
+   "steps": [
+     {
+       "id": 1,
+       "bot_message_template": "Hello 👋 Cảm ơn bạn đã tham dự *{event_name}* hôm qua! Bạn thấy trải nghiệm tổng thể như thế nào?",
+       "expected_input_type": "rating",
+       "timeout_seconds": 20,
+       "timeout_message": "Bạn rảnh gửi mình 1–2 câu đánh giá nhé, để team cải thiện ạ 🙏",
+       "rag_query_template": "thông tin về {event_name}",
+       "next_step": 2
+     },
+     {
+       "id": 2,
+       "bot_message": "Cảm ơn bạn! Nếu tiện, cho mình hỏi thêm → Điều gì bạn thích nhất ở sự kiện?",
+       "expected_input_type": "text",
+       "save_data": {"liked_point": "@user_input"},
+       "next_step": 3
+     },
+     {
+       "id": 3,
+       "bot_message": "Cảm ơn bạn 🙏 Còn điều gì bạn nghĩ chúng mình có thể cải thiện hơn ở lần sau?",
+       "expected_input_type": "text",
+       "save_data": {"improve_suggestion": "@user_input"},
+       "next_step": 4
+     },
+     {
+       "id": 4,
+       "bot_message": "Cho mình hỏi chút nữa nha… Nội dung sự kiện có hợp với sở thích của bạn không?",
+       "expected_input_type": "choice",
+       "branches": {
+         "yes": {
+           "patterns": ["có", "yes", "đúng", "ổn", "hop"],
+           "save_data": {"content_fit": true},
+           "next_step": 5
+         },
+         "no": {
+           "patterns": ["không", "no", "ko", "chưa hợp", "không thích"],
+           "save_data": {"content_fit": false},
+           "next_step": 6
+         }
+       },
+       "default_branch": "yes"
+     },
+     {
+       "id": 5,
+       "bot_message": "Tuyệt quá! Mình note lại rồi nè. À, bạn có muốn nhận list sự kiện phù hợp với gu của bạn trong 1 tháng tới không?",
+       "expected_input_type": "choice",
+       "branches": {
+         "yes": {
+           "patterns": ["có", "yes", "ok", "muốn", "quan tâm"],
+           "next_step": 7
+         },
+         "no": {
+           "patterns": ["không", "no", "ko"],
+           "next_step": 10
+         }
+       },
+       "default_branch": "no"
+     },
+     {
+       "id": 6,
+       "bot_message": "Oh mình hiểu rồi nè! Để lần sau team chọn nội dung sát hơn với gu của bạn. Gu của bạn nghiêng về kiểu nào nè?",
+       "expected_input_type": "choice",
+       "branches": {
+         "music": {
+           "patterns": ["nhạc", "music", "concert", "live"],
+           "save_data": {"preferred_genre": "music"},
+           "next_step": 5
+         },
+         "talkshow": {
+           "patterns": ["talkshow", "trò chuyện", "chia sẻ", "speaker"],
+           "save_data": {"preferred_genre": "talkshow"},
+           "next_step": 5
+         },
+         "workshop": {
+           "patterns": ["workshop", "học", "lớp", "training"],
+           "save_data": {"preferred_genre": "workshop"},
+           "next_step": 5
+         }
+       },
+       "default_branch": "music"
+     },
+     {
+       "id": 7,
+       "bot_message": "Cho mình xin email để gửi danh sách sự kiện theo đúng gu của bạn nha 💌",
+       "expected_input_type": "email",
+       "validation": "email",
+       "action": "save_lead_email",
+       "next_step": 8
+     },
+     {
+       "id": 8,
+       "bot_message": "Cảm ơn bạn! Nếu muốn nhận thông báo vé hot/early bird qua SMS thì cho mình xin số nhé 📱",
+       "expected_input_type": "phone",
+       "validation": "phone",
+       "action": "save_lead_phone",
+       "next_step": 9
+     },
+     {
+       "id": 9,
+       "bot_message": "Done! Team sẽ gửi bạn list sự kiện xịn nhất hàng tháng 🎉 Cảm ơn bạn đã đồng hành ❤️",
+       "end_scenario": true
+     },
+     {
+       "id": 10,
+       "bot_message": "Không sao nha! Nếu sau này bạn muốn xem thêm sự kiện hay ho khác cứ nhắn mình nha 💛",
+       "end_scenario": true
+     }
+   ]
+ }
scenarios/price_inquiry.json ADDED
@@ -0,0 +1,103 @@
+ {
+   "scenario_id": "price_inquiry",
+   "name": "Hỏi giá vé",
+   "description": "Kịch bản tư vấn giá vé và gửi PDF bảng giá",
+   "triggers": ["giá vé", "bao nhiêu", "ticket price"],
+   "steps": [
+     {
+       "id": 1,
+       "bot_message": "Hello 👋 Bạn muốn xem giá của show nào để mình báo đúng nè?",
+       "expected_input_type": "event_name",
+       "save_as": "event_name",
+       "timeout_seconds": 20,
+       "timeout_message": "Bạn cần hỗ trợ gì không ạ?",
+       "next_step": 2
+     },
+     {
+       "id": 2,
+       "bot_message": "Bạn đi 1 mình hay đi nhóm để mình lọc loại vé phù hợp nha?",
+       "expected_input_type": "choice",
+       "branches": {
+         "alone": {
+           "patterns": ["1 mình", "một mình", "alone", "solo", "1", "mình"],
+           "next_step": 3,
+           "save_data": {"group_size": 1, "group_discount": false}
+         },
+         "group": {
+           "patterns": ["nhóm", "group", "bạn bè", "đi cùng", "nhiều người"],
+           "next_step": 3,
+           "save_data": {"group_size": "multiple", "group_discount": true}
+         }
+       },
+       "default_branch": "alone"
+     },
+     {
+       "id": 3,
+       "bot_message_template": "Rồi nè! Show này đang có 3–5 hạng vé, giá từ khoảng {price_min} đến {price_max}.",
+       "rag_query_template": "giá vé {event_name}",
+       "next_step": 4
+     },
+     {
+       "id": 4,
+       "bot_message": "Bạn muốn xem tóm tắt nhanh hay bản full có sơ đồ ghế & vị trí view?",
+       "expected_input_type": "choice",
+       "branches": {
+         "summary": {
+           "patterns": ["tóm tắt", "nhanh", "summary", "ngắn"],
+           "next_step": 10
+         },
+         "full": {
+           "patterns": ["full", "đầy đủ", "sơ đồ", "chi tiết", "pdf"],
+           "next_step": 5
+         }
+       },
+       "default_branch": "summary"
+     },
+     {
+       "id": 5,
+       "bot_message": "Nice! File PDF full nhìn rõ từng khu ghế → tránh mua nhầm 🥲\nMình gửi file qua email để bạn lưu lại cho dễ xem nha, cho mình xin email?",
+       "expected_input_type": "email",
+       "validation": "email",
+       "next_step": 6
+     },
+     {
+       "id": 6,
+       "bot_message": "Đã gửi vào email bạn rồi nè 👌",
+       "action": "send_pdf_email",
+       "next_step": 7
+     },
+     {
+       "id": 7,
+       "bot_message": "Bạn muốn mình nhắc bạn khi có vé Early Bird hoặc sắp sold-out không?",
+       "expected_input_type": "choice",
+       "branches": {
+         "yes": {
+           "patterns": ["có", "yes", "ok", "được", "muốn"],
+           "next_step": 8
+         },
+         "no": {
+           "patterns": ["không", "no", "thôi", "ko"],
+           "next_step": 9
+         }
+       },
+       "default_branch": "no"
+     },
+     {
+       "id": 8,
+       "bot_message": "Cho mình xin số để mình SMS cho bạn ạ.",
+       "expected_input_type": "phone",
+       "validation": "phone",
+       "action": "save_lead_phone",
+       "next_step": 10
+     },
+     {
+       "id": 9,
+       "bot_message": "Okii, cứ hỏi mình bất kì lúc nào nha ✨",
+       "end_scenario": true
+     },
+     {
+       "id": 10,
+       "bot_message": "Cảm ơn bạn! Hẹn gặp lại ^^",
+       "end_scenario": true
+     }
+   ]
+ }
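To see how the engine walks this file, the sketch below drives step 2 ("alone" vs "group") through `ScenarioEngine.next_step`; it assumes this scenarios/ directory is on disk next to the script. Note that step 3's {price_min}/{price_max} placeholders stay unfilled here, because nothing in this demo supplies price data.

```python
# Hypothetical walkthrough of the price_inquiry branch logic.
from scenario_engine import ScenarioEngine

engine = ScenarioEngine(scenarios_dir="scenarios")  # assumes the JSON files above exist

result = engine.next_step(
    scenario_id="price_inquiry",
    current_step=2,                      # "Bạn đi 1 mình hay đi nhóm..."
    user_input="đi nhóm với bạn bè",     # matches the "nhóm" pattern of the "group" branch
    scenario_data={"event_name": "Show A"},
)
print(result["message"])                     # step 3 message (price placeholders unfilled)
print(result["new_state"]["scenario_data"])  # includes group_size="multiple", group_discount=True
```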
stream_utils.py ADDED
@@ -0,0 +1,86 @@
+ """
+ SSE (Server-Sent Events) Utilities
+ Format streaming responses for real-time chat
+ """
+ import json
+ from typing import Dict, Any, AsyncGenerator
+ import asyncio
+
+
+ def format_sse(event: str, data: Any) -> str:
+     """
+     Format data as an SSE message
+
+     Args:
+         event: Event type (token, status, done, error)
+         data: Data payload (string or dict)
+
+     Returns:
+         Formatted SSE string
+
+     Example:
+         format_sse("token", "Hello")
+         # "event: token\ndata: Hello\n\n"
+     """
+     if isinstance(data, dict):
+         data_str = json.dumps(data, ensure_ascii=False)
+     else:
+         data_str = str(data)
+
+     return f"event: {event}\ndata: {data_str}\n\n"
+
+
+ async def simulate_typing(
+     text: str,
+     chars_per_chunk: int = 3,
+     delay_ms: float = 20
+ ) -> AsyncGenerator[str, None]:
+     """
+     Simulate a typing effect by yielding text in chunks
+
+     Args:
+         text: Full text to stream
+         chars_per_chunk: Characters per chunk
+         delay_ms: Milliseconds delay between chunks
+
+     Yields:
+         Text chunks
+
+     Example:
+         async for chunk in simulate_typing("Hello world", chars_per_chunk=2):
+             yield format_sse("token", chunk)
+     """
+     for i in range(0, len(text), chars_per_chunk):
+         chunk = text[i:i + chars_per_chunk]
+         yield chunk
+         await asyncio.sleep(delay_ms / 1000)
+
+
+ async def stream_text_slowly(
+     text: str,
+     event_type: str = "token",
+     chars_per_chunk: int = 3,
+     delay_ms: float = 20
+ ) -> AsyncGenerator[str, None]:
+     """
+     Stream text with a typing effect in SSE format
+
+     Args:
+         text: Text to stream
+         event_type: SSE event type
+         chars_per_chunk: Characters per chunk
+         delay_ms: Delay between chunks
+
+     Yields:
+         SSE formatted chunks
+     """
+     async for chunk in simulate_typing(text, chars_per_chunk, delay_ms):
+         yield format_sse(event_type, chunk)
+
+
+ # Event type constants
+ EVENT_STATUS = "status"
+ EVENT_TOKEN = "token"
+ EVENT_DONE = "done"
+ EVENT_ERROR = "error"
+ EVENT_METADATA = "metadata"
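A sketch of how these helpers are meant to compose into an SSE endpoint. The route name /demo/stream and its message text are hypothetical; FastAPI and StreamingResponse are already used elsewhere in this commit.

```python
# Hypothetical demo endpoint wiring format_sse + stream_text_slowly together.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

from stream_utils import EVENT_DONE, EVENT_STATUS, format_sse, stream_text_slowly

app = FastAPI()


@app.get("/demo/stream")
async def demo_stream():
    async def gen():
        # Status event first, then the simulated-typing token events, then done
        yield format_sse(EVENT_STATUS, "Đang suy nghĩ...")
        async for sse_chunk in stream_text_slowly("Hello 👋 đây là demo streaming."):
            yield sse_chunk  # already SSE-formatted "event: token\ndata: ...\n\n"
        yield format_sse(EVENT_DONE, {"finished": True})

    return StreamingResponse(gen(), media_type="text/event-stream")
```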
tools_service.py CHANGED
@@ -1,233 +1,250 @@
- """
- Tools Service for LLM Function Calling
- HuggingFace-compatible via prompt engineering
- """
- import httpx
- from typing import List, Dict, Any, Optional
- import json
- import asyncio
-
-
- class ToolsService:
-     """
-     Manages external API tools that the LLM can call via prompt engineering
-     """
-
-     def __init__(self, base_url: str = "https://www.festavenue.site"):
-         self.base_url = base_url
-         self.client = httpx.AsyncClient(timeout=10.0)
-
-     def get_tools_prompt(self) -> str:
-         """
-         Return the prompt instruction describing the available tools to the HuggingFace LLM
-         """
-         return """
- AVAILABLE TOOLS:
- Bạn có thể sử dụng các công cụ sau để lấy thông tin chi tiết:
-
- 1. get_event_details(event_code: str)
-    - Mô tả: Lấy thông tin đầy đủ về một sự kiện từ hệ thống
-    - Khi nào dùng: Khi user hỏi về ngày giờ chính xác, địa điểm cụ thể, thông tin liên hệ, hoặc chi tiết khác về một sự kiện
-    - Tham số: event_code = ID sự kiện (LẤY TỪ metadata.id_use TRONG CONTEXT, KHÔNG PHẢI tên sự kiện!)
-
- VÍ DỤ QUAN TRỌNG:
- Context có:
- ```
- metadata: {
-     "id_use": "69194cf61c0eda56688806f7",  ← DÙNG CÁI NÀY!
-     "texts": ["Y-CONCERT - Festival âm nhạc..."]
- }
- ```
- → Dùng event_code = "69194cf61c0eda56688806f7" (NOT "Y-CONCERT")
-
- CÚ PHÁP GỌI TOOL:
- Khi bạn cần gọi tool, hãy trả lời CHÍNH XÁC theo format JSON này:
- ```json
- {
-     "tool_call": true,
-     "function_name": "get_event_details",
-     "arguments": {
-         "event_code": "69194cf61c0eda56688806f7"
-     },
-     "reason": "Cần lấy thông tin chính xác về ngày giờ tổ chức"
- }
- ```
-
- QUAN TRỌNG:
- - event_code PHẢI LÀ metadata.id_use từ context (dạng MongoDB ObjectId)
- - KHÔNG dùng tên sự kiện như "Y-CONCERT" làm event_code
- - CHỈ trả JSON khi BẮT BUỘC cần gọi tool
- - Nếu có thể trả lời từ context sẵn có, đừng gọi tool
- - Sau khi nhận kết quả từ tool, hãy trả lời user bằng ngôn ngữ tự nhiên
- """
-
-     async def parse_and_execute(self, llm_response: str) -> Optional[Dict[str, Any]]:
-         """
-         Parse the LLM response and execute a tool call if one is present
-
-         Returns:
-             None if there is no tool call
-             Dict with the tool result if there is one
-         """
-         # Try to extract JSON from response
-         try:
-             # Find the JSON block in the response
-             if "```json" in llm_response:
-                 json_start = llm_response.find("```json") + 7
-                 json_end = llm_response.find("```", json_start)
-                 json_str = llm_response[json_start:json_end].strip()
-             elif "{" in llm_response and "}" in llm_response:
-                 # Fallback: find the first JSON object
-                 json_start = llm_response.find("{")
-                 json_end = llm_response.rfind("}") + 1
-                 json_str = llm_response[json_start:json_end]
-             else:
-                 return None
-
-             tool_call = json.loads(json_str)
-
-             # Handle multiple JSON formats from LLM
-
-             # Format 1: HF API nested wrapper
-             # {"name": "tool_call", "arguments": {"tool_call": true, ...}}
-             if "name" in tool_call and "arguments" in tool_call and isinstance(tool_call["arguments"], dict):
-                 if "tool_call" in tool_call["arguments"]:
-                     tool_call = tool_call["arguments"]  # Unwrap
-
-             # Format 2: Direct tool name format
-             # {"name": "tool.get_event_details", "arguments": {"event_code": "..."}}
-             if "name" in tool_call and "arguments" in tool_call:
-                 function_name = tool_call["name"]
-                 # Remove "tool." prefix if it exists
-                 if function_name.startswith("tool."):
-                     function_name = function_name.replace("tool.", "")
-
-                 # Convert to standard format
-                 tool_call = {
-                     "tool_call": True,
-                     "function_name": function_name,
-                     "arguments": tool_call["arguments"],
-                     "reason": "Converted from alternate format"
-                 }
-
-             # Validate tool call structure
-             if not tool_call.get("tool_call"):
-                 return None
-
-             function_name = tool_call.get("function_name")
-             arguments = tool_call.get("arguments", {})
-
-             # Execute tool
-             if function_name == "get_event_details":
-                 result = await self._get_event_details(arguments.get("event_code"))
-                 return {
-                     "function": function_name,
-                     "arguments": arguments,
-                     "result": result
-                 }
-             else:
-                 return {
-                     "function": function_name,
-                     "arguments": arguments,
-                     "result": {"success": False, "error": f"Unknown function: {function_name}"}
-                 }
-
-         except (json.JSONDecodeError, KeyError, ValueError) as e:
-             # Not a tool call; treat it as a normal response
-             return None
-
-     async def _get_event_details(self, event_code: str) -> Dict[str, Any]:
-         """
-         Call the getEventByEventCode API
-         """
-         print(f"\n=== CALLING API get_event_details ===")
-         print(f"Event Code: {event_code}")
-
-         try:
-             url = f"https://hoalacrent.io.vn/api/v0/event/get-event-by-event-code"
-             params = {"eventCode": event_code}
-
-             print(f"URL: {url}")
-             print(f"Params: {params}")
-
-             response = await self.client.get(url, params=params)
-
-             print(f"Status Code: {response.status_code}")
-
-             response.raise_for_status()
-             data = response.json()
-
-             print(f"Response Data Keys: {list(data.keys()) if data else 'None'}")
-             print(f"Has 'data' field: {'data' in data}")
-
-             # Extract relevant fields
-             event = data.get("data", {})
-
-             if not event:
-                 return {
-                     "success": False,
-                     "error": "Event not found",
-                     "message": f"Không tìm thấy sự kiện với mã {event_code}"
-                 }
-
-             # Extract location with its nested address structure
-             location_data = event.get("location", {})
-             location = {
-                 "address": {
-                     "street": location_data.get("address", {}).get("street", ""),
-                     "city": location_data.get("address", {}).get("city", ""),
-                     "state": location_data.get("address", {}).get("state", ""),
-                     "postalCode": location_data.get("address", {}).get("postalCode", ""),
-                     "country": location_data.get("address", {}).get("country", "")
-                 },
-                 "coordinates": {
-                     "latitude": location_data.get("coordinates", {}).get("latitude"),
-                     "longitude": location_data.get("coordinates", {}).get("longitude")
-                 }
-             }
-
-             # Build event URL
-             event_code = event.get("eventCode")
-             event_url = f"https://www.festavenue.site/user/event/{event_code}" if event_code else None
-
-             return {
-                 "success": True,
-                 "event_code": event_code,
-                 "event_name": event.get("eventName"),
-                 "event_url": event_url,  # NEW: Direct link to event page
-                 "description": event.get("description"),
-                 "short_description": event.get("shortDescription"),
-                 "start_time": event.get("startTimeEventTime"),
-                 "end_time": event.get("endTimeEventTime"),
-                 "start_sale": event.get("startTicketSaleTime"),
-                 "end_sale": event.get("endTicketSaleTime"),
-                 "location": location,  # Full nested structure
-                 "contact": {
-                     "email": event.get("publicContactEmail"),
-                     "phone": event.get("publicContactPhone"),
-                     "website": event.get("website")
-                 },
-                 "capacity": event.get("capacity"),
-                 "hashtags": event.get("hashtags", [])
-             }
-
-             print(f"Successfully extracted event data for: {event.get('eventName')}")
-             print(f"=== API CALL COMPLETE ===")
-             return result
-
-         except httpx.HTTPStatusError as e:
-             return {
-                 "success": False,
-                 "error": f"HTTP {e.response.status_code}",
-                 "message": f"API trả về lỗi khi truy vấn sự kiện {event_code}"
-             }
-         except Exception as e:
-             return {
-                 "success": False,
-                 "error": str(e),
-                 "message": "Không thể kết nối đến API để lấy thông tin sự kiện"
-             }
-
-     async def close(self):
-         """Close HTTP client"""
-         await self.client.aclose()

+ """
+ Tools Service for LLM Function Calling
+ HuggingFace-compatible via prompt engineering
+ """
+ import httpx
+ from typing import List, Dict, Any, Optional
+ import json
+ import asyncio
+
+
+ class ToolsService:
+     """
+     Manages external API tools that the LLM can call via prompt engineering
+     """
+
+     def __init__(self, base_url: str = "https://www.festavenue.site"):
+         self.base_url = base_url
+         self.client = httpx.AsyncClient(timeout=10.0)
+
+     def get_tools_prompt(self) -> str:
+         """
+         Return the prompt instruction describing the available tools to the HuggingFace LLM
+         """
+         return """
+ AVAILABLE TOOLS:
+ Bạn có thể sử dụng các công cụ sau để lấy thông tin chi tiết:
+
+ 1. get_event_details(event_code: str)
+    - Mô tả: Lấy thông tin đầy đủ về một sự kiện từ hệ thống
+    - Khi nào dùng: Khi user hỏi về ngày giờ chính xác, địa điểm cụ thể, thông tin liên hệ, hoặc chi tiết khác về một sự kiện
+    - Tham số: event_code = ID sự kiện (LẤY TỪ metadata.id_use TRONG CONTEXT, KHÔNG PHẢI tên sự kiện!)
+
+ VÍ DỤ QUAN TRỌNG:
+ Context có:
+ ```
+ metadata: {
+     "id_use": "69194cf61c0eda56688806f7",  ← DÙNG CÁI NÀY!
+     "texts": ["Y-CONCERT - Festival âm nhạc..."]
+ }
+ ```
+ → Dùng event_code = "69194cf61c0eda56688806f7" (NOT "Y-CONCERT")
+
+ CÚ PHÁP GỌI TOOL:
+ Khi bạn cần gọi tool, hãy trả lời CHÍNH XÁC theo format JSON này:
+ ```json
+ {
+     "tool_call": true,
+     "function_name": "get_event_details",
+     "arguments": {
+         "event_code": "69194cf61c0eda56688806f7"
+     },
+     "reason": "Cần lấy thông tin chính xác về ngày giờ tổ chức"
+ }
+ ```
+
+ QUAN TRỌNG:
+ - event_code PHẢI LÀ metadata.id_use từ context (dạng MongoDB ObjectId)
+ - KHÔNG dùng tên sự kiện như "Y-CONCERT" làm event_code
+ - CHỈ trả JSON khi BẮT BUỘC cần gọi tool
+ - Nếu có thể trả lời từ context sẵn có, đừng gọi tool
+ - Sau khi nhận kết quả từ tool, hãy trả lời user bằng ngôn ngữ tự nhiên
+ """
+
+     async def parse_and_execute(self, llm_response: str) -> Optional[Dict[str, Any]]:
+         """
+         Parse the LLM response and execute a tool call if one is present
+
+         Returns:
+             None if there is no tool call
+             Dict with the tool result if there is one
+         """
+         # Try to extract JSON from response
+         try:
+             # Find the JSON block in the response
+             if "```json" in llm_response:
+                 json_start = llm_response.find("```json") + 7
+                 json_end = llm_response.find("```", json_start)
+                 json_str = llm_response[json_start:json_end].strip()
+             elif "{" in llm_response and "}" in llm_response:
+                 # Fallback: find the first JSON object
+                 json_start = llm_response.find("{")
+                 json_end = llm_response.rfind("}") + 1
+                 json_str = llm_response[json_start:json_end]
+             else:
+                 return None
+
+             tool_call = json.loads(json_str)
+
+             # Handle multiple JSON formats from LLM
+
+             # Format 1: HF API nested wrapper
+             # {"name": "tool_call", "arguments": {"tool_call": true, ...}}
+             if "name" in tool_call and "arguments" in tool_call and isinstance(tool_call["arguments"], dict):
+                 if "tool_call" in tool_call["arguments"]:
+                     tool_call = tool_call["arguments"]  # Unwrap
+
+             # Format 2: Direct tool name format
+             # {"name": "tool.get_event_details", "arguments": {"event_code": "..."}}
+             if "name" in tool_call and "arguments" in tool_call:
+                 function_name = tool_call["name"]
+                 # Remove "tool." prefix if it exists
+                 if function_name.startswith("tool."):
+                     function_name = function_name.replace("tool.", "")
+
+                 # Convert to standard format
+                 tool_call = {
+                     "tool_call": True,
+                     "function_name": function_name,
+                     "arguments": tool_call["arguments"],
+                     "reason": "Converted from alternate format"
+                 }
+
+             # Validate tool call structure
+             if not tool_call.get("tool_call"):
+                 return None
+
+             function_name = tool_call.get("function_name")
+             arguments = tool_call.get("arguments", {})
+
+             # Execute tool
+             if function_name == "get_event_details":
+                 result = await self._get_event_details(arguments.get("event_code"))
+                 return {
+                     "function": function_name,
+                     "arguments": arguments,
+                     "result": result
+                 }
+             else:
+                 return {
+                     "function": function_name,
+                     "arguments": arguments,
+                     "result": {"success": False, "error": f"Unknown function: {function_name}"}
+                 }
+
+         except (json.JSONDecodeError, KeyError, ValueError) as e:
+             # Not a tool call; treat it as a normal response
+             return None
+
+     async def _get_event_details(self, event_code: str) -> Dict[str, Any]:
+         """
+         Call the getEventByEventCode API
+         """
+         print(f"\n=== CALLING API get_event_details ===")
+         print(f"Event Code: {event_code}")
+
+         try:
+             url = f"https://hoalacrent.io.vn/api/v0/event/get-event-by-event-code"
+             params = {"eventCode": event_code}
+
+             print(f"URL: {url}")
+             print(f"Params: {params}")
+
+             response = await self.client.get(url, params=params)
+
+             print(f"Status Code: {response.status_code}")
+
+             # Log raw response for debugging
+             raw_text = response.text
+             print(f"Raw Response Length: {len(raw_text)} chars")
+             print(f"Raw Response Preview (first 200 chars): {raw_text[:200]}")
+
+             response.raise_for_status()
+
+             # Try to parse JSON
+             try:
+                 data = response.json()
+             except json.JSONDecodeError as e:
+                 print(f"JSON Decode Error: {e}")
+                 print(f"Full Raw Response: {raw_text}")
+                 return {
+                     "success": False,
+                     "error": "Invalid JSON response from API",
+                     "message": "API trả về dữ liệu không hợp lệ (không phải JSON)",
+                     "raw_response_preview": raw_text[:500]
+                 }
+
+             print(f"Response Data Keys: {list(data.keys()) if data else 'None'}")
+             print(f"Has 'data' field: {'data' in data}")
+
+             # Extract relevant fields
+             event = data.get("data", {})
+
+             if not event:
+                 return {
+                     "success": False,
+                     "error": "Event not found",
+                     "message": f"Không tìm thấy sự kiện với mã {event_code}"
+                 }
+
+             # Extract location with its nested address structure
+             location_data = event.get("location", {})
+             location = {
+                 "address": {
+                     "street": location_data.get("address", {}).get("street", ""),
+                     "city": location_data.get("address", {}).get("city", ""),
+                     "state": location_data.get("address", {}).get("state", ""),
+                     "postalCode": location_data.get("address", {}).get("postalCode", ""),
+                     "country": location_data.get("address", {}).get("country", "")
+                 },
+                 "coordinates": {
+                     "latitude": location_data.get("coordinates", {}).get("latitude"),
+                     "longitude": location_data.get("coordinates", {}).get("longitude")
+                 }
+             }
+
+             # Build event URL
+             event_code = event.get("eventCode")
+             event_url = f"https://www.festavenue.site/user/event/{event_code}" if event_code else None
+
+             # Assign to a variable (instead of returning directly) so the
+             # completion logs below are actually reached
+             result = {
+                 "success": True,
+                 "event_code": event_code,
+                 "event_name": event.get("eventName"),
+                 "event_url": event_url,  # NEW: Direct link to event page
+                 "description": event.get("description"),
+                 "short_description": event.get("shortDescription"),
+                 "start_time": event.get("startTimeEventTime"),
+                 "end_time": event.get("endTimeEventTime"),
+                 "start_sale": event.get("startTicketSaleTime"),
+                 "end_sale": event.get("endTicketSaleTime"),
+                 "location": location,  # Full nested structure
+                 "contact": {
+                     "email": event.get("publicContactEmail"),
+                     "phone": event.get("publicContactPhone"),
+                     "website": event.get("website")
+                 },
+                 "capacity": event.get("capacity"),
+                 "hashtags": event.get("hashtags", [])
+             }
+
+             print(f"Successfully extracted event data for: {event.get('eventName')}")
+             print(f"=== API CALL COMPLETE ===")
+             return result
+
+         except httpx.HTTPStatusError as e:
+             return {
+                 "success": False,
+                 "error": f"HTTP {e.response.status_code}",
+                 "message": f"API trả về lỗi khi truy vấn sự kiện {event_code}"
+             }
+         except Exception as e:
+             return {
+                 "success": False,
+                 "error": str(e),
+                 "message": "Không thể kết nối đến API để lấy thông tin sự kiện"
+             }
+
+     async def close(self):
+         """Close HTTP client"""
+         await self.client.aclose()
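A quick demo of the parsing path above. The function name lookup_venue is deliberately unknown, so the call exercises the "Format 2" unwrapping and returns the error dict without ever reaching the real event API.

```python
# Hypothetical parse_and_execute demo; lookup_venue is not a real tool.
import asyncio

from tools_service import ToolsService


async def main():
    service = ToolsService()
    llm_response = """```json
{"name": "tool.lookup_venue", "arguments": {"venue_id": "abc"}}
```"""
    result = await service.parse_and_execute(llm_response)
    print(result)
    # -> {'function': 'lookup_venue', 'arguments': {'venue_id': 'abc'},
    #     'result': {'success': False, 'error': 'Unknown function: lookup_venue'}}
    await service.close()


asyncio.run(main())
```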