minhvtt committed
Commit caa8975 · verified · 1 Parent(s): e05f8fb

Upload 14 files

Files changed (1)
  1. main.py +46 -156
main.py CHANGED
@@ -683,167 +683,57 @@ async def get_stats():
 @app.post("/chat", response_model=ChatResponse)
 async def chat(request: ChatRequest):
     """
-    Chat endpoint with RAG
-
-    Body:
-    - message: User message
+    Multi-turn conversational chatbot with RAG + Function Calling
+
+    Features:
+    - Server-side session management (session_id is created automatically)
+    - ✅ Conversation history tracking
+    - ✅ RAG context retrieval
+    - ✅ Function calling (calls APIs when detailed information is needed)
+
+    Flow:
+    1. First request: no session_id needed → the backend creates a new one
+    2. Follow-up requests: send the session_id from the previous response → the backend remembers the context
+
+    Example:
+    ```
+    # Turn 1
+    POST /chat { "message": "Find concert events" }
+    Response: { "session_id": "abc-123", "response": "..." }
+
+    # Turn 2 (follow-up)
+    POST /chat { "message": "What is the exact date?", "session_id": "abc-123" }
+    Response: { "session_id": "abc-123", "response": "..." }  # The bot understands the context
+    ```
+
+    Body Parameters:
+    - message: User message (required)
+    - session_id: Session ID for multi-turn chat (optional, created automatically if omitted)
     - use_rag: Enable RAG retrieval (default: true)
-    - top_k: Number of documents to retrieve (default: 3)
-    - system_message: System prompt (optional)
-    - max_tokens: Max tokens for response (default: 512)
-    - temperature: Temperature for generation (default: 0.7)
-    - hf_token: Hugging Face token (optional; uses the env token if not provided)
-
+    - enable_tools: Enable function calling (default: true)
+    - top_k: Number of documents (default: 3)
+    - temperature: LLM temperature (default: 0.7)
+
     Returns:
-    - response: Generated response
+    - response: AI generated response
+    - session_id: Session identifier (returned in every response)
     - context_used: Retrieved context documents
+    - tool_calls: API calls made (if any)
     - timestamp: Response timestamp
     """
-    try:
-        # Retrieve context if RAG enabled
-        context_used = []
-        rag_stats = None
-
-        if request.use_rag:
-            if request.use_advanced_rag:
-                # Use Advanced RAG Pipeline (Best Case 2025)
-                hf_client = None
-                if request.hf_token or hf_token:
-                    hf_client = InferenceClient(token=request.hf_token or hf_token)
-
-                documents, stats = advanced_rag.hybrid_rag_pipeline(
-                    query=request.message,
-                    top_k=request.top_k,
-                    score_threshold=request.score_threshold,
-                    use_reranking=request.use_reranking,
-                    use_compression=request.use_compression,
-                    use_query_expansion=request.use_query_expansion,
-                    max_context_tokens=500,
-                    hf_client=hf_client
-                )
-
-                # Convert to dict format
-                context_used = [
-                    {
-                        "id": doc.id,
-                        "confidence": doc.confidence,
-                        "metadata": doc.metadata
-                    }
-                    for doc in documents
-                ]
-                rag_stats = stats
-
-                # Format context using Advanced RAG
-                context_text = advanced_rag.format_context_for_llm(documents)
-            else:
-                # Basic RAG (fallback)
-                query_embedding = embedding_service.encode_text(request.message)
-                results = qdrant_service.search(
-                    query_embedding=query_embedding,
-                    limit=request.top_k,
-                    score_threshold=request.score_threshold
-                )
-                context_used = results
-
-                context_text = "\n\nRelevant Context:\n"
-                for i, doc in enumerate(context_used, 1):
-                    doc_text = doc["metadata"].get("text", "")
-                    if not doc_text:
-                        doc_text = " ".join(doc["metadata"].get("texts", []))
-                    confidence = doc["confidence"]
-                    context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
-
-        # Build system message with context
-        if request.use_rag and context_used:
-            if request.use_advanced_rag:
-                # Use Advanced RAG prompt builder
-                system_message = advanced_rag.build_rag_prompt(
-                    query=request.message,
-                    context=context_text,
-                    system_message=request.system_message
-                )
-            else:
-                # Basic prompt with better instructions
-                system_message = f"""{request.system_message}
-
-{context_text}
-
-INSTRUCTIONS:
-- Use the information from the context above to answer the question.
-- Answer naturally and in a friendly tone; do not copy verbatim.
-- If an event is found, summarize its most important details.
-"""
-        else:
-            system_message = request.system_message
-
-        # Use token from request or fall back to env
-        token = request.hf_token or hf_token
-        # Generate response
-        if not token:
-            response = f"""[LLM Response Placeholder]
-
-Context retrieved: {len(context_used)} documents
-User question: {request.message}
-
-To enable actual LLM generation:
-1. Set HUGGINGFACE_TOKEN environment variable, OR
-2. Pass hf_token in request body
-
-Example:
-{{
-    "message": "Your question",
-    "hf_token": "hf_xxxxxxxxxxxxx"
-}}
-"""
-        else:
-            try:
-                client = InferenceClient(
-                    token=hf_token,
-                    model="openai/gpt-oss-20b"
-                )
-
-                # Build messages - always use the standard structure
-                # System = instructions + context, User = query
-                messages = [
-                    {"role": "system", "content": system_message},
-                    {"role": "user", "content": request.message}
-                ]
-
-                # Generate response
-                response = ""
-                for msg in client.chat_completion(
-                    messages,
-                    max_tokens=request.max_tokens,
-                    stream=True,
-                    temperature=request.temperature,
-                    top_p=request.top_p,
-                ):
-                    choices = msg.choices
-                    if len(choices) and choices[0].delta.content:
-                        response += choices[0].delta.content
-
-            except Exception as e:
-                response = f"Error generating response with LLM: {str(e)}\n\nContext was retrieved successfully, but LLM generation failed."
-
-        # Save to history
-        chat_data = {
-            "user_message": request.message,
-            "assistant_response": response,
-            "context_used": context_used,
-            "timestamp": datetime.utcnow()
-        }
-        chat_history_collection.insert_one(chat_data)
-
-        return ChatResponse(
-            response=response,
-            context_used=context_used,
-            timestamp=datetime.utcnow().isoformat(),
-            rag_stats=rag_stats
-        )
-
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+    # Import chat endpoint logic
+    from chat_endpoint import chat_endpoint
+
+    return await chat_endpoint(
+        request=request,
+        conversation_service=conversation_service,
+        tools_service=tools_service,
+        advanced_rag=advanced_rag,
+        embedding_service=embedding_service,
+        qdrant_service=qdrant_service,
+        chat_history_collection=chat_history_collection,
+        hf_token=hf_token
+    )
 
 
 @app.post("/documents", response_model=AddDocumentResponse)
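
For reference, the multi-turn flow described in the new docstring can be exercised with a short client script. This is a minimal sketch, not part of the commit: the base URL is an assumption, and the payload and response fields follow the docstring example.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumption: adjust to the deployed host

# Turn 1: no session_id, so the backend creates one
r1 = requests.post(f"{BASE_URL}/chat", json={"message": "Find concert events"})
r1.raise_for_status()
session_id = r1.json()["session_id"]
print(r1.json()["response"])

# Turn 2: reuse the session_id so the bot keeps the conversation context
r2 = requests.post(
    f"{BASE_URL}/chat",
    json={"message": "What is the exact date?", "session_id": session_id},
)
r2.raise_for_status()
print(r2.json()["response"])
```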