minhvtt commited on
Commit
ca36499
·
verified ·
1 Parent(s): 883a213

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +66 -24
main.py CHANGED
@@ -10,6 +10,7 @@ import os
10
  from datetime import datetime
11
  from pymongo import MongoClient
12
  from huggingface_hub import InferenceClient
 
13
  from embedding_service import JinaClipEmbeddingService
14
  from qdrant_service import QdrantVectorService
15
  from advanced_rag import AdvancedRAG
@@ -124,7 +125,7 @@ class ChatRequest(BaseModel):
124
  message: str
125
  use_rag: bool = True
126
  top_k: int = 3
127
- system_message: Optional[str] = """Bạn là trợ lý AI chuyên biệt cho hệ thống quản lý sự kiện và mạng xã hội.
128
  Vai trò của bạn là trả lời các câu hỏi CHÍNH XÁC dựa trên dữ liệu được cung cấp từ hệ thống.
129
 
130
  Quy tắc tuyệt đối:
@@ -685,29 +686,69 @@ async def chat(request: ChatRequest):
685
  try:
686
  # Retrieve context if RAG enabled
687
  context_used = []
 
 
688
  if request.use_rag:
689
- # Generate query embedding
690
- query_embedding = embedding_service.encode_text(request.message)
691
-
692
- # Search in Qdrant
693
- results = qdrant_service.search(
694
- query_embedding=query_embedding,
695
- limit=request.top_k,
696
- score_threshold=0.5
697
- )
698
- context_used = results
699
-
700
- # Build context text
701
- context_text = ""
702
- if context_used:
703
- context_text = "\n\nRelevant Context:\n"
704
- for i, doc in enumerate(context_used, 1):
705
- doc_text = doc["metadata"].get("text", "")
706
- confidence = doc["confidence"]
707
- context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
708
-
709
- # Add context to system message
710
- system_message = f"{request.system_message}\n{context_text}\n\nPlease use the above context to answer the user's question when relevant."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
  else:
712
  system_message = request.system_message
713
 
@@ -771,7 +812,8 @@ Example:
771
  return ChatResponse(
772
  response=response,
773
  context_used=context_used,
774
- timestamp=datetime.utcnow().isoformat()
 
775
  )
776
 
777
  except Exception as e:
 
10
  from datetime import datetime
11
  from pymongo import MongoClient
12
  from huggingface_hub import InferenceClient
13
+
14
  from embedding_service import JinaClipEmbeddingService
15
  from qdrant_service import QdrantVectorService
16
  from advanced_rag import AdvancedRAG
 
125
  message: str
126
  use_rag: bool = True
127
  top_k: int = 3
128
+ system_message: Optional[str] = """Bạn là trợ lý AI chuyên biệt cho hệ thống quản lý sự kiện và bán vé.
129
  Vai trò của bạn là trả lời các câu hỏi CHÍNH XÁC dựa trên dữ liệu được cung cấp từ hệ thống.
130
 
131
  Quy tắc tuyệt đối:
 
686
  try:
687
  # Retrieve context if RAG enabled
688
  context_used = []
689
+ rag_stats = None
690
+
691
  if request.use_rag:
692
+ if request.use_advanced_rag:
693
+ # Use Advanced RAG Pipeline (Best Case 2025)
694
+ hf_client = None
695
+ if request.hf_token or hf_token:
696
+ hf_client = InferenceClient(token=request.hf_token or hf_token)
697
+
698
+ documents, stats = advanced_rag.hybrid_rag_pipeline(
699
+ query=request.message,
700
+ top_k=request.top_k,
701
+ score_threshold=request.score_threshold,
702
+ use_reranking=request.use_reranking,
703
+ use_compression=request.use_compression,
704
+ use_query_expansion=request.use_query_expansion,
705
+ max_context_tokens=500,
706
+ hf_client=hf_client
707
+ )
708
+
709
+ # Convert to dict format
710
+ context_used = [
711
+ {
712
+ "id": doc.id,
713
+ "confidence": doc.confidence,
714
+ "metadata": doc.metadata
715
+ }
716
+ for doc in documents
717
+ ]
718
+ rag_stats = stats
719
+
720
+ # Format context using Advanced RAG
721
+ context_text = advanced_rag.format_context_for_llm(documents)
722
+ else:
723
+ # Basic RAG (fallback)
724
+ query_embedding = embedding_service.encode_text(request.message)
725
+ results = qdrant_service.search(
726
+ query_embedding=query_embedding,
727
+ limit=request.top_k,
728
+ score_threshold=request.score_threshold
729
+ )
730
+ context_used = results
731
+
732
+ context_text = "\n\nRelevant Context:\n"
733
+ for i, doc in enumerate(context_used, 1):
734
+ doc_text = doc["metadata"].get("text", "")
735
+ if not doc_text:
736
+ doc_text = " ".join(doc["metadata"].get("texts", []))
737
+ confidence = doc["confidence"]
738
+ context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
739
+
740
+ # Build system message with context
741
+ if request.use_rag and context_used:
742
+ if request.use_advanced_rag:
743
+ # Use Advanced RAG prompt builder
744
+ system_message = advanced_rag.build_rag_prompt(
745
+ query=request.message,
746
+ context=context_text,
747
+ system_message=request.system_message
748
+ )
749
+ else:
750
+ # Basic prompt
751
+ system_message = f"{request.system_message}\n{context_text}\n\nPlease use the above context to answer the user's question when relevant."
752
  else:
753
  system_message = request.system_message
754
 
 
812
  return ChatResponse(
813
  response=response,
814
  context_used=context_used,
815
+ timestamp=datetime.utcnow().isoformat(),
816
+ rag_stats=rag_stats
817
  )
818
 
819
  except Exception as e: