main.py CHANGED
@@ -683,167 +683,57 @@ async def get_stats():
 @app.post("/chat", response_model=ChatResponse)
 async def chat(request: ChatRequest):
     """
-
-
-
-
+    Multi-turn conversational chatbot with RAG + Function Calling
+
+    Features:
+    - ✅ Server-side session management (session_id is created automatically)
+    - ✅ Conversation history tracking
+    - ✅ RAG context retrieval
+    - ✅ Function calling (calls APIs when detailed information is needed)
+
+    Flow:
+    1. First request: no session_id needed → the backend creates a new one
+    2. Follow-up requests: send the session_id from the previous response → the backend remembers the context
+
+    Example:
+    ```
+    # First call
+    POST /chat { "message": "Find concert events" }
+    Response: { "session_id": "abc-123", "response": "..." }
+
+    # Second call (follow-up)
+    POST /chat { "message": "What is the exact date?", "session_id": "abc-123" }
+    Response: { "session_id": "abc-123", "response": "..." }  # the bot understands the context
+    ```
+
+    Body Parameters:
+    - message: User message (required)
+    - session_id: Session ID for multi-turn chat (optional, auto-created if omitted)
     - use_rag: Enable RAG retrieval (default: true)
-
-
-
-
-    - hf_token: Hugging Face token (optional, falls back to the environment variable if not provided)
-
+    - enable_tools: Enable function calling (default: true)
+    - top_k: Number of documents (default: 3)
+    - temperature: LLM temperature (default: 0.7)
+
     Returns:
-    - response:
+    - response: AI generated response
+    - session_id: Session identifier (ALWAYS returned)
     - context_used: Retrieved context documents
+    - tool_calls: API calls made (if any)
     - timestamp: Response timestamp
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-                query=request.message,
-                top_k=request.top_k,
-                score_threshold=request.score_threshold,
-                use_reranking=request.use_reranking,
-                use_compression=request.use_compression,
-                use_query_expansion=request.use_query_expansion,
-                max_context_tokens=500,
-                hf_client=hf_client
-            )
-
-            # Convert to dict format
-            context_used = [
-                {
-                    "id": doc.id,
-                    "confidence": doc.confidence,
-                    "metadata": doc.metadata
-                }
-                for doc in documents
-            ]
-            rag_stats = stats
-
-            # Format context using Advanced RAG
-            context_text = advanced_rag.format_context_for_llm(documents)
-        else:
-            # Basic RAG (fallback)
-            query_embedding = embedding_service.encode_text(request.message)
-            results = qdrant_service.search(
-                query_embedding=query_embedding,
-                limit=request.top_k,
-                score_threshold=request.score_threshold
-            )
-            context_used = results
-
-            context_text = "\n\nRelevant Context:\n"
-            for i, doc in enumerate(context_used, 1):
-                doc_text = doc["metadata"].get("text", "")
-                if not doc_text:
-                    doc_text = " ".join(doc["metadata"].get("texts", []))
-                confidence = doc["confidence"]
-                context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
-
-        # Build system message with context
-        if request.use_rag and context_used:
-            if request.use_advanced_rag:
-                # Use Advanced RAG prompt builder
-                system_message = advanced_rag.build_rag_prompt(
-                    query=request.message,
-                    context=context_text,
-                    system_message=request.system_message
-                )
-            else:
-                # Basic prompt
-                # Basic prompt with better instructions
-                system_message = f"""{request.system_message}
-
-{context_text}
-
-INSTRUCTIONS:
-- Use the information from the context above to answer the question.
-- Answer naturally and in a friendly tone; do not copy verbatim.
-- If events are found, summarize the most important information.
-"""
-        else:
-            system_message = request.system_message
-
-        # Use token from request or fall back to env
-        token = request.hf_token or hf_token
-        # Generate response
-        if not token:
-            response = f"""[LLM Response Placeholder]
-
-Context retrieved: {len(context_used)} documents
-User question: {request.message}
-
-To enable actual LLM generation:
-1. Set HUGGINGFACE_TOKEN environment variable, OR
-2. Pass hf_token in request body
-
-Example:
-{{
-    "message": "Your question",
-    "hf_token": "hf_xxxxxxxxxxxxx"
-}}
-"""
-        else:
-            try:
-                client = InferenceClient(
-                    token=hf_token,
-                    model="openai/gpt-oss-20b"
-                )
-
-                # Build messages - always use the standard structure:
-                # system = instructions + context, user = query
-                messages = [
-                    {"role": "system", "content": system_message},
-                    {"role": "user", "content": request.message}
-                ]
-
-                # Generate response
-                response = ""
-                for msg in client.chat_completion(
-                    messages,
-                    max_tokens=request.max_tokens,
-                    stream=True,
-                    temperature=request.temperature,
-                    top_p=request.top_p,
-                ):
-                    choices = msg.choices
-                    if len(choices) and choices[0].delta.content:
-                        response += choices[0].delta.content
-
-            except Exception as e:
-                response = f"Error generating response with LLM: {str(e)}\n\nContext was retrieved successfully, but LLM generation failed."
-
-        # Save to history
-        chat_data = {
-            "user_message": request.message,
-            "assistant_response": response,
-            "context_used": context_used,
-            "timestamp": datetime.utcnow()
-        }
-        chat_history_collection.insert_one(chat_data)
-
-        return ChatResponse(
-            response=response,
-            context_used=context_used,
-            timestamp=datetime.utcnow().isoformat(),
-            rag_stats=rag_stats
-        )
-
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+    # Import chat endpoint logic
+    from chat_endpoint import chat_endpoint
+
+    return await chat_endpoint(
+        request=request,
+        conversation_service=conversation_service,
+        tools_service=tools_service,
+        advanced_rag=advanced_rag,
+        embedding_service=embedding_service,
+        qdrant_service=qdrant_service,
+        chat_history_collection=chat_history_collection,
+        hf_token=hf_token
+    )
 
 
 @app.post("/documents", response_model=AddDocumentResponse)
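
For context, here is a hedged sketch of the `ChatRequest` fields implied by the docstring's Body Parameters. The real Pydantic model is defined elsewhere in main.py and is not shown in this hunk, so the exact types and defaults below are assumptions.

```python
# Hedged sketch: ChatRequest fields implied by the new docstring. The actual
# model definition is outside this diff; types and defaults are assumptions.
from typing import Optional
from pydantic import BaseModel

class ChatRequest(BaseModel):
    message: str                      # required user message
    session_id: Optional[str] = None  # multi-turn session; auto-created if omitted
    use_rag: bool = True              # enable RAG retrieval
    enable_tools: bool = True         # enable function calling
    top_k: int = 3                    # number of documents to retrieve
    temperature: float = 0.7          # LLM sampling temperature
```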
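A minimal client-side sketch of the two-call session flow from the docstring's example; `BASE_URL` is a placeholder for wherever the Space is served, not something taken from this diff.

```python
# Minimal client sketch of the multi-turn flow described in the docstring.
import requests

BASE_URL = "http://localhost:8000"  # placeholder; adjust to your deployment

# First call: no session_id, so the backend creates one
first = requests.post(f"{BASE_URL}/chat", json={"message": "Find concert events"})
first.raise_for_status()
session_id = first.json()["session_id"]

# Follow-up call: reuse session_id so the backend keeps the conversation context
followup = requests.post(
    f"{BASE_URL}/chat",
    json={"message": "What is the exact date?", "session_id": session_id},
)
print(followup.json()["response"])
```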
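The new handler delegates all of its logic to `chat_endpoint`, whose module is not included in this diff. A purely illustrative skeleton, consistent with the call site but otherwise hypothetical:

```python
# Illustrative only: chat_endpoint.py is not part of this diff. Only the
# keyword parameters and the awaitable return are confirmed by the call site
# in main.py; the body below just lists the responsibilities the new docstring
# describes and is NOT the real implementation.
from typing import Any

async def chat_endpoint(
    request: Any,
    conversation_service: Any,
    tools_service: Any,
    advanced_rag: Any,
    embedding_service: Any,
    qdrant_service: Any,
    chat_history_collection: Any,
    hf_token: Any,
) -> Any:
    # 1. Resolve or create session_id (server-side session management)
    # 2. Load conversation history for multi-turn context
    # 3. Run RAG retrieval and, if enabled, function calling via tools_service
    # 4. Generate the LLM response and persist the turn to chat_history_collection
    # 5. Return a ChatResponse carrying response, session_id, context_used, tool_calls
    raise NotImplementedError("sketch only; see chat_endpoint.py")
```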