Spaces:

minhvtt
/

ChatbotRAG

Sleeping

App Files Community

minhvtt committed on Oct 7

Commit

eda7f22

verified ·

1 Parent(s): 05351f2

Update main.py

Browse files

Files changed (1) hide show

main.py +359 -9

main.py CHANGED Viewed

@@ -1,33 +1,63 @@
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-from typing import Optional, List
 from PIL import Image
 import io
 import numpy as np
 from embedding_service import JinaClipEmbeddingService
 from qdrant_service import QdrantVectorService
 # Initialize FastAPI app
 app = FastAPI(
-    title="Event Social Media Embeddings API",
-    description="API để embeddings và search text + images từ events & social media với Jina CLIP v2 + Qdrant",
-    version="1.0.0"
 )
 # Initialize services
 print("Initializing services...")
 embedding_service = JinaClipEmbeddingService(model_path="jinaai/jina-clip-v2")
 qdrant_service = QdrantVectorService(
-    # URL và API key sẽ lấy từ environment variables
-    collection_name="event_social_media",
     vector_size=embedding_service.get_embedding_dimension()
 )
 print("✓ Services initialized successfully")
-# Pydantic models
 class SearchRequest(BaseModel):
     text: Optional[str] = None
     limit: int = 10
@@ -48,15 +78,62 @@ class IndexResponse(BaseModel):
     message: str
 @app.get("/")
 async def root():
     """Health check endpoint"""
     return {
         "status": "running",
-        "service": "Event Social Media Embeddings API",
         "embedding_model": "Jina CLIP v2",
         "vector_db": "Qdrant",
-        "language_support": "Vietnamese + 88 other languages"
     }
@@ -342,6 +419,279 @@ async def get_stats():
         raise HTTPException(status_code=500, detail=f"Lỗi khi lấy stats: {str(e)}")
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(

 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+from typing import Optional, List, Dict
 from PIL import Image
 import io
 import numpy as np
+import os
+from datetime import datetime
+from pymongo import MongoClient
+from huggingface_hub import InferenceClient
 from embedding_service import JinaClipEmbeddingService
 from qdrant_service import QdrantVectorService
 # Initialize FastAPI app
 app = FastAPI(
+    title="Event Social Media Embeddings & ChatbotRAG API",
+    description="API để embeddings, search và ChatbotRAG với Jina CLIP v2 + Qdrant + MongoDB + LLM",
+    version="2.0.0"
+)
+# CORS middleware
+# Allowed origins are read from CORS_ALLOW_ORIGINS (comma-separated); the
+# default "*" keeps the API reachable from any origin, as before.
+cors_origins = [
+    origin.strip()
+    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
+    if origin.strip()
+] or ["*"]
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=cors_origins,
+    # A wildcard origin must not be combined with credentials: that would let
+    # any website issue credentialed requests. Credentials are therefore only
+    # enabled when an explicit origin list is configured.
+    allow_credentials="*" not in cors_origins,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
 # Initialize services
 print("Initializing services...")
 embedding_service = JinaClipEmbeddingService(model_path="jinaai/jina-clip-v2")
+# Qdrant collection name is configurable; defaults to the original collection.
+collection_name = os.getenv("COLLECTION_NAME", "event_social_media")
 qdrant_service = QdrantVectorService(
+    collection_name=collection_name,
     vector_size=embedding_service.get_embedding_dimension()
 )
+print(f"✓ Qdrant collection: {collection_name}")
+# MongoDB connection
+# The connection string carries credentials, so it must come from the
+# environment and never from source control. Fail fast with a clear message
+# instead of silently falling back to a hard-coded account.
+mongodb_uri = os.getenv("MONGODB_URI")
+if not mongodb_uri:
+    raise RuntimeError(
+        "MONGODB_URI environment variable is required to connect to MongoDB"
+    )
+mongo_client = MongoClient(mongodb_uri)
+db = mongo_client[os.getenv("MONGODB_DB_NAME", "chatbot_rag")]
+documents_collection = db["documents"]
+chat_history_collection = db["chat_history"]
+# NOTE(review): MongoClient is presumably lazy here, so this message confirms
+# configuration rather than a verified connection - confirm against PyMongo docs.
+print("✓ MongoDB connected")
+# Hugging Face token (optional; /chat also accepts a per-request token)
+hf_token = os.getenv("HUGGINGFACE_TOKEN")
+if hf_token:
+    print("✓ Hugging Face token configured")
 print("✓ Services initialized successfully")
+# Pydantic models for embeddings
 class SearchRequest(BaseModel):
     text: Optional[str] = None
     limit: int = 10
     message: str
+# Pydantic models for ChatbotRAG
+class ChatRequest(BaseModel):
+    # Request body for POST /chat.
+    # User message; also the text that is embedded for retrieval when use_rag is true.
+    message: str
+    # Whether to retrieve context documents from Qdrant before generating.
+    use_rag: bool = True
+    # Number of context documents requested from the vector search.
+    top_k: int = 3
+    # Base system prompt; retrieved context is appended to it when any is found.
+    system_message: Optional[str] = "You are a helpful AI assistant."
+    # Generation parameters forwarded to the LLM chat-completion call.
+    max_tokens: int = 512
+    temperature: float = 0.7
+    top_p: float = 0.95
+    # Per-request Hugging Face token; the HUGGINGFACE_TOKEN env value is used when omitted.
+    hf_token: Optional[str] = None
+class ChatResponse(BaseModel):
+    # Response body for POST /chat.
+    # Generated answer (or a placeholder / error text when no LLM call succeeded).
+    response: str
+    # Documents returned by the vector search; empty when RAG is disabled.
+    context_used: List[Dict]
+    # ISO-8601 string produced from a UTC datetime.
+    timestamp: str
+class AddDocumentRequest(BaseModel):
+    # Request body for POST /documents.
+    # Document text; stored in MongoDB and embedded for indexing in Qdrant.
+    text: str
+    # Optional extra fields stored with the document and merged into the Qdrant payload.
+    metadata: Optional[Dict] = None
+class AddDocumentResponse(BaseModel):
+    # Response body for POST /documents.
+    success: bool
+    # String form of the MongoDB inserted_id of the new document.
+    doc_id: str
+    # Human-readable status message.
+    message: str
 @app.get("/")
 async def root():
+    """Report that the service is running and list the routes it exposes."""
+    # Routes of the embeddings / vector-search part of the API.
+    embedding_routes = {
+        "POST /index": "Index data với text/image",
+        "POST /search": "Hybrid search",
+        "POST /search/text": "Text search",
+        "POST /search/image": "Image search",
+        "DELETE /delete/{doc_id}": "Delete document",
+        "GET /document/{doc_id}": "Get document",
+        "GET /stats": "Collection statistics"
+    }
+    # Routes of the ChatbotRAG part of the API.
+    rag_routes = {
+        "POST /chat": "Chat với RAG",
+        "POST /documents": "Add document to knowledge base",
+        "POST /rag/search": "Search in knowledge base",
+        "GET /history": "Get chat history",
+        "DELETE /documents/{doc_id}": "Delete document from knowledge base"
+    }
+    service_info = {
+        "status": "running",
+        "service": "Event Social Media Embeddings & ChatbotRAG API",
+        "embedding_model": "Jina CLIP v2",
+        "vector_db": "Qdrant",
+        "language_support": "Vietnamese + 88 other languages"
+    }
+    service_info["endpoints"] = {
+        "embeddings": embedding_routes,
+        "chatbot_rag": rag_routes
+    }
+    return service_info
         raise HTTPException(status_code=500, detail=f"Lỗi khi lấy stats: {str(e)}")
+# ============================================
+# ChatbotRAG Endpoints
+# ============================================
+def _retrieve_context(message: str, top_k: int) -> List[Dict]:
+    """Embed *message* and return the vector-search hits scoring at least 0.5."""
+    query_embedding = embedding_service.encode_text(message)
+    return qdrant_service.search(
+        query_embedding=query_embedding,
+        limit=top_k,
+        score_threshold=0.5
+    )
+def _build_system_message(base_message: Optional[str], context_docs: List[Dict]) -> str:
+    """Return the system prompt, with the retrieved context appended when present."""
+    # A client may send system_message=null; fall back to the default prompt
+    # instead of interpolating the literal string "None" into the prompt.
+    base = base_message or "You are a helpful AI assistant."
+    if not context_docs:
+        return base
+    entries = [
+        f"\n[{i}] (Confidence: {doc['confidence']:.2f})\n{doc['metadata'].get('text', '')}\n"
+        for i, doc in enumerate(context_docs, 1)
+    ]
+    context_text = "\n\nRelevant Context:\n" + "".join(entries)
+    return f"{base}\n{context_text}\n\nPlease use the above context to answer the user's question when relevant."
+def _placeholder_response(doc_count: int, message: str) -> str:
+    """Return the explanatory text sent back when no Hugging Face token is available."""
+    return f"""[LLM Response Placeholder]
+Context retrieved: {doc_count} documents
+User question: {message}
+To enable actual LLM generation:
+1. Set HUGGINGFACE_TOKEN environment variable, OR
+2. Pass hf_token in request body
+Example:
+{{
+  "message": "Your question",
+  "hf_token": "hf_xxxxxxxxxxxxx"
+}}
+"""
+def _generate_llm_response(token: str, system_message: str, request: ChatRequest) -> str:
+    """Stream a chat completion from the Hugging Face model and return the full text."""
+    client = InferenceClient(
+        token=token,
+        model="openai/gpt-oss-20b"
+    )
+    messages = [
+        {"role": "system", "content": system_message},
+        {"role": "user", "content": request.message}
+    ]
+    chunks = []
+    for msg in client.chat_completion(
+        messages,
+        max_tokens=request.max_tokens,
+        stream=True,
+        temperature=request.temperature,
+        top_p=request.top_p,
+    ):
+        choices = msg.choices
+        # Some stream events carry no choices or an empty delta; skip them.
+        if choices and choices[0].delta.content:
+            chunks.append(choices[0].delta.content)
+    return "".join(chunks)
+@app.post("/chat", response_model=ChatResponse)
+async def chat(request: ChatRequest):
+    """
+    Chat endpoint with RAG
+    Body:
+    - message: User message
+    - use_rag: Enable RAG retrieval (default: true)
+    - top_k: Number of documents to retrieve (default: 3)
+    - system_message: System prompt (optional)
+    - max_tokens: Max tokens for response (default: 512)
+    - temperature: Temperature for generation (default: 0.7)
+    - top_p: Nucleus sampling parameter (default: 0.95)
+    - hf_token: Hugging Face token (optional, the env value is used when not passed)
+    Returns:
+    - response: Generated response
+    - context_used: Retrieved context documents
+    - timestamp: Response timestamp
+    Raises:
+    - HTTPException 500 when retrieval or saving the chat history fails
+    """
+    # NOTE(review): the embedding, Qdrant, LLM and MongoDB calls below look
+    # synchronous, so they would block the event loop inside this async handler - confirm.
+    try:
+        # Retrieve context if RAG enabled
+        context_used = _retrieve_context(request.message, request.top_k) if request.use_rag else []
+        system_message = _build_system_message(request.system_message, context_used)
+        # Use token from request or fallback to env
+        token = request.hf_token or hf_token
+        if not token:
+            response = _placeholder_response(len(context_used), request.message)
+        else:
+            try:
+                response = _generate_llm_response(token, system_message, request)
+            except Exception as e:
+                # Deliberate best-effort: an LLM failure is reported in the body, not as an HTTP error.
+                response = f"Error generating response with LLM: {str(e)}\n\nContext was retrieved successfully, but LLM generation failed."
+        # Take the timestamp once so the stored record and the API response agree.
+        now = datetime.utcnow()
+        # Save to history
+        chat_history_collection.insert_one({
+            "user_message": request.message,
+            "assistant_response": response,
+            "context_used": context_used,
+            "timestamp": now
+        })
+        return ChatResponse(
+            response=response,
+            context_used=context_used,
+            timestamp=now.isoformat()
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e
+@app.post("/documents", response_model=AddDocumentResponse)
+async def add_document(request: AddDocumentRequest):
+    """
+    Add document to knowledge base
+    Body:
+    - text: Document text
+    - metadata: Additional metadata (optional)
+    Returns:
+    - success: True/False
+    - doc_id: MongoDB document ID
+    - message: Status message
+    Raises:
+    - HTTPException 500 when embedding, storing or indexing fails
+    """
+    try:
+        user_metadata = request.metadata or {}
+        # Embed before writing anything, so an embedding failure leaves no stored document.
+        embedding = embedding_service.encode_text(request.text)
+        # Save to MongoDB
+        result = documents_collection.insert_one({
+            "text": request.text,
+            "metadata": user_metadata,
+            "created_at": datetime.utcnow()
+        })
+        doc_id = str(result.inserted_id)
+        # Index to Qdrant
+        try:
+            qdrant_service.index_data(
+                doc_id=doc_id,
+                embedding=embedding,
+                metadata={
+                    "source": "api",
+                    **user_metadata,
+                    # Placed last so a user-supplied "text" key cannot replace
+                    # the document text that /chat reads back as context.
+                    "text": request.text
+                }
+            )
+        except Exception:
+            # Roll back the MongoDB insert so a failed index does not leave an
+            # unsearchable orphan document behind.
+            documents_collection.delete_one({"_id": result.inserted_id})
+            raise
+        return AddDocumentResponse(
+            success=True,
+            doc_id=doc_id,
+            message=f"Document added successfully with ID: {doc_id}"
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e
+@app.post("/rag/search", response_model=List[SearchResponse])
+async def rag_search(
+    query: str = Form(...),
+    top_k: int = Form(5),
+    score_threshold: Optional[float] = Form(0.5)
+):
+    """
+    Run a vector search over the knowledge base
+    Form fields:
+    - query: Search query
+    - top_k: Number of results (default: 5)
+    - score_threshold: Minimum score (default: 0.5)
+    Returns:
+    - results: List of matching documents (id, confidence, metadata)
+    Raises:
+    - HTTPException 500 when embedding or searching fails
+    """
+    try:
+        # Embed the query and look it up in Qdrant
+        hits = qdrant_service.search(
+            query_embedding=embedding_service.encode_text(query),
+            limit=top_k,
+            score_threshold=score_threshold
+        )
+        # Map each raw hit onto the response model
+        responses = []
+        for hit in hits:
+            responses.append(
+                SearchResponse(
+                    id=hit["id"],
+                    confidence=hit["confidence"],
+                    metadata=hit["metadata"]
+                )
+            )
+        return responses
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+@app.get("/history")
+async def get_history(limit: int = 10, skip: int = 0):
+    """
+    Get chat history, newest first
+    Query params:
+    - limit: Number of messages to return (default: 10, must be >= 1)
+    - skip: Number of messages to skip (default: 0, must be >= 0)
+    Returns:
+    - history: List of chat messages
+    - total: Total number of stored messages
+    Raises:
+    - HTTPException 400 when limit or skip is out of range
+    - HTTPException 500 when the database query fails
+    """
+    # Validate before the try block so the 400 is not re-wrapped as a 500 below.
+    # MongoDB treats limit=0 as "no limit", which would dump the whole history,
+    # and a negative skip is rejected by the driver.
+    if limit < 1 or skip < 0:
+        raise HTTPException(
+            status_code=400,
+            detail="limit must be >= 1 and skip must be >= 0"
+        )
+    try:
+        history = list(
+            chat_history_collection
+            .find({}, {"_id": 0})
+            .sort("timestamp", -1)
+            .skip(skip)
+            .limit(limit)
+        )
+        # Convert datetime to string; leave any non-datetime value untouched
+        for msg in history:
+            timestamp = msg.get("timestamp")
+            if isinstance(timestamp, datetime):
+                msg["timestamp"] = timestamp.isoformat()
+        return {
+            "history": history,
+            "total": chat_history_collection.count_documents({})
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e
+@app.delete("/documents/{doc_id}")
+async def delete_document_from_kb(doc_id: str):
+    """
+    Delete document from knowledge base
+    Args:
+    - doc_id: Document ID (MongoDB ObjectId as a hex string)
+    Returns:
+    - success: True/False
+    - message: Status message
+    Raises:
+    - HTTPException 404 when no document has this ID
+    - HTTPException 500 when the deletion fails
+    """
+    # bson ships with pymongo; imported locally to keep this fix self-contained.
+    from bson import ObjectId
+    from bson.errors import InvalidId
+    try:
+        # Documents are inserted with an auto-generated ObjectId _id, while the
+        # path parameter is a string, so the raw string alone never matches.
+        # The raw string stays as a candidate for any document keyed by a plain string.
+        id_candidates = [doc_id]
+        try:
+            id_candidates.append(ObjectId(doc_id))
+        except InvalidId:
+            pass  # Not a valid ObjectId; only the raw string can match.
+        # Delete from MongoDB
+        result = documents_collection.delete_one({"_id": {"$in": id_candidates}})
+        if result.deleted_count == 0:
+            raise HTTPException(status_code=404, detail=f"Document {doc_id} not found")
+        # Delete from Qdrant (indexed under the string form of the ID)
+        qdrant_service.delete_by_id(doc_id)
+        return {"success": True, "message": f"Document {doc_id} deleted from knowledge base"}
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(