Spaces:

minhvtt
/

ChatbotRAG

Running

App Files Files Community

minhvtt committed 12 days ago

Commit

358773d

verified ·

1 Parent(s): c99ab26

Upload 13 files

Browse files

Files changed (4) hide show

advanced_rag.py +152 -52
cag_service.py +233 -0
main.py +705 -24
requirements.txt +3 -1

advanced_rag.py CHANGED Viewed

@@ -1,12 +1,13 @@
 """
-Advanced RAG techniques for improved retrieval and generation
-Includes: Query Expansion, Reranking, Contextual Compression, Hybrid Search
 """
 from typing import List, Dict, Optional, Tuple
 import numpy as np
 from dataclasses import dataclass
 import re
 @dataclass
@@ -19,23 +20,86 @@ class RetrievedDocument:
 class AdvancedRAG:
-    """Advanced RAG system with modern techniques"""
     def __init__(self, embedding_service, qdrant_service):
         self.embedding_service = embedding_service
         self.qdrant_service = qdrant_service
-    def expand_query(self, query: str) -> List[str]:
         """
-        Expand query with related terms and variations
-        Simple rule-based expansion for Vietnamese queries
         """
         queries = [query]
-        # Add query variations
-        # Remove question words for alternative search
         question_words = ['ai', 'gì', 'nào', 'đâu', 'khi nào', 'như thế nào',
-                         'tại sao', 'có', 'là', 'được', 'không']
         query_lower = query.lower()
         for qw in question_words:
@@ -43,30 +107,32 @@ class AdvancedRAG:
                 variant = query_lower.replace(qw, '').strip()
                 if variant and variant != query_lower:
                     queries.append(variant)
-        # Extract key nouns/phrases (simple approach)
         words = query.split()
         if len(words) > 3:
-            # Take important words (skip first question word)
             key_phrases = ' '.join(words[1:]) if words[0].lower() in question_words else ' '.join(words[:3])
             if key_phrases not in queries:
                 queries.append(key_phrases)
-        return queries[:3]  # Return top 3 variations
     def multi_query_retrieval(
         self,
         query: str,
         top_k: int = 5,
-        score_threshold: float = 0.5
     ) -> List[RetrievedDocument]:
         """
         Retrieve documents using multiple query variations
-        Combines results from all query variations
         """
-        expanded_queries = self.expand_query(query)
-        all_results = {}  # Use dict to deduplicate by doc_id
         for q in expanded_queries:
             # Generate embedding for each query variant
@@ -92,45 +158,51 @@ class AdvancedRAG:
         # Sort by confidence and return top_k
         sorted_results = sorted(all_results.values(), key=lambda x: x.confidence, reverse=True)
-        return sorted_results[:top_k]
-    def rerank_documents(
         self,
         query: str,
         documents: List[RetrievedDocument],
-        use_cross_encoder: bool = False
     ) -> List[RetrievedDocument]:
         """
-        Rerank documents based on semantic similarity
-        Simple reranking using embedding similarity (can be upgraded to cross-encoder)
         """
         if not documents:
             return documents
-        # Simple reranking: recalculate similarity with original query
-        query_embedding = self.embedding_service.encode_text(query)
         reranked = []
-        for doc in documents:
-            # Get document embedding
-            doc_embedding = self.embedding_service.encode_text(doc.text)
-            # Calculate cosine similarity
-            similarity = np.dot(query_embedding.flatten(), doc_embedding.flatten())
-            # Combine with original confidence (weighted average)
-            new_score = 0.6 * similarity + 0.4 * doc.confidence
             reranked.append(RetrievedDocument(
                 id=doc.id,
                 text=doc.text,
-                confidence=float(new_score),
                 metadata=doc.metadata
             ))
-        # Sort by new score
         reranked.sort(key=lambda x: x.confidence, reverse=True)
-        return reranked
     def compress_context(
         self,
@@ -188,7 +260,6 @@ class AdvancedRAG:
     def _split_sentences(self, text: str) -> List[str]:
         """Split text into sentences (Vietnamese-aware)"""
-        # Simple sentence splitter
         sentences = re.split(r'[.!?]+', text)
         return [s.strip() for s in sentences if s.strip()]
@@ -199,40 +270,69 @@ class AdvancedRAG:
         score_threshold: float = 0.5,
         use_reranking: bool = True,
         use_compression: bool = True,
-        max_context_tokens: int = 500
     ) -> Tuple[List[RetrievedDocument], Dict]:
         """
-        Complete advanced RAG pipeline
-        1. Multi-query retrieval
-        2. Reranking
-        3. Contextual compression
         """
         stats = {
             "original_query": query,
             "expanded_queries": [],
             "initial_results": 0,
             "after_rerank": 0,
-            "after_compression": 0
         }
-        # Step 1: Multi-query retrieval
-        expanded_queries = self.expand_query(query)
         stats["expanded_queries"] = expanded_queries
         documents = self.multi_query_retrieval(
             query=query,
             top_k=top_k * 2,  # Get more candidates for reranking
-            score_threshold=score_threshold
         )
         stats["initial_results"] = len(documents)
-        # Step 2: Reranking (optional)
         if use_reranking and documents:
-            documents = self.rerank_documents(query, documents)
-            documents = documents[:top_k]  # Keep top_k after reranking
         stats["after_rerank"] = len(documents)
-        # Step 3: Contextual compression (optional)
         if use_compression and documents:
             documents = self.compress_context(
                 query=query,

 """
+Advanced RAG techniques for improved retrieval and generation (Best Case 2025)
+Includes: LLM-Based Query Expansion, Cross-Encoder Reranking, Contextual Compression, Hybrid Search
 """
 from typing import List, Dict, Optional, Tuple
 import numpy as np
 from dataclasses import dataclass
 import re
+from sentence_transformers import CrossEncoder
 @dataclass
 class AdvancedRAG:
+    """Advanced RAG system with 2025 best practices"""
     def __init__(self, embedding_service, qdrant_service):
+        """
+        Store the injected services and load the reranking model
+        Args:
+            embedding_service: Kept as self.embedding_service
+            qdrant_service: Kept as self.qdrant_service
+        """
         self.embedding_service = embedding_service
         self.qdrant_service = qdrant_service
+        # Load the Cross-Encoder once at construction time; it is stored on the
+        # instance and reused by rerank_documents_cross_encoder on every call
+        print("Loading Cross-Encoder model for reranking...")
+        self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+        print("✓ Cross-Encoder loaded")
+    def expand_query_llm(
+        self,
+        query: str,
+        hf_client=None
+    ) -> List[str]:
+        """
+        Expand a query into alternative phrasings using an LLM
+        Falls back to the rule-based expansion when no client is supplied or
+        when the LLM call raises.
+        Args:
+            query: Original user query
+            hf_client: Client exposing chat_completion(messages=..., max_tokens=...,
+                stream=True, temperature=...) that yields streamed chunks (optional)
+        Returns:
+            The original query followed by up to 3 distinct LLM expansions,
+            or the rule-based result on fallback
+        """
+        # Fallback to rule-based if no LLM client
+        if not hf_client:
+            return self._expand_query_rule_based(query)
+        try:
+            # LLM-based expansion prompt
+            expansion_prompt = f"""Given this user question, generate 2-3 alternative phrasings or sub-questions that would help retrieve relevant information.
+User Question: {query}
+Alternative queries (one per line):"""
+            # Collect streamed fragments and join once at the end
+            fragments = []
+            for msg in hf_client.chat_completion(
+                messages=[{"role": "user", "content": expansion_prompt}],
+                max_tokens=150,
+                stream=True,
+                temperature=0.7
+            ):
+                if msg.choices and msg.choices[0].delta.content:
+                    fragments.append(msg.choices[0].delta.content)
+            response = "".join(fragments)
+            # Strip list markers: numbered ("1." / "1)") and bulleted ("-", "*", "•").
+            # Bullets must be followed by whitespace so text that merely starts
+            # with a dash or asterisk is left alone.
+            marker = re.compile(r'^(?:\d+[\.\)]\s*|[\-\*\•]\s+)')
+            expansions = []
+            for line in response.split('\n'):
+                cleaned = marker.sub('', line.strip()).strip()
+                # Skip very short fragments and anything already queued, so each
+                # expansion adds a genuinely new retrieval query
+                if len(cleaned) > 5 and cleaned != query and cleaned not in expansions:
+                    expansions.append(cleaned)
+        except Exception as e:
+            print(f"LLM expansion failed, using rule-based: {e}")
+            return self._expand_query_rule_based(query)
+        return [query] + expansions[:3]  # Original + 3 expansions
+    def _expand_query_rule_based(self, query: str) -> List[str]:
         """
+        Fallback rule-based query expansion
+        Simple but effective Vietnamese-aware expansion
         """
         queries = [query]
+        # Vietnamese question words
         question_words = ['ai', 'gì', 'nào', 'đâu', 'khi nào', 'như thế nào',
+                         'sao', 'tại sao', 'có', 'là', 'được', 'không', 'làm sao']
         query_lower = query.lower()
         for qw in question_words:
                 variant = query_lower.replace(qw, '').strip()
                 if variant and variant != query_lower:
                     queries.append(variant)
+                    break  # One variation is enough
+        # Extract key phrases
         words = query.split()
         if len(words) > 3:
             key_phrases = ' '.join(words[1:]) if words[0].lower() in question_words else ' '.join(words[:3])
             if key_phrases not in queries:
                 queries.append(key_phrases)
+        return queries[:3]
     def multi_query_retrieval(
         self,
         query: str,
         top_k: int = 5,
+        score_threshold: float = 0.5,
+        expanded_queries: Optional[List[str]] = None
     ) -> List[RetrievedDocument]:
         """
         Retrieve documents using multiple query variations
+        Combines results from all query variations with deduplication
         """
+        if expanded_queries is None:
+            expanded_queries = [query]
+        all_results = {}  # Deduplicate by doc_id
         for q in expanded_queries:
             # Generate embedding for each query variant
         # Sort by confidence and return top_k
         sorted_results = sorted(all_results.values(), key=lambda x: x.confidence, reverse=True)
+        return sorted_results[:top_k * 2]  # Return more for reranking
+    def rerank_documents_cross_encoder(
         self,
         query: str,
         documents: List[RetrievedDocument],
+        top_k: int = 5
     ) -> List[RetrievedDocument]:
         """
+        Rerank documents by blending Cross-Encoder scores with prior confidence
+        Every (query, doc.text) pair is scored with self.cross_encoder.predict and
+        combined as 0.7 * cross-encoder score + 0.3 * doc.confidence.
+        Args:
+            query: Original user query
+            documents: Retrieved documents to rerank
+            top_k: Number of top documents to return
+        Returns:
+            New RetrievedDocument objects sorted by combined score (descending)
+            and truncated to top_k; the input list is returned as-is when empty
         """
         if not documents:
             return documents
+        # Prepare query-document pairs for Cross-Encoder
+        pairs = [[query, doc.text] for doc in documents]
+        # Score all pairs in a single predict call
+        ce_scores = self.cross_encoder.predict(pairs)
+        # Build new documents so the caller's objects are not mutated
         reranked = []
+        for doc, ce_score in zip(documents, ce_scores):
+            # Combine CE score with original confidence (weighted)
+            # NOTE(review): the blend assumes ce_score and doc.confidence are on
+            # comparable scales — confirm the model's output range; unbounded
+            # scores would dominate the 0.3 * confidence term
+            combined_score = 0.7 * float(ce_score) + 0.3 * doc.confidence
             reranked.append(RetrievedDocument(
                 id=doc.id,
                 text=doc.text,
+                confidence=float(combined_score),
                 metadata=doc.metadata
             ))
+        # Sort by new combined score
         reranked.sort(key=lambda x: x.confidence, reverse=True)
+        return reranked[:top_k]
     def compress_context(
         self,
     def _split_sentences(self, text: str) -> List[str]:
         """Split text into sentences (Vietnamese-aware)"""
+        # Splits on runs of '.', '!' or '?'; empty or whitespace-only pieces are dropped
         sentences = re.split(r'[.!?]+', text)
         return [s.strip() for s in sentences if s.strip()]
         score_threshold: float = 0.5,
         use_reranking: bool = True,
         use_compression: bool = True,
+        use_query_expansion: bool = True,
+        max_context_tokens: int = 500,
+        hf_client=None
     ) -> Tuple[List[RetrievedDocument], Dict]:
         """
+        Complete advanced RAG pipeline (Best Case 2025)
+        1. LLM-based query expansion
+        2. Multi-query retrieval
+        3. Cross-Encoder reranking
+        4. Contextual compression
+        Args:
+            query: User query
+            top_k: Number of documents to return
+            score_threshold: Minimum relevance score
+            use_reranking: Enable Cross-Encoder reranking
+            use_compression: Enable context compression
+            use_query_expansion: Enable LLM-based query expansion
+            max_context_tokens: Max tokens for compression
+            hf_client: HuggingFace InferenceClient for expansion
+        Returns:
+            (documents, stats)
         """
         stats = {
             "original_query": query,
             "expanded_queries": [],
             "initial_results": 0,
             "after_rerank": 0,
+            "after_compression": 0,
+            "used_cross_encoder": use_reranking,
+            "used_llm_expansion": use_query_expansion and hf_client is not None
         }
+        # Step 1: Query Expansion (LLM-based or rule-based)
+        if use_query_expansion:
+            expanded_queries = self.expand_query_llm(query, hf_client)
+        else:
+            expanded_queries = [query]
         stats["expanded_queries"] = expanded_queries
+        # Step 2: Multi-query retrieval
         documents = self.multi_query_retrieval(
             query=query,
             top_k=top_k * 2,  # Get more candidates for reranking
+            score_threshold=score_threshold,
+            expanded_queries=expanded_queries
         )
         stats["initial_results"] = len(documents)
+        # Step 3: Cross-Encoder Reranking (Best Case 2025)
         if use_reranking and documents:
+            documents = self.rerank_documents_cross_encoder(
+                query=query,
+                documents=documents,
+                top_k=top_k
+            )
+        else:
+            documents = documents[:top_k]
         stats["after_rerank"] = len(documents)
+        # Step 4: Contextual compression (optional)
         if use_compression and documents:
             documents = self.compress_context(
                 query=query,

cag_service.py ADDED Viewed

	@@ -0,0 +1,233 @@

+"""
+CAG Service (Cache-Augmented Generation)
+Semantic caching layer for RAG system using Qdrant
+This module implements intelligent caching to reduce latency and LLM costs
+by serving semantically similar queries from cache.
+"""
+from typing import Optional, Dict, Any, Tuple
+from datetime import datetime, timedelta
+import numpy as np
+from qdrant_client import QdrantClient
+from qdrant_client.models import (
+    Distance, VectorParams, PointStruct,
+    SearchParams, Filter, FieldCondition, MatchValue, Range
+)
+import uuid
+import os
+class CAGService:
+    """
+    Cache-Augmented Generation Service
+    Stores query/response pairs in a Qdrant collection, indexed by the query
+    embedding, and serves them back for sufficiently similar queries.
+    Features:
+    - Semantic similarity-based cache lookup (cosine similarity)
+    - TTL (Time-To-Live) checked at lookup time; an expired hit is deleted
+    - Configurable similarity threshold
+    """
+    def __init__(
+        self,
+        embedding_service,
+        qdrant_url: Optional[str] = None,
+        qdrant_api_key: Optional[str] = None,
+        cache_collection: str = "semantic_cache",
+        vector_size: int = 1024,
+        similarity_threshold: float = 0.9,
+        ttl_hours: int = 24
+    ):
+        """
+        Initialize CAG Service
+        Args:
+            embedding_service: Embedding service for query encoding
+            qdrant_url: Qdrant Cloud URL (falls back to env QDRANT_URL)
+            qdrant_api_key: Qdrant API key (falls back to env QDRANT_API_KEY)
+            cache_collection: Collection name for cache
+            vector_size: Embedding dimension
+            similarity_threshold: Min similarity for cache hit (0-1)
+            ttl_hours: Cache entry lifetime in hours
+        Raises:
+            ValueError: If the URL or API key is missing from both the
+                arguments and the environment
+        """
+        self.embedding_service = embedding_service
+        self.cache_collection = cache_collection
+        self.similarity_threshold = similarity_threshold
+        self.ttl_hours = ttl_hours
+        # Initialize Qdrant client
+        url = qdrant_url or os.getenv("QDRANT_URL")
+        api_key = qdrant_api_key or os.getenv("QDRANT_API_KEY")
+        if not url or not api_key:
+            raise ValueError("QDRANT_URL and QDRANT_API_KEY required for CAG")
+        self.client = QdrantClient(url=url, api_key=api_key)
+        self.vector_size = vector_size
+        # Ensure cache collection exists
+        self._ensure_cache_collection()
+        print(f"✓ CAG Service initialized (cache: {cache_collection}, threshold: {similarity_threshold})")
+    def _ensure_cache_collection(self):
+        """Create cache collection if it doesn't exist"""
+        collections = self.client.get_collections().collections
+        exists = any(c.name == self.cache_collection for c in collections)
+        if not exists:
+            print(f"Creating semantic cache collection: {self.cache_collection}")
+            self.client.create_collection(
+                collection_name=self.cache_collection,
+                vectors_config=VectorParams(
+                    size=self.vector_size,
+                    distance=Distance.COSINE
+                )
+            )
+            print("✓ Semantic cache collection created")
+    def _embed_query(self, query: str) -> list:
+        """Encode a query and return its embedding as a flat Python list"""
+        query_embedding = self.embedding_service.encode_text(query)
+        # The encoder may return a 2-D array; collapse it to 1-D
+        if len(query_embedding.shape) > 1:
+            query_embedding = query_embedding.flatten()
+        return query_embedding.tolist()
+    def check_cache(
+        self,
+        query: str
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Check if query has a cached response
+        Args:
+            query: User query string
+        Returns:
+            Cached data if found (with response, context, metadata), None otherwise
+        """
+        # Search for the single most similar cached query above the threshold
+        search_result = self.client.search(
+            collection_name=self.cache_collection,
+            query_vector=self._embed_query(query),
+            limit=1,
+            score_threshold=self.similarity_threshold,
+            search_params=SearchParams(
+                hnsw_ef=128,
+                exact=False
+            ),
+            with_payload=True
+        )
+        if not search_result:
+            return None
+        hit = search_result[0]
+        payload = hit.payload or {}
+        # Check TTL. An entry whose timestamp is missing or unparseable cannot
+        # be aged, so it is treated as expired instead of crashing the lookup.
+        # Timestamps are naive UTC, matching what save_to_cache writes.
+        try:
+            cached_at = datetime.fromisoformat(payload.get("cached_at"))
+            expired = datetime.utcnow() > cached_at + timedelta(hours=self.ttl_hours)
+        except (TypeError, ValueError):
+            expired = True
+        if expired:
+            # Cache expired, delete it
+            self.client.delete(
+                collection_name=self.cache_collection,
+                points_selector=[hit.id]
+            )
+            return None
+        # Cache hit!
+        return {
+            "response": payload.get("response"),
+            "context_used": payload.get("context_used", []),
+            "rag_stats": payload.get("rag_stats"),
+            "cached_query": payload.get("original_query"),
+            "similarity_score": float(hit.score),
+            "cached_at": cached_at.isoformat(),
+            "cache_hit": True
+        }
+    def save_to_cache(
+        self,
+        query: str,
+        response: str,
+        context_used: list,
+        rag_stats: Optional[Dict] = None
+    ) -> str:
+        """
+        Save query-response pair to cache
+        Args:
+            query: Original user query
+            response: Generated response
+            context_used: Retrieved context documents
+            rag_stats: RAG pipeline statistics
+        Returns:
+            Cache entry ID
+        """
+        # Create cache entry
+        cache_id = str(uuid.uuid4())
+        point = PointStruct(
+            id=cache_id,
+            vector=self._embed_query(query),
+            payload={
+                "original_query": query,
+                "response": response,
+                "context_used": context_used,
+                "rag_stats": rag_stats or {},
+                # Naive UTC timestamp; check_cache compares it against utcnow()
+                "cached_at": datetime.utcnow().isoformat(),
+                "cache_type": "semantic"
+            }
+        )
+        # Save to Qdrant
+        self.client.upsert(
+            collection_name=self.cache_collection,
+            points=[point]
+        )
+        return cache_id
+    def clear_cache(self) -> bool:
+        """
+        Clear all cache entries
+        Returns:
+            Success status
+        """
+        try:
+            # Delete and recreate collection
+            self.client.delete_collection(collection_name=self.cache_collection)
+            self._ensure_cache_collection()
+            print("✓ Semantic cache cleared")
+            return True
+        except Exception as e:
+            print(f"Error clearing cache: {e}")
+            return False
+    def get_cache_stats(self) -> Dict[str, Any]:
+        """
+        Get cache statistics
+        Returns:
+            Collection entry counts and status plus the configured TTL and
+            similarity threshold; an empty dict if the lookup fails
+        """
+        try:
+            info = self.client.get_collection(collection_name=self.cache_collection)
+            return {
+                "total_entries": info.points_count,
+                # Read defensively so a collection info object without this
+                # field does not discard every other statistic
+                "vectors_count": getattr(info, "vectors_count", None),
+                "status": info.status,
+                "ttl_hours": self.ttl_hours,
+                "similarity_threshold": self.similarity_threshold
+            }
+        except Exception as e:
+            print(f"Error getting cache stats: {e}")
+            return {}

main.py CHANGED Viewed

@@ -14,6 +14,7 @@ from huggingface_hub import InferenceClient
 from embedding_service import JinaClipEmbeddingService
 from qdrant_service import QdrantVectorService
 from advanced_rag import AdvancedRAG
 from pdf_parser import PDFIndexer
 from multimodal_pdf_parser import MultimodalPDFIndexer
@@ -57,12 +58,27 @@ hf_token = os.getenv("HUGGINGFACE_TOKEN")
 if hf_token:
     print("✓ Hugging Face token configured")
-# Initialize Advanced RAG
 advanced_rag = AdvancedRAG(
     embedding_service=embedding_service,
     qdrant_service=qdrant_service
 )
-print("✓ Advanced RAG pipeline initialized")
 # Initialize PDF Indexer
 pdf_indexer = PDFIndexer(
@@ -109,7 +125,14 @@ class ChatRequest(BaseModel):
     message: str
     use_rag: bool = True
     top_k: int = 3
-    system_message: Optional[str] = "You are a helpful AI assistant."
     max_tokens: int = 512
     temperature: float = 0.7
     top_p: float = 0.95
@@ -120,6 +143,12 @@ class ChatRequest(BaseModel):
     use_reranking: bool = True
     use_compression: bool = True
     score_threshold: float = 0.5
 class ChatResponse(BaseModel):
@@ -127,6 +156,7 @@ class ChatResponse(BaseModel):
     context_used: List[Dict]
     timestamp: str
     rag_stats: Optional[Dict] = None  # Stats from advanced RAG pipeline
 class AddDocumentRequest(BaseModel):
@@ -148,6 +178,14 @@ class UploadPDFResponse(BaseModel):
     message: str
 @app.get("/")
 async def root():
     """Health check endpoint with comprehensive API documentation"""
@@ -155,6 +193,8 @@ async def root():
         "status": "running",
         "service": "ChatbotRAG API - Advanced RAG with Multimodal Support",
         "version": "3.0.0",
         "vector_db": "Qdrant",
         "document_db": "MongoDB",
         "features": {
@@ -165,7 +205,28 @@ async def root():
             "chat_history": "Track conversation history",
             "hybrid_search": "Text + image search with Jina CLIP v2"
         },
         "endpoints": {
             "indexing": {
                 "POST /index": {
                     "description": "Index multiple texts and images (NEW: up to 10 each)",
@@ -182,6 +243,9 @@ async def root():
                         "success": True,
                         "id": "doc1",
                         "message": "Indexed successfully with 2 texts and 1 images"
                     },
                     "use_cases": {
                         "social_media_post": {
@@ -197,6 +261,20 @@ async def root():
                             "description": "Link post to event and user"
                         }
                     }
                 },
                 "POST /documents": {
                     "description": "Add text document to knowledge base",
@@ -221,6 +299,46 @@ async def root():
                     },
                     "example": "curl -X POST '/upload-pdf' -F 'file=@guide.pdf' -F 'title=User Guide'"
                 },
                 "POST /upload-pdf-multimodal": {
                     "description": "Upload PDF with text and image URLs (RECOMMENDED for user guides)",
                     "content_type": "multipart/form-data",
@@ -244,10 +362,36 @@ async def root():
                         "document_id": "pdf_multimodal_20251029_150000",
                         "chunks_indexed": 25,
                         "message": "PDF indexed with 25 chunks and 15 images"
                     },
                     "use_case": "Perfect for user guides with screenshots, tutorials with diagrams"
                 }
             },
             "search": {
                 "POST /search": {
                     "description": "Hybrid search with text and/or image",
@@ -302,7 +446,71 @@ async def root():
                         "use_reranking": True,
                         "top_k": 5,
                         "score_threshold": 0.5
                     },
                     "example_response_with_images": {
                         "response": "Để upload PDF có hình ảnh, sử dụng endpoint /upload-pdf-multimodal...",
                         "context_used": [
@@ -406,29 +614,115 @@ async def root():
                 "not_finding_info": "Lower score_threshold to 0.3-0.4, increase top_k to 7-10",
                 "too_much_context": "Increase score_threshold to 0.6-0.7, decrease top_k to 3-5",
                 "slow_responses": "Disable compression, use basic RAG, decrease top_k"
             }
         },
-        "links": {
-            "docs": "http://localhost:8000/docs",
-            "redoc": "http://localhost:8000/redoc",
-            "openapi": "http://localhost:8000/openapi.json",
-            "guides": {
-                "multimodal_pdf": "See MULTIMODAL_PDF_GUIDE.md",
-                "advanced_rag": "See ADVANCED_RAG_GUIDE.md",
-                "pdf_general": "See PDF_RAG_GUIDE.md",
-                "quick_start": "See QUICK_START_PDF.md"
             }
         },
-        "system_info": {
-            "embedding_model": "Jina CLIP v2 (multimodal)",
-            "vector_db": "Qdrant with HNSW index",
-            "document_db": "MongoDB",
-            "rag_pipeline": "Advanced RAG with query expansion, reranking, compression",
-            "pdf_parser": "pypdfium2 with URL extraction",
-            "max_inputs": "10 texts + 10 images per /index request"
-        }
-    }
 @app.post("/index", response_model=IndexResponse)
 async def index_data(
     id: str = Form(...),
@@ -436,9 +730,14 @@ async def index_data(
     images: Optional[List[UploadFile]] = File(None),
     id_use: Optional[str] = Form(None),
     id_user: Optional[str] = Form(None)
 ):
     """
     Index data vào vector database (hỗ trợ nhiều texts và images)
     Body:
     - id: Document ID (primary ID)
@@ -446,12 +745,28 @@ async def index_data(
     - images: List of image files (optional) - Tối đa 10 images
     - id_use: ID của SocialMedia hoặc EventCode (optional)
     - id_user: ID của User (optional)
     Returns:
     - success: True/False
     - id: Document ID
     - message: Status message
     Example:
     ```bash
     curl -X POST '/index' \
@@ -474,10 +789,28 @@ async def index_data(
         if images and len(images) > 10:
             raise HTTPException(status_code=400, detail="Tối đa 10 images")
         # Prepare embeddings
         text_embeddings = []
         image_embeddings = []
         # Encode multiple texts (tiếng Việt)
         if texts:
             for text in texts:
@@ -486,6 +819,14 @@ async def index_data(
                     text_embeddings.append(text_emb)
         # Encode multiple images
         if images:
             for image in images:
                 if image.filename:  # Check if image is provided
@@ -497,6 +838,23 @@ async def index_data(
         # Combine embeddings
         all_embeddings = []
         if text_embeddings:
             # Average all text embeddings
             avg_text_embedding = np.mean(text_embeddings, axis=0)
@@ -524,6 +882,12 @@ async def index_data(
             "image_filenames": [img.filename for img in images] if images else [],
             "id_use": id_use if id_use else None,  # ID của SocialMedia hoặc EventCode
             "id_user": id_user if id_user else None  # ID của User
         }
         result = qdrant_service.index_data(
@@ -536,8 +900,11 @@ async def index_data(
             success=True,
             id=result["original_id"],  # Trả về MongoDB ObjectId
             message=f"Đã index thành công document {result['original_id']} với {len(texts) if texts else 0} texts và {len(images) if images else 0} images (Qdrant UUID: {result['qdrant_id']})"
         )
     except HTTPException:
         raise
     except Exception as e:
@@ -763,6 +1130,7 @@ async def get_stats():
 async def chat(request: ChatRequest):
     """
     Chat endpoint với Advanced RAG
     Body:
     - message: User message
@@ -777,28 +1145,68 @@ async def chat(request: ChatRequest):
     - use_reranking: Enable reranking (default: true)
     - use_compression: Enable context compression (default: true)
     - score_threshold: Minimum relevance score (default: 0.5)
     Returns:
     - response: Generated response
     - context_used: Retrieved context documents
     - timestamp: Response timestamp
     - rag_stats: Statistics from RAG pipeline
     """
     try:
         # Retrieve context if RAG enabled
         context_used = []
         rag_stats = None
         if request.use_rag:
             if request.use_advanced_rag:
-                # Use Advanced RAG Pipeline
                 documents, stats = advanced_rag.hybrid_rag_pipeline(
                     query=request.message,
                     top_k=request.top_k,
                     score_threshold=request.score_threshold,
                     use_reranking=request.use_reranking,
                     use_compression=request.use_compression,
-                    max_context_tokens=500
                 )
                 # Convert to dict format for compatibility
@@ -832,8 +1240,26 @@ async def chat(request: ChatRequest):
                     doc_text = doc["metadata"].get("text", "")
                     confidence = doc["confidence"]
                     context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
         # Build system message with context
         if request.use_rag and context_used:
             if request.use_advanced_rag:
                 # Use advanced prompt builder
@@ -904,12 +1330,28 @@ Example:
             "timestamp": datetime.utcnow()
         }
         chat_history_collection.insert_one(chat_data)
         return ChatResponse(
             response=response,
             context_used=context_used,
             timestamp=datetime.utcnow().isoformat(),
             rag_stats=rag_stats
         )
     except Exception as e:
@@ -1308,6 +1750,245 @@ async def upload_pdf_multimodal(
         raise HTTPException(status_code=500, detail=f"Error uploading multimodal PDF: {str(e)}")
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(

 from embedding_service import JinaClipEmbeddingService
 from qdrant_service import QdrantVectorService
 from advanced_rag import AdvancedRAG
+from cag_service import CAGService
 from pdf_parser import PDFIndexer
 from multimodal_pdf_parser import MultimodalPDFIndexer
 if hf_token:
     print("✓ Hugging Face token configured")
+# Initialize the Advanced RAG pipeline (reuses the module-level embedding and Qdrant services)
 advanced_rag = AdvancedRAG(
     embedding_service=embedding_service,
     qdrant_service=qdrant_service
 )
+print("✓ Advanced RAG pipeline initialized (with Cross-Encoder)")
+# Initialize CAG Service (semantic cache). Best-effort: on any failure the app keeps running with cag_service = None.
+try:
+    cag_service = CAGService(
+        embedding_service=embedding_service,
+        cache_collection="semantic_cache",
+        vector_size=embedding_service.get_embedding_dimension(),  # taken from the embedding service's reported dimension
+        similarity_threshold=0.9,  # NOTE(review): presumably the minimum similarity for a cache hit - confirm in cag_service.py
+        ttl_hours=24  # NOTE(review): presumably the cache-entry lifetime in hours - confirm in cag_service.py
+    )
+    print("✓ CAG Service initialized (Semantic Caching enabled)")
+except Exception as e:  # deliberately broad: a failed cache setup only prints a warning and startup continues
+    print(f"Warning: CAG Service initialization failed: {e}")
+    print("Continuing without semantic caching...")
+    cag_service = None  # sentinel: chat() tests `if cag_service` before reading or writing the cache
 # Initialize PDF Indexer
 pdf_indexer = PDFIndexer(
     message: str
     use_rag: bool = True
     top_k: int = 3
+    system_message: Optional[str] = """Bạn là trợ lý AI chuyên biệt cho hệ thống quản lý sự kiện và mạng xã hội.
+Vai trò của bạn là trả lời các câu hỏi CHÍNH XÁC dựa trên dữ liệu được cung cấp từ hệ thống.
+Quy tắc tuyệt đối:
+- CHỈ trả lời câu hỏi liên quan đến: events, social media posts, PDFs đã upload, và dữ liệu trong knowledge base
+- KHÔNG trả lời câu hỏi ngoài phạm vi (tin tức, thời tiết, toán học, lập trình, tư vấn cá nhân, v.v.)
+- Nếu câu hỏi nằm ngoài phạm vi: BẮT BUỘC trả lời "Chúng tôi không thể trả lời câu hỏi này vì nó nằm ngoài vùng application xử lí."
+- Luôn ưu tiên thông tin từ context được cung cấp"""
     max_tokens: int = 512
     temperature: float = 0.7
     top_p: float = 0.95
     use_reranking: bool = True
     use_compression: bool = True
     score_threshold: float = 0.5
+    # Advanced RAG options
+    use_advanced_rag: bool = True
+    use_query_expansion: bool = True
+    use_reranking: bool = True
+    use_compression: bool = True
+    score_threshold: float = 0.5
 class ChatResponse(BaseModel):
     context_used: List[Dict]
     timestamp: str
     rag_stats: Optional[Dict] = None  # Stats from advanced RAG pipeline
+    rag_stats: Optional[Dict] = None  # Stats from advanced RAG pipeline
 class AddDocumentRequest(BaseModel):
     message: str
+class UploadPDFResponse(BaseModel):  # response schema declared as response_model for POST /upload-pdf
+    success: bool  # upload_pdf sets this to True when building its response
+    document_id: str  # caller-supplied ID, or an auto-generated "pdf_<YYYYmmdd_HHMMSS>" value
+    filename: str  # original filename (per the upload_pdf docstring)
+    chunks_indexed: int  # number of chunks created (per the upload_pdf docstring)
+    message: str  # status message
 @app.get("/")
 async def root():
     """Health check endpoint with comprehensive API documentation"""
         "status": "running",
         "service": "ChatbotRAG API - Advanced RAG with Multimodal Support",
         "version": "3.0.0",
+        "service": "ChatbotRAG API - Advanced RAG with Multimodal Support",
+        "version": "3.0.0",
         "vector_db": "Qdrant",
         "document_db": "MongoDB",
         "features": {
             "chat_history": "Track conversation history",
             "hybrid_search": "Text + image search with Jina CLIP v2"
         },
+        "document_db": "MongoDB",
+        "features": {
+            "multiple_inputs": "Index up to 10 texts + 10 images per request",
+            "advanced_rag": "Query expansion, reranking, contextual compression",
+            "pdf_support": "Upload PDFs and chat about their content",
+            "multimodal_pdf": "PDFs with text and image URLs - perfect for user guides",
+            "chat_history": "Track conversation history",
+            "hybrid_search": "Text + image search with Jina CLIP v2"
+        },
         "endpoints": {
+            "indexing": {
+                "POST /index": {
+                    "description": "Index multiple texts and images (NEW: up to 10 each)",
+                    "content_type": "multipart/form-data",
+                    "body": {
+                        "id": "string (required) - Document ID (primary)",
+                        "texts": "List[string] (optional) - Up to 10 texts",
+                        "images": "List[UploadFile] (optional) - Up to 10 images",
+                        "id_use": "string (optional) - ID của SocialMedia hoặc EventCode",
+                        "id_user": "string (optional) - ID của User"
+                    },
+                    "example": "curl -X POST '/index' -F 'id=doc1' -F 'id_use=social_123' -F 'id_user=user_789' -F 'texts=Text 1' -F 'images=@img1.jpg'",
             "indexing": {
                 "POST /index": {
                     "description": "Index multiple texts and images (NEW: up to 10 each)",
                         "success": True,
                         "id": "doc1",
                         "message": "Indexed successfully with 2 texts and 1 images"
+                        "success": True,
+                        "id": "doc1",
+                        "message": "Indexed successfully with 2 texts and 1 images"
                     },
                     "use_cases": {
                         "social_media_post": {
                             "description": "Link post to event and user"
                         }
                     }
+                    "use_cases": {
+                        "social_media_post": {
+                            "id": "post_uuid_123",
+                            "id_use": "social_media_456",
+                            "id_user": "user_789",
+                            "description": "Link post to social media account and user"
+                        },
+                        "event_post": {
+                            "id": "post_uuid_789",
+                            "id_use": "event_code_ABC123",
+                            "id_user": "user_101",
+                            "description": "Link post to event and user"
+                        }
+                    }
                 },
                 "POST /documents": {
                     "description": "Add text document to knowledge base",
                     },
                     "example": "curl -X POST '/upload-pdf' -F 'file=@guide.pdf' -F 'title=User Guide'"
                 },
+                "POST /upload-pdf-multimodal": {
+                    "description": "Upload PDF with text and image URLs (RECOMMENDED for user guides)",
+                    "content_type": "multipart/form-data",
+                    "features": [
+                        "Extracts text from PDF",
+                        "Detects image URLs (http://, https://)",
+                        "Supports markdown: ![alt](url)",
+                        "Supports HTML: <img src='url'>",
+                        "Links images to text chunks",
+                        "Returns images with context in chat"
+                    ],
+                    "body": {
+                        "file": "UploadFile (required) - PDF file with image URLs",
+                        "title": "string (optional) - Document title",
+                        "category": "string (optional) - e.g. 'user_guide', 'tutorial'",
+                        "description": "string (optional)"
+                    },
+                    "example": "curl -X POST '/upload-pdf-multimodal' -F 'file=@guide_with_images.pdf' -F 'category=user_guide'",
+                    "description": "Add text document to knowledge base",
+                    "content_type": "application/json",
+                    "body": {
+                        "text": "string (required) - Document content",
+                        "metadata": "object (optional) - Additional metadata"
+                    },
+                    "example": {
+                        "text": "How to create event: Click 'Create Event' button...",
+                        "metadata": {"category": "tutorial", "source": "user_guide"}
+                    }
+                },
+                "POST /upload-pdf": {
+                    "description": "Upload PDF file (text only)",
+                    "content_type": "multipart/form-data",
+                    "body": {
+                        "file": "UploadFile (required) - PDF file",
+                        "title": "string (optional) - Document title",
+                        "category": "string (optional) - Category",
+                        "description": "string (optional) - Description"
+                    },
+                    "example": "curl -X POST '/upload-pdf' -F 'file=@guide.pdf' -F 'title=User Guide'"
+                },
                 "POST /upload-pdf-multimodal": {
                     "description": "Upload PDF with text and image URLs (RECOMMENDED for user guides)",
                     "content_type": "multipart/form-data",
                         "document_id": "pdf_multimodal_20251029_150000",
                         "chunks_indexed": 25,
                         "message": "PDF indexed with 25 chunks and 15 images"
+                        "success": True,
+                        "document_id": "pdf_multimodal_20251029_150000",
+                        "chunks_indexed": 25,
+                        "message": "PDF indexed with 25 chunks and 15 images"
                     },
                     "use_case": "Perfect for user guides with screenshots, tutorials with diagrams"
                 }
             },
+            "search": {
+                "POST /search": {
+                    "description": "Hybrid search with text and/or image",
+                    "body": {
+                        "text": "string (optional) - Query text",
+                        "image": "UploadFile (optional) - Query image",
+                        "limit": "int (default: 10)",
+                        "score_threshold": "float (optional, 0-1)",
+                        "text_weight": "float (default: 0.5)",
+                        "image_weight": "float (default: 0.5)"
+                    }
+                },
+                "POST /search/text": {
+                    "description": "Text-only search",
+                    "body": {"text": "string", "limit": "int", "score_threshold": "float"}
+                },
+                "POST /search/image": {
+                    "description": "Image-only search",
+                    "body": {"image": "UploadFile", "limit": "int", "score_threshold": "float"}
+                    "use_case": "Perfect for user guides with screenshots, tutorials with diagrams"
+                }
+            },
             "search": {
                 "POST /search": {
                     "description": "Hybrid search with text and/or image",
                         "use_reranking": True,
                         "top_k": 5,
                         "score_threshold": 0.5
+                    "description": "Search in RAG knowledge base",
+                    "body": {"query": "string", "top_k": "int (default: 5)", "score_threshold": "float (default: 0.5)"}
+                }
+            },
+            "chat": {
+                "POST /chat": {
+                    "description": "Chat với Advanced RAG (Query expansion + Reranking + Compression)",
+                    "content_type": "application/json",
+                    "body": {
+                        "message": "string (required) - User question",
+                        "use_rag": "bool (default: true) - Enable RAG retrieval",
+                        "use_advanced_rag": "bool (default: true) - Use advanced RAG pipeline (RECOMMENDED)",
+                        "use_query_expansion": "bool (default: true) - Expand query with variations",
+                        "use_reranking": "bool (default: true) - Rerank results for accuracy",
+                        "use_compression": "bool (default: true) - Compress context to relevant parts",
+                        "top_k": "int (default: 3) - Number of documents to retrieve",
+                        "score_threshold": "float (default: 0.5) - Min relevance score (0-1)",
+                        "max_tokens": "int (default: 512) - Max response tokens",
+                        "temperature": "float (default: 0.7) - Creativity (0-1)",
+                        "hf_token": "string (optional) - Hugging Face token"
+                    },
+                    "response": {
+                        "response": "string - AI answer",
+                        "context_used": "array - Retrieved documents with metadata",
+                        "timestamp": "string",
+                        "rag_stats": "object - RAG pipeline statistics (query variants, retrieval counts)"
+                    },
+                    "example_advanced": {
+                        "message": "Làm sao để upload PDF có hình ảnh?",
+                        "use_advanced_rag": True,
+                        "use_reranking": True,
+                        "top_k": 5,
+                        "score_threshold": 0.5
+                    },
+                    "example_response_with_images": {
+                        "response": "Để upload PDF có hình ảnh, sử dụng endpoint /upload-pdf-multimodal...",
+                        "context_used": [
+                            {
+                                "id": "pdf_multimodal_...._p2_c1",
+                                "confidence": 0.89,
+                                "metadata": {
+                                    "text": "Bước 1: Chuẩn bị PDF với image URLs...",
+                                    "has_images": True,
+                                    "image_urls": [
+                                        "https://example.com/screenshot1.png",
+                                        "https://example.com/diagram.jpg"
+                                    ],
+                                    "num_images": 2,
+                                    "page": 2
+                                }
+                            }
+                        ],
+                        "rag_stats": {
+                            "original_query": "Làm sao để upload PDF có hình ảnh?",
+                            "expanded_queries": ["upload PDF hình ảnh", "PDF có ảnh"],
+                            "initial_results": 10,
+                            "after_rerank": 5,
+                            "after_compression": 5
+                        }
                     },
+                    "notes": [
+                        "Advanced RAG significantly improves answer quality",
+                        "When multimodal PDF is used, images are returned in metadata",
+                        "Requires HUGGINGFACE_TOKEN for actual LLM generation"
+                    ]
                     "example_response_with_images": {
                         "response": "Để upload PDF có hình ảnh, sử dụng endpoint /upload-pdf-multimodal...",
                         "context_used": [
                 "not_finding_info": "Lower score_threshold to 0.3-0.4, increase top_k to 7-10",
                 "too_much_context": "Increase score_threshold to 0.6-0.7, decrease top_k to 3-5",
                 "slow_responses": "Disable compression, use basic RAG, decrease top_k"
+            }
+                    "description": "Get chat history",
+                    "query_params": {"limit": "int (default: 10)", "skip": "int (default: 0)"},
+                    "response": {"history": "array", "total": "int"}
+                }
+            },
+            "management": {
+                "GET /documents/pdf": {
+                    "description": "List all PDF documents",
+                    "response": {"documents": "array", "total": "int"}
+                },
+                "DELETE /documents/pdf/{document_id}": {
+                    "description": "Delete PDF and all its chunks",
+                    "response": {"success": "bool", "message": "string"}
+                },
+                "GET /document/{doc_id}": {
+                    "description": "Get document by ID",
+                    "response": {"success": "bool", "data": "object"}
+                },
+                "DELETE /delete/{doc_id}": {
+                    "description": "Delete document by ID",
+                    "response": {"success": "bool", "message": "string"}
+                },
+                "GET /stats": {
+                    "description": "Get Qdrant collection statistics",
+                    "response": {"vectors_count": "int", "segments": "int", "indexed_vectors_count": "int"}
+                }
             }
         },
+        "quick_start": {
+            "1_upload_multimodal_pdf": "curl -X POST '/upload-pdf-multimodal' -F 'file=@user_guide.pdf' -F 'title=Guide'",
+            "2_verify_upload": "curl '/documents/pdf'",
+            "3_chat_with_rag": "curl -X POST '/chat' -H 'Content-Type: application/json' -d '{\"message\": \"How to...?\", \"use_advanced_rag\": true}'",
+            "4_see_images_in_context": "response['context_used'][0]['metadata']['image_urls']"
+        },
+        "use_cases": {
+            "user_guide_with_screenshots": {
+                "endpoint": "/upload-pdf-multimodal",
+                "description": "PDFs with text instructions + image URLs for visual guidance",
+                "benefits": ["Images linked to text chunks", "Chatbot returns relevant screenshots", "Perfect for step-by-step guides"]
+            },
+            "simple_text_docs": {
+                "endpoint": "/upload-pdf",
+                "description": "Simple PDFs with text only (FAQ, policies, etc.)"
+            },
+            "social_media_posts": {
+                "endpoint": "/index",
+                "description": "Index multiple posts with texts (up to 10) and images (up to 10)"
+            },
+            "complex_queries": {
+                "endpoint": "/chat",
+                "description": "Use advanced RAG for better accuracy on complex questions",
+                "settings": {"use_advanced_rag": True, "use_reranking": True, "use_compression": True}
             }
         },
+        "best_practices": {
+            "pdf_format": [
+                "Include image URLs in text (http://, https://)",
+                "Use markdown format: ![alt](url) or HTML: <img src='url'>",
+                "Clear structure with headings and sections",
+                "Link images close to their related text"
+            ],
+            "chat_settings": {
+                "for_accuracy": {"temperature": 0.3, "use_advanced_rag": True, "use_reranking": True},
+                "for_creativity": {"temperature": 0.8, "use_advanced_rag": False},
+                "for_factual_answers": {"temperature": 0.3, "use_compression": True, "score_threshold": 0.6}
+            },
+            "retrieval_tuning": {
+                "not_finding_info": "Lower score_threshold to 0.3-0.4, increase top_k to 7-10",
+                "too_much_context": "Increase score_threshold to 0.6-0.7, decrease top_k to 3-5",
+                "slow_responses": "Disable compression, use basic RAG, decrease top_k"
+            }
+        },
+        "links": {
+            "docs": "http://localhost:8000/docs",
+            "redoc": "http://localhost:8000/redoc",
+            "openapi": "http://localhost:8000/openapi.json",
+            "guides": {
+                "multimodal_pdf": "See MULTIMODAL_PDF_GUIDE.md",
+                "advanced_rag": "See ADVANCED_RAG_GUIDE.md",
+                "pdf_general": "See PDF_RAG_GUIDE.md",
+                "quick_start": "See QUICK_START_PDF.md"
+            }
+        },
+        "system_info": {
+            "embedding_model": "Jina CLIP v2 (multimodal)",
+            "vector_db": "Qdrant with HNSW index",
+            "document_db": "MongoDB",
+            "rag_pipeline": "Advanced RAG with query expansion, reranking, compression",
+            "pdf_parser": "pypdfium2 with URL extraction",
+            "max_inputs": "10 texts + 10 images per /index request"
+            "openapi": "http://localhost:8000/openapi.json",
+            "guides": {
+                "multimodal_pdf": "See MULTIMODAL_PDF_GUIDE.md",
+                "advanced_rag": "See ADVANCED_RAG_GUIDE.md",
+                "pdf_general": "See PDF_RAG_GUIDE.md",
+                "quick_start": "See QUICK_START_PDF.md"
+            }
+        },
+        "system_info": {
+            "embedding_model": "Jina CLIP v2 (multimodal)",
+            "vector_db": "Qdrant with HNSW index",
+            "document_db": "MongoDB",
+            "rag_pipeline": "Advanced RAG with query expansion, reranking, compression",
+            "pdf_parser": "pypdfium2 with URL extraction",
+            "max_inputs": "10 texts + 10 images per /index request"
+        }
+    }
 @app.post("/index", response_model=IndexResponse)
 async def index_data(
     id: str = Form(...),
     images: Optional[List[UploadFile]] = File(None),
     id_use: Optional[str] = Form(None),
     id_user: Optional[str] = Form(None)
+    texts: Optional[List[str]] = Form(None),
+    images: Optional[List[UploadFile]] = File(None),
+    id_use: Optional[str] = Form(None),
+    id_user: Optional[str] = Form(None)
 ):
     """
     Index data vào vector database (hỗ trợ nhiều texts và images)
+    Index data vào vector database (hỗ trợ nhiều texts và images)
     Body:
     - id: Document ID (primary ID)
     - images: List of image files (optional) - Tối đa 10 images
     - id_use: ID của SocialMedia hoặc EventCode (optional)
     - id_user: ID của User (optional)
+    - id: Document ID (primary ID)
+    - texts: List of text contents (tiếng Việt supported) - Tối đa 10 texts
+    - images: List of image files (optional) - Tối đa 10 images
+    - id_use: ID của SocialMedia hoặc EventCode (optional)
+    - id_user: ID của User (optional)
     Returns:
     - success: True/False
     - id: Document ID
     - message: Status message
+    Example:
+    ```bash
+    curl -X POST '/index' \
+      -F 'id=doc123' \
+      -F 'id_use=social_media_456' \
+      -F 'id_user=user_789' \
+      -F 'texts=Post content 1' \
+      -F 'texts=Post content 2' \
+      -F 'images=@image1.jpg'
+    ```
     Example:
     ```bash
     curl -X POST '/index' \
         if images and len(images) > 10:
             raise HTTPException(status_code=400, detail="Tối đa 10 images")
+        # Validation
+        if texts is None and images is None:
+            raise HTTPException(status_code=400, detail="Phải cung cấp ít nhất texts hoặc images")
+        if texts and len(texts) > 10:
+            raise HTTPException(status_code=400, detail="Tối đa 10 texts")
+        if images and len(images) > 10:
+            raise HTTPException(status_code=400, detail="Tối đa 10 images")
         # Prepare embeddings
         text_embeddings = []
         image_embeddings = []
+        text_embeddings = []
+        image_embeddings = []
+        # Encode multiple texts (tiếng Việt)
+        if texts:
+            for text in texts:
+                if text and text.strip():
+                    text_emb = embedding_service.encode_text(text)
+                    text_embeddings.append(text_emb)
         # Encode multiple texts (tiếng Việt)
         if texts:
             for text in texts:
                     text_embeddings.append(text_emb)
         # Encode multiple images
+        if images:
+            for image in images:
+                if image.filename:  # Check if image is provided
+                    image_bytes = await image.read()
+                    pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
+                    image_emb = embedding_service.encode_image(pil_image)
+                    image_embeddings.append(image_emb)
+        # Encode multiple images
         if images:
             for image in images:
                 if image.filename:  # Check if image is provided
         # Combine embeddings
         all_embeddings = []
+        if text_embeddings:
+            # Average all text embeddings
+            avg_text_embedding = np.mean(text_embeddings, axis=0)
+            all_embeddings.append(avg_text_embedding)
+        if image_embeddings:
+            # Average all image embeddings
+            avg_image_embedding = np.mean(image_embeddings, axis=0)
+            all_embeddings.append(avg_image_embedding)
+        if not all_embeddings:
+            raise HTTPException(status_code=400, detail="Không có embedding nào được tạo từ texts hoặc images")
+        # Final combined embedding
+        combined_embedding = np.mean(all_embeddings, axis=0)
+        all_embeddings = []
         if text_embeddings:
             # Average all text embeddings
             avg_text_embedding = np.mean(text_embeddings, axis=0)
             "image_filenames": [img.filename for img in images] if images else [],
             "id_use": id_use if id_use else None,  # ID của SocialMedia hoặc EventCode
             "id_user": id_user if id_user else None  # ID của User
+            "texts": texts if texts else [],
+            "text_count": len(texts) if texts else 0,
+            "image_count": len(images) if images else 0,
+            "image_filenames": [img.filename for img in images] if images else [],
+            "id_use": id_use if id_use else None,  # ID của SocialMedia hoặc EventCode
+            "id_user": id_user if id_user else None  # ID của User
         }
         result = qdrant_service.index_data(
             success=True,
             id=result["original_id"],  # Trả về MongoDB ObjectId
             message=f"Đã index thành công document {result['original_id']} với {len(texts) if texts else 0} texts và {len(images) if images else 0} images (Qdrant UUID: {result['qdrant_id']})"
+            message=f"Đã index thành công document {result['original_id']} với {len(texts) if texts else 0} texts và {len(images) if images else 0} images (Qdrant UUID: {result['qdrant_id']})"
         )
+    except HTTPException:
+        raise
     except HTTPException:
         raise
     except Exception as e:
 async def chat(request: ChatRequest):
     """
     Chat endpoint với Advanced RAG
+    Chat endpoint với Advanced RAG
     Body:
     - message: User message
     - use_reranking: Enable reranking (default: true)
     - use_compression: Enable context compression (default: true)
     - score_threshold: Minimum relevance score (default: 0.5)
+    - use_advanced_rag: Use advanced RAG pipeline (default: true)
+    - use_query_expansion: Enable query expansion (default: true)
+    - use_reranking: Enable reranking (default: true)
+    - use_compression: Enable context compression (default: true)
+    - score_threshold: Minimum relevance score (default: 0.5)
     Returns:
     - response: Generated response
     - context_used: Retrieved context documents
     - timestamp: Response timestamp
     - rag_stats: Statistics from RAG pipeline
+    - rag_stats: Statistics from RAG pipeline
     """
     try:
+        # ============================================
+        # CAG Layer: Check Semantic Cache First
+        # ============================================
+        cache_hit = None
+        if cag_service and request.use_rag:
+            cache_hit = cag_service.check_cache(request.message)
+            if cache_hit:
+                # Cache hit! Return cached response immediately
+                return ChatResponse(
+                    response=cache_hit["response"],
+                    context_used=cache_hit["context_used"],
+                    timestamp=datetime.utcnow().isoformat(),
+                    rag_stats={
+                        **cache_hit.get("rag_stats", {}),
+                        "cache_hit": True,
+                        "cached_query": cache_hit["cached_query"],
+                        "similarity_score": cache_hit["similarity_score"],
+                        "cached_at": cache_hit["cached_at"]
+                    }
+                )
+        # ============================================
+        # RAG Pipeline (if cache miss)
+        # ============================================
         # Retrieve context if RAG enabled
         context_used = []
         rag_stats = None
+        rag_stats = None
         if request.use_rag:
             if request.use_advanced_rag:
+                # Initialize LLM client for query expansion
+                hf_client = None
+                if request.hf_token or hf_token:
+                    hf_client = InferenceClient(token=request.hf_token or hf_token)
+                # Use Advanced RAG Pipeline (Best Case 2025)
                 documents, stats = advanced_rag.hybrid_rag_pipeline(
                     query=request.message,
                     top_k=request.top_k,
                     score_threshold=request.score_threshold,
                     use_reranking=request.use_reranking,
                     use_compression=request.use_compression,
+                    use_query_expansion=request.use_query_expansion,
+                    max_context_tokens=500,
+                    hf_client=hf_client
                 )
                 # Convert to dict format for compatibility
                     doc_text = doc["metadata"].get("text", "")
                     confidence = doc["confidence"]
                     context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
+                # Build context text (basic format)
+                context_text = "\n\nRelevant Context:\n"
+                for i, doc in enumerate(context_used, 1):
+                    doc_text = doc["metadata"].get("text", "")
+                    confidence = doc["confidence"]
+                    context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
         # Build system message with context
+        if request.use_rag and context_used:
+            if request.use_advanced_rag:
+                # Use advanced prompt builder
+                system_message = advanced_rag.build_rag_prompt(
+                    query=request.message,
+                    context=context_text,
+                    system_message=request.system_message
+                )
+            else:
+                # Basic prompt
+                system_message = f"{request.system_message}\n{context_text}\n\nPlease use the above context to answer the user's question when relevant."
+        # Build system message with context
         if request.use_rag and context_used:
             if request.use_advanced_rag:
                 # Use advanced prompt builder
             "timestamp": datetime.utcnow()
         }
         chat_history_collection.insert_one(chat_data)
+        # ============================================
+        # CAG: Save to Cache (if RAG was used)
+        # ============================================
+        if cag_service and request.use_rag and context_used and response:
+            try:
+                cag_service.save_to_cache(
+                    query=request.message,
+                    response=response,
+                    context_used=context_used,
+                    rag_stats=rag_stats
+                )
+            except Exception as cache_error:
+                print(f"Warning: Failed to save to cache: {cache_error}")
         return ChatResponse(
             response=response,
             context_used=context_used,
             timestamp=datetime.utcnow().isoformat(),
             rag_stats=rag_stats
+            timestamp=datetime.utcnow().isoformat(),
+            rag_stats=rag_stats
         )
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error uploading multimodal PDF: {str(e)}")
+@app.post("/upload-pdf", response_model=UploadPDFResponse)
+async def upload_pdf(
+    file: UploadFile = File(...),
+    document_id: Optional[str] = Form(None),
+    title: Optional[str] = Form(None),
+    description: Optional[str] = Form(None),
+    category: Optional[str] = Form(None)
+):
+    """
+    Upload and index PDF file into knowledge base
+    Body (multipart/form-data):
+    - file: PDF file (required)
+    - document_id: Custom document ID (optional, auto-generated if not provided)
+    - title: Document title (optional)
+    - description: Document description (optional)
+    - category: Document category (optional, e.g., "user_guide", "faq")
+    Returns:
+    - success: True/False
+    - document_id: Document ID
+    - filename: Original filename
+    - chunks_indexed: Number of chunks created
+    - message: Status message
+    Example:
+    ```bash
+    curl -X POST "http://localhost:8000/upload-pdf" \
+      -F "file=@user_guide.pdf" \
+      -F "title=Hướng dẫn sử dụng ChatbotRAG" \
+      -F "category=user_guide"
+    ```
+    """
+    try:
+        # Validate file type
+        if not file.filename.endswith('.pdf'):
+            raise HTTPException(status_code=400, detail="Only PDF files are allowed")
+        # Generate document ID if not provided
+        if not document_id:
+            from datetime import datetime
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            document_id = f"pdf_{timestamp}"
+        # Read PDF bytes
+        pdf_bytes = await file.read()
+        # Prepare metadata
+        metadata = {}
+        if title:
+            metadata['title'] = title
+        if description:
+            metadata['description'] = description
+        if category:
+            metadata['category'] = category
+        # Index PDF
+        result = pdf_indexer.index_pdf_bytes(
+            pdf_bytes=pdf_bytes,
+            document_id=document_id,
+            filename=file.filename,
+            document_metadata=metadata
+        )
+        return UploadPDFResponse(
+            success=True,
+            document_id=result['document_id'],
+            filename=result['filename'],
+            chunks_indexed=result['chunks_indexed'],
+            message=f"PDF '{file.filename}' đã được index thành công với {result['chunks_indexed']} chunks"
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error uploading PDF: {str(e)}")
+@app.get("/documents/pdf")
+async def list_pdf_documents():
+    """
+    List all PDF documents in knowledge base
+    Returns:
+    - documents: List of PDF documents with metadata
+    """
+    try:
+        docs = list(documents_collection.find(
+            {"type": "pdf"},
+            {"_id": 0}
+        ))
+        return {"documents": docs, "total": len(docs)}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+@app.delete("/documents/pdf/{document_id}")
+async def delete_pdf_document(document_id: str):
+    """
+    Delete PDF document and all its chunks from knowledge base
+    Args:
+    - document_id: Document ID
+    Returns:
+    - success: True/False
+    - message: Status message
+    """
+    try:
+        # Get document info
+        doc = documents_collection.find_one({"document_id": document_id, "type": "pdf"})
+        if not doc:
+            raise HTTPException(status_code=404, detail=f"PDF document {document_id} not found")
+        # Delete all chunks from Qdrant
+        chunk_ids = doc.get('chunk_ids', [])
+        for chunk_id in chunk_ids:
+            try:
+                qdrant_service.delete_by_id(chunk_id)
+            except:
+                pass  # Chunk might already be deleted
+        # Delete from MongoDB
+        documents_collection.delete_one({"document_id": document_id})
+        return {
+            "success": True,
+            "message": f"PDF document {document_id} and {len(chunk_ids)} chunks deleted"
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+@app.post("/upload-pdf-multimodal", response_model=UploadPDFResponse)
+async def upload_pdf_multimodal(
+    file: UploadFile = File(...),
+    document_id: Optional[str] = Form(None),
+    title: Optional[str] = Form(None),
+    description: Optional[str] = Form(None),
+    category: Optional[str] = Form(None)
+):
+    """
+    Upload PDF with text and image URLs (for user guides with screenshots)
+    This endpoint is optimized for PDFs containing:
+    - Text instructions
+    - Image URLs (http://... or https://...)
+    - Markdown images: ![alt](url)
+    - HTML images: <img src="url">
+    The system will:
+    1. Extract text from PDF
+    2. Detect all image URLs in the text
+    3. Link images to their corresponding text chunks
+    4. Store image URLs in metadata
+    5. Return images along with text during chat
+    Body (multipart/form-data):
+    - file: PDF file (required)
+    - document_id: Custom document ID (optional, auto-generated if not provided)
+    - title: Document title (optional)
+    - description: Document description (optional)
+    - category: Document category (optional, e.g., "user_guide", "tutorial")
+    Returns:
+    - success: True/False
+    - document_id: Document ID
+    - filename: Original filename
+    - chunks_indexed: Number of chunks created
+    - message: Status message (includes image count)
+    Example:
+    ```bash
+    curl -X POST "http://localhost:8000/upload-pdf-multimodal" \
+      -F "file=@user_guide_with_images.pdf" \
+      -F "title=Hướng dẫn có ảnh minh họa" \
+      -F "category=user_guide"
+    ```
+    Example Response:
+    ```json
+    {
+      "success": true,
+      "document_id": "pdf_20251029_150000",
+      "filename": "user_guide_with_images.pdf",
+      "chunks_indexed": 25,
+      "message": "PDF 'user_guide_with_images.pdf' indexed with 25 chunks and 15 images"
+    }
+    ```
+    """
+    try:
+        # Validate file type
+        if not file.filename.endswith('.pdf'):
+            raise HTTPException(status_code=400, detail="Only PDF files are allowed")
+        # Generate document ID if not provided
+        if not document_id:
+            from datetime import datetime
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            document_id = f"pdf_multimodal_{timestamp}"
+        # Read PDF bytes
+        pdf_bytes = await file.read()
+        # Prepare metadata
+        metadata = {'type': 'multimodal'}
+        if title:
+            metadata['title'] = title
+        if description:
+            metadata['description'] = description
+        if category:
+            metadata['category'] = category
+        # Index PDF with multimodal parser
+        result = multimodal_pdf_indexer.index_pdf_bytes(
+            pdf_bytes=pdf_bytes,
+            document_id=document_id,
+            filename=file.filename,
+            document_metadata=metadata
+        )
+        return UploadPDFResponse(
+            success=True,
+            document_id=result['document_id'],
+            filename=result['filename'],
+            chunks_indexed=result['chunks_indexed'],
+            message=f"PDF '{file.filename}' indexed successfully with {result['chunks_indexed']} chunks and {result.get('images_found', 0)} images"
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error uploading multimodal PDF: {str(e)}")
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(

requirements.txt CHANGED Viewed

@@ -14,6 +14,9 @@ torchvision>=0.15.0
 pillow>=10.0.0
 numpy>=1.24.0
 # Vector Database
 qdrant-client>=1.12.1
 grpcio>=1.60.0
@@ -31,4 +34,3 @@ einops
 # PDF Processing
 pypdfium2>=4.30.0

 pillow>=10.0.0
 numpy>=1.24.0
+# RAG & Reranking (Best Case 2025)
+sentence-transformers>=2.0.0
 # Vector Database
 qdrant-client>=1.12.1
 grpcio>=1.60.0
 # PDF Processing
 pypdfium2>=4.30.0