minhvtt committed on
Commit
358773d
·
verified ·
1 Parent(s): c99ab26

Upload 13 files

Browse files
Files changed (4) hide show
  1. advanced_rag.py +152 -52
  2. cag_service.py +233 -0
  3. main.py +705 -24
  4. requirements.txt +3 -1
advanced_rag.py CHANGED
@@ -1,12 +1,13 @@
1
  """
2
- Advanced RAG techniques for improved retrieval and generation
3
- Includes: Query Expansion, Reranking, Contextual Compression, Hybrid Search
4
  """
5
 
6
  from typing import List, Dict, Optional, Tuple
7
  import numpy as np
8
  from dataclasses import dataclass
9
  import re
 
10
 
11
 
12
  @dataclass
@@ -19,23 +20,86 @@ class RetrievedDocument:
19
 
20
 
21
  class AdvancedRAG:
22
- """Advanced RAG system with modern techniques"""
23
 
24
  def __init__(self, embedding_service, qdrant_service):
25
  self.embedding_service = embedding_service
26
  self.qdrant_service = qdrant_service
 
 
 
 
 
27
 
28
- def expand_query(self, query: str) -> List[str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  """
30
- Expand query with related terms and variations
31
- Simple rule-based expansion for Vietnamese queries
32
  """
33
  queries = [query]
34
 
35
- # Add query variations
36
- # Remove question words for alternative search
37
  question_words = ['ai', 'gì', 'nào', 'đâu', 'khi nào', 'như thế nào',
38
- 'tại sao', 'có', 'là', 'được', 'không']
39
 
40
  query_lower = query.lower()
41
  for qw in question_words:
@@ -43,30 +107,32 @@ class AdvancedRAG:
43
  variant = query_lower.replace(qw, '').strip()
44
  if variant and variant != query_lower:
45
  queries.append(variant)
 
46
 
47
- # Extract key nouns/phrases (simple approach)
48
  words = query.split()
49
  if len(words) > 3:
50
- # Take important words (skip first question word)
51
  key_phrases = ' '.join(words[1:]) if words[0].lower() in question_words else ' '.join(words[:3])
52
  if key_phrases not in queries:
53
  queries.append(key_phrases)
54
 
55
- return queries[:3] # Return top 3 variations
56
 
57
  def multi_query_retrieval(
58
  self,
59
  query: str,
60
  top_k: int = 5,
61
- score_threshold: float = 0.5
 
62
  ) -> List[RetrievedDocument]:
63
  """
64
  Retrieve documents using multiple query variations
65
- Combines results from all query variations
66
  """
67
- expanded_queries = self.expand_query(query)
 
68
 
69
- all_results = {} # Use dict to deduplicate by doc_id
70
 
71
  for q in expanded_queries:
72
  # Generate embedding for each query variant
@@ -92,45 +158,51 @@ class AdvancedRAG:
92
 
93
  # Sort by confidence and return top_k
94
  sorted_results = sorted(all_results.values(), key=lambda x: x.confidence, reverse=True)
95
- return sorted_results[:top_k]
96
 
97
- def rerank_documents(
98
  self,
99
  query: str,
100
  documents: List[RetrievedDocument],
101
- use_cross_encoder: bool = False
102
  ) -> List[RetrievedDocument]:
103
  """
104
- Rerank documents based on semantic similarity
105
- Simple reranking using embedding similarity (can be upgraded to cross-encoder)
 
 
 
 
 
 
 
 
106
  """
107
  if not documents:
108
  return documents
109
 
110
- # Simple reranking: recalculate similarity with original query
111
- query_embedding = self.embedding_service.encode_text(query)
112
-
 
 
 
 
113
  reranked = []
114
- for doc in documents:
115
- # Get document embedding
116
- doc_embedding = self.embedding_service.encode_text(doc.text)
117
-
118
- # Calculate cosine similarity
119
- similarity = np.dot(query_embedding.flatten(), doc_embedding.flatten())
120
-
121
- # Combine with original confidence (weighted average)
122
- new_score = 0.6 * similarity + 0.4 * doc.confidence
123
-
124
  reranked.append(RetrievedDocument(
125
  id=doc.id,
126
  text=doc.text,
127
- confidence=float(new_score),
128
  metadata=doc.metadata
129
  ))
130
-
131
- # Sort by new score
132
  reranked.sort(key=lambda x: x.confidence, reverse=True)
133
- return reranked
134
 
135
  def compress_context(
136
  self,
@@ -188,7 +260,6 @@ class AdvancedRAG:
188
 
189
  def _split_sentences(self, text: str) -> List[str]:
190
  """Split text into sentences (Vietnamese-aware)"""
191
- # Simple sentence splitter
192
  sentences = re.split(r'[.!?]+', text)
193
  return [s.strip() for s in sentences if s.strip()]
194
 
@@ -199,40 +270,69 @@ class AdvancedRAG:
199
  score_threshold: float = 0.5,
200
  use_reranking: bool = True,
201
  use_compression: bool = True,
202
- max_context_tokens: int = 500
 
 
203
  ) -> Tuple[List[RetrievedDocument], Dict]:
204
  """
205
- Complete advanced RAG pipeline
206
- 1. Multi-query retrieval
207
- 2. Reranking
208
- 3. Contextual compression
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  """
210
  stats = {
211
  "original_query": query,
212
  "expanded_queries": [],
213
  "initial_results": 0,
214
  "after_rerank": 0,
215
- "after_compression": 0
 
 
216
  }
217
 
218
- # Step 1: Multi-query retrieval
219
- expanded_queries = self.expand_query(query)
 
 
 
 
220
  stats["expanded_queries"] = expanded_queries
221
 
 
222
  documents = self.multi_query_retrieval(
223
  query=query,
224
  top_k=top_k * 2, # Get more candidates for reranking
225
- score_threshold=score_threshold
 
226
  )
227
  stats["initial_results"] = len(documents)
228
 
229
- # Step 2: Reranking (optional)
230
  if use_reranking and documents:
231
- documents = self.rerank_documents(query, documents)
232
- documents = documents[:top_k] # Keep top_k after reranking
 
 
 
 
 
233
  stats["after_rerank"] = len(documents)
234
 
235
- # Step 3: Contextual compression (optional)
236
  if use_compression and documents:
237
  documents = self.compress_context(
238
  query=query,
 
1
  """
2
+ Advanced RAG techniques for improved retrieval and generation (Best Case 2025)
3
+ Includes: LLM-Based Query Expansion, Cross-Encoder Reranking, Contextual Compression, Hybrid Search
4
  """
5
 
6
  from typing import List, Dict, Optional, Tuple
7
  import numpy as np
8
  from dataclasses import dataclass
9
  import re
10
+ from sentence_transformers import CrossEncoder
11
 
12
 
13
  @dataclass
 
20
 
21
 
22
  class AdvancedRAG:
23
+ """Advanced RAG system with 2025 best practices"""
24
 
25
  def __init__(self, embedding_service, qdrant_service):
26
  self.embedding_service = embedding_service
27
  self.qdrant_service = qdrant_service
28
+
29
+ # Initialize Cross-Encoder for reranking (state-of-the-art)
30
+ print("Loading Cross-Encoder model for reranking...")
31
+ self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
32
+ print("✓ Cross-Encoder loaded")
33
 
34
def expand_query_llm(
    self,
    query: str,
    hf_client=None
) -> List[str]:
    """
    Expand a query into alternative phrasings using an LLM.

    The original query is always the first element of the result. Up to
    three LLM-generated alternatives follow it, with list markers
    ("1.", "2)", "-", "*", "•") stripped and duplicates removed.

    Args:
        query: Original user query
        hf_client: Client exposing a streaming ``chat_completion`` method
            (e.g. HuggingFace InferenceClient). When falsy, the rule-based
            expansion is used instead.

    Returns:
        List of at most four queries: the original plus up to three
        expansions. If the LLM call fails, the rule-based expansion
        result is returned.
    """
    # Fallback to rule-based expansion when no LLM client is available
    if not hf_client:
        return self._expand_query_rule_based(query)

    expansion_prompt = (
        "Given this user question, generate 2-3 alternative phrasings or "
        "sub-questions that would help retrieve relevant information.\n\n"
        f"User Question: {query}\n\n"
        "Alternative queries (one per line):"
    )

    try:
        # Collect streamed deltas and join once instead of repeated +=
        pieces = []
        for msg in hf_client.chat_completion(
            messages=[{"role": "user", "content": expansion_prompt}],
            max_tokens=150,
            stream=True,
            temperature=0.7
        ):
            if msg.choices and msg.choices[0].delta.content:
                pieces.append(msg.choices[0].delta.content)
        response = "".join(pieces)
    except Exception as e:
        # Best-effort: any client/network failure degrades to rule-based
        print(f"LLM expansion failed, using rule-based: {e}")
        return self._expand_query_rule_based(query)

    # Numbered markers ("1." / "2)") may be glued to the text; bullet
    # markers must be followed by whitespace so that "-5 degrees" or
    # "**bold**" are not mangled.
    list_marker = re.compile(r'^(?:\d+[.)]\s*|[-*•]\s+)')

    queries = [query]
    seen = {query.strip().lower()}
    for line in response.split('\n'):
        cleaned = list_marker.sub('', line.strip()).strip()
        # Very short lines are headings or noise, not usable queries
        if len(cleaned) <= 5:
            continue
        key = cleaned.lower()
        if key in seen:
            continue
        seen.add(key)
        queries.append(cleaned)
        if len(queries) == 4:  # Original + 3 expansions
            break

    return queries
92
+
93
+ def _expand_query_rule_based(self, query: str) -> List[str]:
94
  """
95
+ Fallback rule-based query expansion
96
+ Simple but effective Vietnamese-aware expansion
97
  """
98
  queries = [query]
99
 
100
+ # Vietnamese question words
 
101
  question_words = ['ai', 'gì', 'nào', 'đâu', 'khi nào', 'như thế nào',
102
+ 'sao', 'tại sao', 'có', 'là', 'được', 'không', 'làm sao']
103
 
104
  query_lower = query.lower()
105
  for qw in question_words:
 
107
  variant = query_lower.replace(qw, '').strip()
108
  if variant and variant != query_lower:
109
  queries.append(variant)
110
+ break # One variation is enough
111
 
112
+ # Extract key phrases
113
  words = query.split()
114
  if len(words) > 3:
 
115
  key_phrases = ' '.join(words[1:]) if words[0].lower() in question_words else ' '.join(words[:3])
116
  if key_phrases not in queries:
117
  queries.append(key_phrases)
118
 
119
+ return queries[:3]
120
 
121
  def multi_query_retrieval(
122
  self,
123
  query: str,
124
  top_k: int = 5,
125
+ score_threshold: float = 0.5,
126
+ expanded_queries: Optional[List[str]] = None
127
  ) -> List[RetrievedDocument]:
128
  """
129
  Retrieve documents using multiple query variations
130
+ Combines results from all query variations with deduplication
131
  """
132
+ if expanded_queries is None:
133
+ expanded_queries = [query]
134
 
135
+ all_results = {} # Deduplicate by doc_id
136
 
137
  for q in expanded_queries:
138
  # Generate embedding for each query variant
 
158
 
159
  # Sort by confidence and return top_k
160
  sorted_results = sorted(all_results.values(), key=lambda x: x.confidence, reverse=True)
161
+ return sorted_results[:top_k * 2] # Return more for reranking
162
 
163
def rerank_documents_cross_encoder(
    self,
    query: str,
    documents: "List[RetrievedDocument]",
    top_k: int = 5
) -> "List[RetrievedDocument]":
    """
    Rerank documents with the Cross-Encoder and keep the best ``top_k``.

    Each (query, document text) pair is scored by ``self.cross_encoder``.
    The score is squashed into (0, 1) with a sigmoid and blended with the
    document's existing confidence (70% cross-encoder, 30% original), so
    the resulting confidence stays on the same 0-1 scale as the retrieval
    score instead of being dominated by an unbounded logit.

    Args:
        query: Original user query
        documents: Retrieved documents to rerank
        top_k: Number of top documents to return (values below 0 are
            treated as 0)

    Returns:
        New document objects sorted by combined confidence, best first.
        The input list and its documents are not modified. An empty
        input is returned as-is.
    """
    import math
    from dataclasses import replace

    if not documents:
        return documents

    def _sigmoid(x: float) -> float:
        # Numerically stable logistic function (no overflow for large |x|)
        if x >= 0:
            return 1.0 / (1.0 + math.exp(-x))
        z = math.exp(x)
        return z / (1.0 + z)

    # Prepare query-document pairs for the Cross-Encoder
    pairs = [[query, doc.text] for doc in documents]
    ce_scores = self.cross_encoder.predict(pairs)

    # NOTE(review): assumes the cross-encoder returns raw relevance logits
    # (as MS MARCO rerankers do); sigmoid is monotonic, so the pure
    # cross-encoder ordering is preserved either way.
    reranked = [
        replace(
            doc,
            confidence=float(0.7 * _sigmoid(float(score)) + 0.3 * doc.confidence)
        )
        for doc, score in zip(documents, ce_scores)
    ]

    reranked.sort(key=lambda d: d.confidence, reverse=True)
    return reranked[:max(top_k, 0)]
206
 
207
  def compress_context(
208
  self,
 
260
 
261
  def _split_sentences(self, text: str) -> List[str]:
262
  """Split text into sentences (Vietnamese-aware)"""
 
263
  sentences = re.split(r'[.!?]+', text)
264
  return [s.strip() for s in sentences if s.strip()]
265
 
 
270
  score_threshold: float = 0.5,
271
  use_reranking: bool = True,
272
  use_compression: bool = True,
273
+ use_query_expansion: bool = True,
274
+ max_context_tokens: int = 500,
275
+ hf_client=None
276
  ) -> Tuple[List[RetrievedDocument], Dict]:
277
  """
278
+ Complete advanced RAG pipeline (Best Case 2025)
279
+ 1. LLM-based query expansion
280
+ 2. Multi-query retrieval
281
+ 3. Cross-Encoder reranking
282
+ 4. Contextual compression
283
+
284
+ Args:
285
+ query: User query
286
+ top_k: Number of documents to return
287
+ score_threshold: Minimum relevance score
288
+ use_reranking: Enable Cross-Encoder reranking
289
+ use_compression: Enable context compression
290
+ use_query_expansion: Enable LLM-based query expansion
291
+ max_context_tokens: Max tokens for compression
292
+ hf_client: HuggingFace InferenceClient for expansion
293
+
294
+ Returns:
295
+ (documents, stats)
296
  """
297
  stats = {
298
  "original_query": query,
299
  "expanded_queries": [],
300
  "initial_results": 0,
301
  "after_rerank": 0,
302
+ "after_compression": 0,
303
+ "used_cross_encoder": use_reranking,
304
+ "used_llm_expansion": use_query_expansion and hf_client is not None
305
  }
306
 
307
+ # Step 1: Query Expansion (LLM-based or rule-based)
308
+ if use_query_expansion:
309
+ expanded_queries = self.expand_query_llm(query, hf_client)
310
+ else:
311
+ expanded_queries = [query]
312
+
313
  stats["expanded_queries"] = expanded_queries
314
 
315
+ # Step 2: Multi-query retrieval
316
  documents = self.multi_query_retrieval(
317
  query=query,
318
  top_k=top_k * 2, # Get more candidates for reranking
319
+ score_threshold=score_threshold,
320
+ expanded_queries=expanded_queries
321
  )
322
  stats["initial_results"] = len(documents)
323
 
324
+ # Step 3: Cross-Encoder Reranking (Best Case 2025)
325
  if use_reranking and documents:
326
+ documents = self.rerank_documents_cross_encoder(
327
+ query=query,
328
+ documents=documents,
329
+ top_k=top_k
330
+ )
331
+ else:
332
+ documents = documents[:top_k]
333
  stats["after_rerank"] = len(documents)
334
 
335
+ # Step 4: Contextual compression (optional)
336
  if use_compression and documents:
337
  documents = self.compress_context(
338
  query=query,
cag_service.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CAG Service (Cache-Augmented Generation)
3
+ Semantic caching layer for RAG system using Qdrant
4
+
5
+ This module implements intelligent caching to reduce latency and LLM costs
6
+ by serving semantically similar queries from cache.
7
+ """
8
+
9
+ from typing import Optional, Dict, Any, Tuple
10
+ from datetime import datetime, timedelta
11
+ import numpy as np
12
+ from qdrant_client import QdrantClient
13
+ from qdrant_client.models import (
14
+ Distance, VectorParams, PointStruct,
15
+ SearchParams, Filter, FieldCondition, MatchValue, Range
16
+ )
17
+ import uuid
18
+ import os
19
+
20
+
21
class CAGService:
    """
    Cache-Augmented Generation Service

    Features:
    - Semantic similarity-based cache lookup (cosine similarity)
    - TTL (Time-To-Live) check on lookup; expired entries are deleted lazily
    - Configurable similarity threshold
    """

    def __init__(
        self,
        embedding_service,
        qdrant_url: Optional[str] = None,
        qdrant_api_key: Optional[str] = None,
        cache_collection: str = "semantic_cache",
        vector_size: int = 1024,
        similarity_threshold: float = 0.9,
        ttl_hours: int = 24
    ):
        """
        Initialize CAG Service

        Args:
            embedding_service: Embedding service for query encoding
            qdrant_url: Qdrant URL (falls back to the QDRANT_URL env var)
            qdrant_api_key: Qdrant API key (falls back to QDRANT_API_KEY)
            cache_collection: Collection name for cache
            vector_size: Embedding dimension
            similarity_threshold: Min similarity for cache hit (0-1)
            ttl_hours: Cache entry lifetime in hours

        Raises:
            ValueError: If neither the arguments nor the environment
                provide both a URL and an API key.
        """
        self.embedding_service = embedding_service
        self.cache_collection = cache_collection
        self.similarity_threshold = similarity_threshold
        self.ttl_hours = ttl_hours

        # Initialize Qdrant client
        url = qdrant_url or os.getenv("QDRANT_URL")
        api_key = qdrant_api_key or os.getenv("QDRANT_API_KEY")

        if not url or not api_key:
            raise ValueError("QDRANT_URL and QDRANT_API_KEY required for CAG")

        self.client = QdrantClient(url=url, api_key=api_key)
        self.vector_size = vector_size

        # Ensure cache collection exists
        self._ensure_cache_collection()

        print(f"✓ CAG Service initialized (cache: {cache_collection}, threshold: {similarity_threshold})")

    def _ensure_cache_collection(self):
        """Create cache collection if it doesn't exist"""
        collections = self.client.get_collections().collections
        exists = any(c.name == self.cache_collection for c in collections)

        if not exists:
            print(f"Creating semantic cache collection: {self.cache_collection}")
            self.client.create_collection(
                collection_name=self.cache_collection,
                vectors_config=VectorParams(
                    size=self.vector_size,
                    distance=Distance.COSINE
                )
            )
            print("✓ Semantic cache collection created")

    @staticmethod
    def _utc_now() -> datetime:
        """Return the current UTC time as a naive datetime.

        Naive UTC keeps the stored ``cached_at`` format identical to what
        ``datetime.utcnow().isoformat()`` produced, without relying on the
        deprecated ``utcnow``.
        """
        from datetime import timezone
        return datetime.now(timezone.utc).replace(tzinfo=None)

    @staticmethod
    def _parse_cached_at(raw) -> Optional[datetime]:
        """Parse a stored ISO timestamp into naive UTC; None if unusable."""
        from datetime import timezone
        try:
            parsed = datetime.fromisoformat(raw)
        except (TypeError, ValueError):
            # Missing or malformed timestamp
            return None
        if parsed.tzinfo is not None:
            # Normalise aware timestamps so they compare with _utc_now()
            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
        return parsed

    def _is_expired(self, cached_at: datetime) -> bool:
        """Return True when ``cached_at`` is older than the configured TTL."""
        return self._utc_now() > cached_at + timedelta(hours=self.ttl_hours)

    def _encode_query(self, query: str) -> list:
        """Encode a query and return the embedding as a flat list."""
        embedding = self.embedding_service.encode_text(query)
        if len(embedding.shape) > 1:
            embedding = embedding.flatten()
        return embedding.tolist()

    def check_cache(
        self,
        query: str
    ) -> Optional[Dict[str, Any]]:
        """
        Check if query has a cached response

        Args:
            query: User query string

        Returns:
            Cached data if found (with response, context, metadata), None
            otherwise. An entry that is expired, or whose timestamp is
            missing or unparseable, is deleted and treated as a miss.
        """
        # Search for similar queries in cache
        search_result = self.client.search(
            collection_name=self.cache_collection,
            query_vector=self._encode_query(query),
            limit=1,
            score_threshold=self.similarity_threshold,
            search_params=SearchParams(
                hnsw_ef=128,
                exact=False
            ),
            with_payload=True
        )

        if not search_result:
            return None

        hit = search_result[0]
        payload = hit.payload or {}

        # Check TTL; an entry without a usable timestamp cannot be trusted
        cached_at = self._parse_cached_at(payload.get("cached_at"))
        if cached_at is None or self._is_expired(cached_at):
            self.client.delete(
                collection_name=self.cache_collection,
                points_selector=[hit.id]
            )
            return None

        # Cache hit!
        return {
            "response": payload.get("response"),
            "context_used": payload.get("context_used", []),
            "rag_stats": payload.get("rag_stats"),
            "cached_query": payload.get("original_query"),
            "similarity_score": float(hit.score),
            "cached_at": cached_at.isoformat(),
            "cache_hit": True
        }

    def save_to_cache(
        self,
        query: str,
        response: str,
        context_used: list,
        rag_stats: Optional[Dict] = None
    ) -> str:
        """
        Save query-response pair to cache

        Args:
            query: Original user query
            response: Generated response
            context_used: Retrieved context documents
            rag_stats: RAG pipeline statistics

        Returns:
            Cache entry ID
        """
        # Create cache entry
        cache_id = str(uuid.uuid4())

        point = PointStruct(
            id=cache_id,
            vector=self._encode_query(query),
            payload={
                "original_query": query,
                "response": response,
                "context_used": context_used,
                "rag_stats": rag_stats or {},
                "cached_at": self._utc_now().isoformat(),
                "cache_type": "semantic"
            }
        )

        # Save to Qdrant
        self.client.upsert(
            collection_name=self.cache_collection,
            points=[point]
        )

        return cache_id

    def clear_cache(self) -> bool:
        """
        Clear all cache entries

        Returns:
            Success status
        """
        try:
            # Delete and recreate collection
            self.client.delete_collection(collection_name=self.cache_collection)
            self._ensure_cache_collection()
            print("✓ Semantic cache cleared")
            return True
        except Exception as e:
            print(f"Error clearing cache: {e}")
            return False

    def get_cache_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics

        Returns:
            Dict with entry count, vector count (None when the client does
            not report it), collection status, TTL and similarity
            threshold. Empty dict if the collection cannot be queried.
        """
        try:
            info = self.client.get_collection(collection_name=self.cache_collection)
        except Exception as e:
            print(f"Error getting cache stats: {e}")
            return {}

        # Optional fields are read defensively so a missing attribute does
        # not turn the whole stats call into an empty result.
        return {
            "total_entries": getattr(info, "points_count", None),
            "vectors_count": getattr(info, "vectors_count", None),
            "status": getattr(info, "status", None),
            "ttl_hours": self.ttl_hours,
            "similarity_threshold": self.similarity_threshold
        }
main.py CHANGED
@@ -14,6 +14,7 @@ from huggingface_hub import InferenceClient
14
  from embedding_service import JinaClipEmbeddingService
15
  from qdrant_service import QdrantVectorService
16
  from advanced_rag import AdvancedRAG
 
17
  from pdf_parser import PDFIndexer
18
  from multimodal_pdf_parser import MultimodalPDFIndexer
19
 
@@ -57,12 +58,27 @@ hf_token = os.getenv("HUGGINGFACE_TOKEN")
57
  if hf_token:
58
  print("✓ Hugging Face token configured")
59
 
60
- # Initialize Advanced RAG
61
  advanced_rag = AdvancedRAG(
62
  embedding_service=embedding_service,
63
  qdrant_service=qdrant_service
64
  )
65
- print("✓ Advanced RAG pipeline initialized")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  # Initialize PDF Indexer
68
  pdf_indexer = PDFIndexer(
@@ -109,7 +125,14 @@ class ChatRequest(BaseModel):
109
  message: str
110
  use_rag: bool = True
111
  top_k: int = 3
112
- system_message: Optional[str] = "You are a helpful AI assistant."
 
 
 
 
 
 
 
113
  max_tokens: int = 512
114
  temperature: float = 0.7
115
  top_p: float = 0.95
@@ -120,6 +143,12 @@ class ChatRequest(BaseModel):
120
  use_reranking: bool = True
121
  use_compression: bool = True
122
  score_threshold: float = 0.5
 
 
 
 
 
 
123
 
124
 
125
  class ChatResponse(BaseModel):
@@ -127,6 +156,7 @@ class ChatResponse(BaseModel):
127
  context_used: List[Dict]
128
  timestamp: str
129
  rag_stats: Optional[Dict] = None # Stats from advanced RAG pipeline
 
130
 
131
 
132
  class AddDocumentRequest(BaseModel):
@@ -148,6 +178,14 @@ class UploadPDFResponse(BaseModel):
148
  message: str
149
 
150
 
 
 
 
 
 
 
 
 
151
  @app.get("/")
152
  async def root():
153
  """Health check endpoint with comprehensive API documentation"""
@@ -155,6 +193,8 @@ async def root():
155
  "status": "running",
156
  "service": "ChatbotRAG API - Advanced RAG with Multimodal Support",
157
  "version": "3.0.0",
 
 
158
  "vector_db": "Qdrant",
159
  "document_db": "MongoDB",
160
  "features": {
@@ -165,7 +205,28 @@ async def root():
165
  "chat_history": "Track conversation history",
166
  "hybrid_search": "Text + image search with Jina CLIP v2"
167
  },
 
 
 
 
 
 
 
 
 
168
  "endpoints": {
 
 
 
 
 
 
 
 
 
 
 
 
169
  "indexing": {
170
  "POST /index": {
171
  "description": "Index multiple texts and images (NEW: up to 10 each)",
@@ -182,6 +243,9 @@ async def root():
182
  "success": True,
183
  "id": "doc1",
184
  "message": "Indexed successfully with 2 texts and 1 images"
 
 
 
185
  },
186
  "use_cases": {
187
  "social_media_post": {
@@ -197,6 +261,20 @@ async def root():
197
  "description": "Link post to event and user"
198
  }
199
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  },
201
  "POST /documents": {
202
  "description": "Add text document to knowledge base",
@@ -221,6 +299,46 @@ async def root():
221
  },
222
  "example": "curl -X POST '/upload-pdf' -F 'file=@guide.pdf' -F 'title=User Guide'"
223
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  "POST /upload-pdf-multimodal": {
225
  "description": "Upload PDF with text and image URLs (RECOMMENDED for user guides)",
226
  "content_type": "multipart/form-data",
@@ -244,10 +362,36 @@ async def root():
244
  "document_id": "pdf_multimodal_20251029_150000",
245
  "chunks_indexed": 25,
246
  "message": "PDF indexed with 25 chunks and 15 images"
 
 
 
 
247
  },
248
  "use_case": "Perfect for user guides with screenshots, tutorials with diagrams"
249
  }
250
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  "search": {
252
  "POST /search": {
253
  "description": "Hybrid search with text and/or image",
@@ -302,7 +446,71 @@ async def root():
302
  "use_reranking": True,
303
  "top_k": 5,
304
  "score_threshold": 0.5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  },
 
 
 
 
 
306
  "example_response_with_images": {
307
  "response": "Để upload PDF có hình ảnh, sử dụng endpoint /upload-pdf-multimodal...",
308
  "context_used": [
@@ -406,29 +614,115 @@ async def root():
406
  "not_finding_info": "Lower score_threshold to 0.3-0.4, increase top_k to 7-10",
407
  "too_much_context": "Increase score_threshold to 0.6-0.7, decrease top_k to 3-5",
408
  "slow_responses": "Disable compression, use basic RAG, decrease top_k"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  }
410
  },
411
- "links": {
412
- "docs": "http://localhost:8000/docs",
413
- "redoc": "http://localhost:8000/redoc",
414
- "openapi": "http://localhost:8000/openapi.json",
415
- "guides": {
416
- "multimodal_pdf": "See MULTIMODAL_PDF_GUIDE.md",
417
- "advanced_rag": "See ADVANCED_RAG_GUIDE.md",
418
- "pdf_general": "See PDF_RAG_GUIDE.md",
419
- "quick_start": "See QUICK_START_PDF.md"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  }
421
  },
422
- "system_info": {
423
- "embedding_model": "Jina CLIP v2 (multimodal)",
424
- "vector_db": "Qdrant with HNSW index",
425
- "document_db": "MongoDB",
426
- "rag_pipeline": "Advanced RAG with query expansion, reranking, compression",
427
- "pdf_parser": "pypdfium2 with URL extraction",
428
- "max_inputs": "10 texts + 10 images per /index request"
429
- }
430
- }
431
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  @app.post("/index", response_model=IndexResponse)
433
  async def index_data(
434
  id: str = Form(...),
@@ -436,9 +730,14 @@ async def index_data(
436
  images: Optional[List[UploadFile]] = File(None),
437
  id_use: Optional[str] = Form(None),
438
  id_user: Optional[str] = Form(None)
 
 
 
 
439
  ):
440
  """
441
  Index data vào vector database (hỗ trợ nhiều texts và images)
 
442
 
443
  Body:
444
  - id: Document ID (primary ID)
@@ -446,12 +745,28 @@ async def index_data(
446
  - images: List of image files (optional) - Tối đa 10 images
447
  - id_use: ID của SocialMedia hoặc EventCode (optional)
448
  - id_user: ID của User (optional)
 
 
 
 
 
449
 
450
  Returns:
451
  - success: True/False
452
  - id: Document ID
453
  - message: Status message
454
 
 
 
 
 
 
 
 
 
 
 
 
455
  Example:
456
  ```bash
457
  curl -X POST '/index' \
@@ -474,10 +789,28 @@ async def index_data(
474
  if images and len(images) > 10:
475
  raise HTTPException(status_code=400, detail="Tối đa 10 images")
476
 
 
 
 
 
 
 
 
 
 
 
477
  # Prepare embeddings
478
  text_embeddings = []
479
  image_embeddings = []
 
 
480
 
 
 
 
 
 
 
481
  # Encode multiple texts (tiếng Việt)
482
  if texts:
483
  for text in texts:
@@ -486,6 +819,14 @@ async def index_data(
486
  text_embeddings.append(text_emb)
487
 
488
  # Encode multiple images
 
 
 
 
 
 
 
 
489
  if images:
490
  for image in images:
491
  if image.filename: # Check if image is provided
@@ -497,6 +838,23 @@ async def index_data(
497
  # Combine embeddings
498
  all_embeddings = []
499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
  if text_embeddings:
501
  # Average all text embeddings
502
  avg_text_embedding = np.mean(text_embeddings, axis=0)
@@ -524,6 +882,12 @@ async def index_data(
524
  "image_filenames": [img.filename for img in images] if images else [],
525
  "id_use": id_use if id_use else None, # ID của SocialMedia hoặc EventCode
526
  "id_user": id_user if id_user else None # ID của User
 
 
 
 
 
 
527
  }
528
 
529
  result = qdrant_service.index_data(
@@ -536,8 +900,11 @@ async def index_data(
536
  success=True,
537
  id=result["original_id"], # Trả về MongoDB ObjectId
538
  message=f"Đã index thành công document {result['original_id']} với {len(texts) if texts else 0} texts và {len(images) if images else 0} images (Qdrant UUID: {result['qdrant_id']})"
 
539
  )
540
 
 
 
541
  except HTTPException:
542
  raise
543
  except Exception as e:
@@ -763,6 +1130,7 @@ async def get_stats():
763
  async def chat(request: ChatRequest):
764
  """
765
  Chat endpoint với Advanced RAG
 
766
 
767
  Body:
768
  - message: User message
@@ -777,28 +1145,68 @@ async def chat(request: ChatRequest):
777
  - use_reranking: Enable reranking (default: true)
778
  - use_compression: Enable context compression (default: true)
779
  - score_threshold: Minimum relevance score (default: 0.5)
 
 
 
 
 
780
 
781
  Returns:
782
  - response: Generated response
783
  - context_used: Retrieved context documents
784
  - timestamp: Response timestamp
785
  - rag_stats: Statistics from RAG pipeline
 
786
  """
787
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
788
  # Retrieve context if RAG enabled
789
  context_used = []
790
  rag_stats = None
791
 
 
 
792
  if request.use_rag:
793
  if request.use_advanced_rag:
794
- # Use Advanced RAG Pipeline
 
 
 
 
 
795
  documents, stats = advanced_rag.hybrid_rag_pipeline(
796
  query=request.message,
797
  top_k=request.top_k,
798
  score_threshold=request.score_threshold,
799
  use_reranking=request.use_reranking,
800
  use_compression=request.use_compression,
801
- max_context_tokens=500
 
 
802
  )
803
 
804
  # Convert to dict format for compatibility
@@ -832,8 +1240,26 @@ async def chat(request: ChatRequest):
832
  doc_text = doc["metadata"].get("text", "")
833
  confidence = doc["confidence"]
834
  context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
 
 
 
 
 
 
835
 
836
  # Build system message with context
 
 
 
 
 
 
 
 
 
 
 
 
837
  if request.use_rag and context_used:
838
  if request.use_advanced_rag:
839
  # Use advanced prompt builder
@@ -904,12 +1330,28 @@ Example:
904
  "timestamp": datetime.utcnow()
905
  }
906
  chat_history_collection.insert_one(chat_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
907
 
908
  return ChatResponse(
909
  response=response,
910
  context_used=context_used,
911
  timestamp=datetime.utcnow().isoformat(),
912
  rag_stats=rag_stats
 
 
913
  )
914
 
915
  except Exception as e:
@@ -1308,6 +1750,245 @@ async def upload_pdf_multimodal(
1308
  raise HTTPException(status_code=500, detail=f"Error uploading multimodal PDF: {str(e)}")
1309
 
1310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1311
  if __name__ == "__main__":
1312
  import uvicorn
1313
  uvicorn.run(
 
14
  from embedding_service import JinaClipEmbeddingService
15
  from qdrant_service import QdrantVectorService
16
  from advanced_rag import AdvancedRAG
17
+ from cag_service import CAGService
18
  from pdf_parser import PDFIndexer
19
  from multimodal_pdf_parser import MultimodalPDFIndexer
20
 
 
58
  if hf_token:
59
  print("✓ Hugging Face token configured")
60
 
61
+ # Initialize Advanced RAG (Best Case 2025)
62
  advanced_rag = AdvancedRAG(
63
  embedding_service=embedding_service,
64
  qdrant_service=qdrant_service
65
  )
66
+ print("✓ Advanced RAG pipeline initialized (with Cross-Encoder)")
67
+
68
+ # Initialize CAG Service (Semantic Cache)
69
+ try:
70
+ cag_service = CAGService(
71
+ embedding_service=embedding_service,
72
+ cache_collection="semantic_cache",
73
+ vector_size=embedding_service.get_embedding_dimension(),
74
+ similarity_threshold=0.9,
75
+ ttl_hours=24
76
+ )
77
+ print("✓ CAG Service initialized (Semantic Caching enabled)")
78
+ except Exception as e:
79
+ print(f"Warning: CAG Service initialization failed: {e}")
80
+ print("Continuing without semantic caching...")
81
+ cag_service = None
82
 
83
  # Initialize PDF Indexer
84
  pdf_indexer = PDFIndexer(
 
125
  message: str
126
  use_rag: bool = True
127
  top_k: int = 3
128
+ system_message: Optional[str] = """Bạn trợ AI chuyên biệt cho hệ thống quản lý sự kiện và mạng xã hội.
129
+ Vai trò của bạn là trả lời các câu hỏi CHÍNH XÁC dựa trên dữ liệu được cung cấp từ hệ thống.
130
+
131
+ Quy tắc tuyệt đối:
132
+ - CHỈ trả lời câu hỏi liên quan đến: events, social media posts, PDFs đã upload, và dữ liệu trong knowledge base
133
+ - KHÔNG trả lời câu hỏi ngoài phạm vi (tin tức, thời tiết, toán học, lập trình, tư vấn cá nhân, v.v.)
134
+ - Nếu câu hỏi nằm ngoài phạm vi: BẮT BUỘC trả lời "Chúng tôi không thể trả lời câu hỏi này vì nó nằm ngoài vùng application xử lí."
135
+ - Luôn ưu tiên thông tin từ context được cung cấp"""
136
  max_tokens: int = 512
137
  temperature: float = 0.7
138
  top_p: float = 0.95
 
143
  use_reranking: bool = True
144
  use_compression: bool = True
145
  score_threshold: float = 0.5
146
+ # Advanced RAG options
147
+ use_advanced_rag: bool = True
148
+ use_query_expansion: bool = True
149
+ use_reranking: bool = True
150
+ use_compression: bool = True
151
+ score_threshold: float = 0.5
152
 
153
 
154
  class ChatResponse(BaseModel):
 
156
  context_used: List[Dict]
157
  timestamp: str
158
  rag_stats: Optional[Dict] = None # Stats from advanced RAG pipeline
159
+ rag_stats: Optional[Dict] = None # Stats from advanced RAG pipeline
160
 
161
 
162
  class AddDocumentRequest(BaseModel):
 
178
  message: str
179
 
180
 
181
class UploadPDFResponse(BaseModel):
    """Response body shared by the PDF upload endpoints."""

    # True when the upload handler completed indexing without raising
    success: bool
    # ID the document was indexed under (caller-supplied or auto-generated)
    document_id: str
    # Filename reported back by the indexer
    filename: str
    # Number of chunks the indexer reported as indexed
    chunks_indexed: int
    # Human-readable status message
    message: str
187
+
188
+
189
  @app.get("/")
190
  async def root():
191
  """Health check endpoint with comprehensive API documentation"""
 
193
  "status": "running",
194
  "service": "ChatbotRAG API - Advanced RAG with Multimodal Support",
195
  "version": "3.0.0",
196
+ "service": "ChatbotRAG API - Advanced RAG with Multimodal Support",
197
+ "version": "3.0.0",
198
  "vector_db": "Qdrant",
199
  "document_db": "MongoDB",
200
  "features": {
 
205
  "chat_history": "Track conversation history",
206
  "hybrid_search": "Text + image search with Jina CLIP v2"
207
  },
208
+ "document_db": "MongoDB",
209
+ "features": {
210
+ "multiple_inputs": "Index up to 10 texts + 10 images per request",
211
+ "advanced_rag": "Query expansion, reranking, contextual compression",
212
+ "pdf_support": "Upload PDFs and chat about their content",
213
+ "multimodal_pdf": "PDFs with text and image URLs - perfect for user guides",
214
+ "chat_history": "Track conversation history",
215
+ "hybrid_search": "Text + image search with Jina CLIP v2"
216
+ },
217
  "endpoints": {
218
+ "indexing": {
219
+ "POST /index": {
220
+ "description": "Index multiple texts and images (NEW: up to 10 each)",
221
+ "content_type": "multipart/form-data",
222
+ "body": {
223
+ "id": "string (required) - Document ID (primary)",
224
+ "texts": "List[string] (optional) - Up to 10 texts",
225
+ "images": "List[UploadFile] (optional) - Up to 10 images",
226
+ "id_use": "string (optional) - ID của SocialMedia hoặc EventCode",
227
+ "id_user": "string (optional) - ID của User"
228
+ },
229
+ "example": "curl -X POST '/index' -F 'id=doc1' -F 'id_use=social_123' -F 'id_user=user_789' -F 'texts=Text 1' -F 'images=@img1.jpg'",
230
  "indexing": {
231
  "POST /index": {
232
  "description": "Index multiple texts and images (NEW: up to 10 each)",
 
243
  "success": True,
244
  "id": "doc1",
245
  "message": "Indexed successfully with 2 texts and 1 images"
246
+ "success": True,
247
+ "id": "doc1",
248
+ "message": "Indexed successfully with 2 texts and 1 images"
249
  },
250
  "use_cases": {
251
  "social_media_post": {
 
261
  "description": "Link post to event and user"
262
  }
263
  }
264
+ "use_cases": {
265
+ "social_media_post": {
266
+ "id": "post_uuid_123",
267
+ "id_use": "social_media_456",
268
+ "id_user": "user_789",
269
+ "description": "Link post to social media account and user"
270
+ },
271
+ "event_post": {
272
+ "id": "post_uuid_789",
273
+ "id_use": "event_code_ABC123",
274
+ "id_user": "user_101",
275
+ "description": "Link post to event and user"
276
+ }
277
+ }
278
  },
279
  "POST /documents": {
280
  "description": "Add text document to knowledge base",
 
299
  },
300
  "example": "curl -X POST '/upload-pdf' -F 'file=@guide.pdf' -F 'title=User Guide'"
301
  },
302
+ "POST /upload-pdf-multimodal": {
303
+ "description": "Upload PDF with text and image URLs (RECOMMENDED for user guides)",
304
+ "content_type": "multipart/form-data",
305
+ "features": [
306
+ "Extracts text from PDF",
307
+ "Detects image URLs (http://, https://)",
308
+ "Supports markdown: ![alt](url)",
309
+ "Supports HTML: <img src='url'>",
310
+ "Links images to text chunks",
311
+ "Returns images with context in chat"
312
+ ],
313
+ "body": {
314
+ "file": "UploadFile (required) - PDF file with image URLs",
315
+ "title": "string (optional) - Document title",
316
+ "category": "string (optional) - e.g. 'user_guide', 'tutorial'",
317
+ "description": "string (optional)"
318
+ },
319
+ "example": "curl -X POST '/upload-pdf-multimodal' -F 'file=@guide_with_images.pdf' -F 'category=user_guide'",
320
+ "description": "Add text document to knowledge base",
321
+ "content_type": "application/json",
322
+ "body": {
323
+ "text": "string (required) - Document content",
324
+ "metadata": "object (optional) - Additional metadata"
325
+ },
326
+ "example": {
327
+ "text": "How to create event: Click 'Create Event' button...",
328
+ "metadata": {"category": "tutorial", "source": "user_guide"}
329
+ }
330
+ },
331
+ "POST /upload-pdf": {
332
+ "description": "Upload PDF file (text only)",
333
+ "content_type": "multipart/form-data",
334
+ "body": {
335
+ "file": "UploadFile (required) - PDF file",
336
+ "title": "string (optional) - Document title",
337
+ "category": "string (optional) - Category",
338
+ "description": "string (optional) - Description"
339
+ },
340
+ "example": "curl -X POST '/upload-pdf' -F 'file=@guide.pdf' -F 'title=User Guide'"
341
+ },
342
  "POST /upload-pdf-multimodal": {
343
  "description": "Upload PDF with text and image URLs (RECOMMENDED for user guides)",
344
  "content_type": "multipart/form-data",
 
362
  "document_id": "pdf_multimodal_20251029_150000",
363
  "chunks_indexed": 25,
364
  "message": "PDF indexed with 25 chunks and 15 images"
365
+ "success": True,
366
+ "document_id": "pdf_multimodal_20251029_150000",
367
+ "chunks_indexed": 25,
368
+ "message": "PDF indexed with 25 chunks and 15 images"
369
  },
370
  "use_case": "Perfect for user guides with screenshots, tutorials with diagrams"
371
  }
372
  },
373
+ "search": {
374
+ "POST /search": {
375
+ "description": "Hybrid search with text and/or image",
376
+ "body": {
377
+ "text": "string (optional) - Query text",
378
+ "image": "UploadFile (optional) - Query image",
379
+ "limit": "int (default: 10)",
380
+ "score_threshold": "float (optional, 0-1)",
381
+ "text_weight": "float (default: 0.5)",
382
+ "image_weight": "float (default: 0.5)"
383
+ }
384
+ },
385
+ "POST /search/text": {
386
+ "description": "Text-only search",
387
+ "body": {"text": "string", "limit": "int", "score_threshold": "float"}
388
+ },
389
+ "POST /search/image": {
390
+ "description": "Image-only search",
391
+ "body": {"image": "UploadFile", "limit": "int", "score_threshold": "float"}
392
+ "use_case": "Perfect for user guides with screenshots, tutorials with diagrams"
393
+ }
394
+ },
395
  "search": {
396
  "POST /search": {
397
  "description": "Hybrid search with text and/or image",
 
446
  "use_reranking": True,
447
  "top_k": 5,
448
  "score_threshold": 0.5
449
+ "description": "Search in RAG knowledge base",
450
+ "body": {"query": "string", "top_k": "int (default: 5)", "score_threshold": "float (default: 0.5)"}
451
+ }
452
+ },
453
+ "chat": {
454
+ "POST /chat": {
455
+ "description": "Chat với Advanced RAG (Query expansion + Reranking + Compression)",
456
+ "content_type": "application/json",
457
+ "body": {
458
+ "message": "string (required) - User question",
459
+ "use_rag": "bool (default: true) - Enable RAG retrieval",
460
+ "use_advanced_rag": "bool (default: true) - Use advanced RAG pipeline (RECOMMENDED)",
461
+ "use_query_expansion": "bool (default: true) - Expand query with variations",
462
+ "use_reranking": "bool (default: true) - Rerank results for accuracy",
463
+ "use_compression": "bool (default: true) - Compress context to relevant parts",
464
+ "top_k": "int (default: 3) - Number of documents to retrieve",
465
+ "score_threshold": "float (default: 0.5) - Min relevance score (0-1)",
466
+ "max_tokens": "int (default: 512) - Max response tokens",
467
+ "temperature": "float (default: 0.7) - Creativity (0-1)",
468
+ "hf_token": "string (optional) - Hugging Face token"
469
+ },
470
+ "response": {
471
+ "response": "string - AI answer",
472
+ "context_used": "array - Retrieved documents with metadata",
473
+ "timestamp": "string",
474
+ "rag_stats": "object - RAG pipeline statistics (query variants, retrieval counts)"
475
+ },
476
+ "example_advanced": {
477
+ "message": "Làm sao để upload PDF có hình ảnh?",
478
+ "use_advanced_rag": True,
479
+ "use_reranking": True,
480
+ "top_k": 5,
481
+ "score_threshold": 0.5
482
+ },
483
+ "example_response_with_images": {
484
+ "response": "Để upload PDF có hình ảnh, sử dụng endpoint /upload-pdf-multimodal...",
485
+ "context_used": [
486
+ {
487
+ "id": "pdf_multimodal_...._p2_c1",
488
+ "confidence": 0.89,
489
+ "metadata": {
490
+ "text": "Bước 1: Chuẩn bị PDF với image URLs...",
491
+ "has_images": True,
492
+ "image_urls": [
493
+ "https://example.com/screenshot1.png",
494
+ "https://example.com/diagram.jpg"
495
+ ],
496
+ "num_images": 2,
497
+ "page": 2
498
+ }
499
+ }
500
+ ],
501
+ "rag_stats": {
502
+ "original_query": "Làm sao để upload PDF có hình ảnh?",
503
+ "expanded_queries": ["upload PDF hình ảnh", "PDF có ảnh"],
504
+ "initial_results": 10,
505
+ "after_rerank": 5,
506
+ "after_compression": 5
507
+ }
508
  },
509
+ "notes": [
510
+ "Advanced RAG significantly improves answer quality",
511
+ "When multimodal PDF is used, images are returned in metadata",
512
+ "Requires HUGGINGFACE_TOKEN for actual LLM generation"
513
+ ]
514
  "example_response_with_images": {
515
  "response": "Để upload PDF có hình ảnh, sử dụng endpoint /upload-pdf-multimodal...",
516
  "context_used": [
 
614
  "not_finding_info": "Lower score_threshold to 0.3-0.4, increase top_k to 7-10",
615
  "too_much_context": "Increase score_threshold to 0.6-0.7, decrease top_k to 3-5",
616
  "slow_responses": "Disable compression, use basic RAG, decrease top_k"
617
+ }
618
+ "description": "Get chat history",
619
+ "query_params": {"limit": "int (default: 10)", "skip": "int (default: 0)"},
620
+ "response": {"history": "array", "total": "int"}
621
+ }
622
+ },
623
+ "management": {
624
+ "GET /documents/pdf": {
625
+ "description": "List all PDF documents",
626
+ "response": {"documents": "array", "total": "int"}
627
+ },
628
+ "DELETE /documents/pdf/{document_id}": {
629
+ "description": "Delete PDF and all its chunks",
630
+ "response": {"success": "bool", "message": "string"}
631
+ },
632
+ "GET /document/{doc_id}": {
633
+ "description": "Get document by ID",
634
+ "response": {"success": "bool", "data": "object"}
635
+ },
636
+ "DELETE /delete/{doc_id}": {
637
+ "description": "Delete document by ID",
638
+ "response": {"success": "bool", "message": "string"}
639
+ },
640
+ "GET /stats": {
641
+ "description": "Get Qdrant collection statistics",
642
+ "response": {"vectors_count": "int", "segments": "int", "indexed_vectors_count": "int"}
643
+ }
644
  }
645
  },
646
+ "quick_start": {
647
+ "1_upload_multimodal_pdf": "curl -X POST '/upload-pdf-multimodal' -F 'file=@user_guide.pdf' -F 'title=Guide'",
648
+ "2_verify_upload": "curl '/documents/pdf'",
649
+ "3_chat_with_rag": "curl -X POST '/chat' -H 'Content-Type: application/json' -d '{\"message\": \"How to...?\", \"use_advanced_rag\": true}'",
650
+ "4_see_images_in_context": "response['context_used'][0]['metadata']['image_urls']"
651
+ },
652
+ "use_cases": {
653
+ "user_guide_with_screenshots": {
654
+ "endpoint": "/upload-pdf-multimodal",
655
+ "description": "PDFs with text instructions + image URLs for visual guidance",
656
+ "benefits": ["Images linked to text chunks", "Chatbot returns relevant screenshots", "Perfect for step-by-step guides"]
657
+ },
658
+ "simple_text_docs": {
659
+ "endpoint": "/upload-pdf",
660
+ "description": "Simple PDFs with text only (FAQ, policies, etc.)"
661
+ },
662
+ "social_media_posts": {
663
+ "endpoint": "/index",
664
+ "description": "Index multiple posts with texts (up to 10) and images (up to 10)"
665
+ },
666
+ "complex_queries": {
667
+ "endpoint": "/chat",
668
+ "description": "Use advanced RAG for better accuracy on complex questions",
669
+ "settings": {"use_advanced_rag": True, "use_reranking": True, "use_compression": True}
670
  }
671
  },
672
+ "best_practices": {
673
+ "pdf_format": [
674
+ "Include image URLs in text (http://, https://)",
675
+ "Use markdown format: ![alt](url) or HTML: <img src='url'>",
676
+ "Clear structure with headings and sections",
677
+ "Link images close to their related text"
678
+ ],
679
+ "chat_settings": {
680
+ "for_accuracy": {"temperature": 0.3, "use_advanced_rag": True, "use_reranking": True},
681
+ "for_creativity": {"temperature": 0.8, "use_advanced_rag": False},
682
+ "for_factual_answers": {"temperature": 0.3, "use_compression": True, "score_threshold": 0.6}
683
+ },
684
+ "retrieval_tuning": {
685
+ "not_finding_info": "Lower score_threshold to 0.3-0.4, increase top_k to 7-10",
686
+ "too_much_context": "Increase score_threshold to 0.6-0.7, decrease top_k to 3-5",
687
+ "slow_responses": "Disable compression, use basic RAG, decrease top_k"
688
+ }
689
+ },
690
+ "links": {
691
+ "docs": "http://localhost:8000/docs",
692
+ "redoc": "http://localhost:8000/redoc",
693
+ "openapi": "http://localhost:8000/openapi.json",
694
+ "guides": {
695
+ "multimodal_pdf": "See MULTIMODAL_PDF_GUIDE.md",
696
+ "advanced_rag": "See ADVANCED_RAG_GUIDE.md",
697
+ "pdf_general": "See PDF_RAG_GUIDE.md",
698
+ "quick_start": "See QUICK_START_PDF.md"
699
+ }
700
+ },
701
+ "system_info": {
702
+ "embedding_model": "Jina CLIP v2 (multimodal)",
703
+ "vector_db": "Qdrant with HNSW index",
704
+ "document_db": "MongoDB",
705
+ "rag_pipeline": "Advanced RAG with query expansion, reranking, compression",
706
+ "pdf_parser": "pypdfium2 with URL extraction",
707
+ "max_inputs": "10 texts + 10 images per /index request"
708
+ "openapi": "http://localhost:8000/openapi.json",
709
+ "guides": {
710
+ "multimodal_pdf": "See MULTIMODAL_PDF_GUIDE.md",
711
+ "advanced_rag": "See ADVANCED_RAG_GUIDE.md",
712
+ "pdf_general": "See PDF_RAG_GUIDE.md",
713
+ "quick_start": "See QUICK_START_PDF.md"
714
+ }
715
+ },
716
+ "system_info": {
717
+ "embedding_model": "Jina CLIP v2 (multimodal)",
718
+ "vector_db": "Qdrant with HNSW index",
719
+ "document_db": "MongoDB",
720
+ "rag_pipeline": "Advanced RAG with query expansion, reranking, compression",
721
+ "pdf_parser": "pypdfium2 with URL extraction",
722
+ "max_inputs": "10 texts + 10 images per /index request"
723
+ }
724
+ }
725
+
726
  @app.post("/index", response_model=IndexResponse)
727
  async def index_data(
728
  id: str = Form(...),
 
730
  images: Optional[List[UploadFile]] = File(None),
731
  id_use: Optional[str] = Form(None),
732
  id_user: Optional[str] = Form(None)
733
+ texts: Optional[List[str]] = Form(None),
734
+ images: Optional[List[UploadFile]] = File(None),
735
+ id_use: Optional[str] = Form(None),
736
+ id_user: Optional[str] = Form(None)
737
  ):
738
  """
739
  Index data vào vector database (hỗ trợ nhiều texts và images)
740
+ Index data vào vector database (hỗ trợ nhiều texts và images)
741
 
742
  Body:
743
  - id: Document ID (primary ID)
 
745
  - images: List of image files (optional) - Tối đa 10 images
746
  - id_use: ID của SocialMedia hoặc EventCode (optional)
747
  - id_user: ID của User (optional)
748
+ - id: Document ID (primary ID)
749
+ - texts: List of text contents (tiếng Việt supported) - Tối đa 10 texts
750
+ - images: List of image files (optional) - Tối đa 10 images
751
+ - id_use: ID của SocialMedia hoặc EventCode (optional)
752
+ - id_user: ID của User (optional)
753
 
754
  Returns:
755
  - success: True/False
756
  - id: Document ID
757
  - message: Status message
758
 
759
+ Example:
760
+ ```bash
761
+ curl -X POST '/index' \
762
+ -F 'id=doc123' \
763
+ -F 'id_use=social_media_456' \
764
+ -F 'id_user=user_789' \
765
+ -F 'texts=Post content 1' \
766
+ -F 'texts=Post content 2' \
767
+ -F 'images=@image1.jpg'
768
+ ```
769
+
770
  Example:
771
  ```bash
772
  curl -X POST '/index' \
 
789
  if images and len(images) > 10:
790
  raise HTTPException(status_code=400, detail="Tối đa 10 images")
791
 
792
+ # Validation
793
+ if texts is None and images is None:
794
+ raise HTTPException(status_code=400, detail="Phải cung cấp ít nhất texts hoặc images")
795
+
796
+ if texts and len(texts) > 10:
797
+ raise HTTPException(status_code=400, detail="Tối đa 10 texts")
798
+
799
+ if images and len(images) > 10:
800
+ raise HTTPException(status_code=400, detail="Tối đa 10 images")
801
+
802
  # Prepare embeddings
803
  text_embeddings = []
804
  image_embeddings = []
805
+ text_embeddings = []
806
+ image_embeddings = []
807
 
808
+ # Encode multiple texts (tiếng Việt)
809
+ if texts:
810
+ for text in texts:
811
+ if text and text.strip():
812
+ text_emb = embedding_service.encode_text(text)
813
+ text_embeddings.append(text_emb)
814
  # Encode multiple texts (tiếng Việt)
815
  if texts:
816
  for text in texts:
 
819
  text_embeddings.append(text_emb)
820
 
821
  # Encode multiple images
822
+ if images:
823
+ for image in images:
824
+ if image.filename: # Check if image is provided
825
+ image_bytes = await image.read()
826
+ pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
827
+ image_emb = embedding_service.encode_image(pil_image)
828
+ image_embeddings.append(image_emb)
829
+ # Encode multiple images
830
  if images:
831
  for image in images:
832
  if image.filename: # Check if image is provided
 
838
  # Combine embeddings
839
  all_embeddings = []
840
 
841
+ if text_embeddings:
842
+ # Average all text embeddings
843
+ avg_text_embedding = np.mean(text_embeddings, axis=0)
844
+ all_embeddings.append(avg_text_embedding)
845
+
846
+ if image_embeddings:
847
+ # Average all image embeddings
848
+ avg_image_embedding = np.mean(image_embeddings, axis=0)
849
+ all_embeddings.append(avg_image_embedding)
850
+
851
+ if not all_embeddings:
852
+ raise HTTPException(status_code=400, detail="Không có embedding nào được tạo từ texts hoặc images")
853
+
854
+ # Final combined embedding
855
+ combined_embedding = np.mean(all_embeddings, axis=0)
856
+ all_embeddings = []
857
+
858
  if text_embeddings:
859
  # Average all text embeddings
860
  avg_text_embedding = np.mean(text_embeddings, axis=0)
 
882
  "image_filenames": [img.filename for img in images] if images else [],
883
  "id_use": id_use if id_use else None, # ID của SocialMedia hoặc EventCode
884
  "id_user": id_user if id_user else None # ID của User
885
+ "texts": texts if texts else [],
886
+ "text_count": len(texts) if texts else 0,
887
+ "image_count": len(images) if images else 0,
888
+ "image_filenames": [img.filename for img in images] if images else [],
889
+ "id_use": id_use if id_use else None, # ID của SocialMedia hoặc EventCode
890
+ "id_user": id_user if id_user else None # ID của User
891
  }
892
 
893
  result = qdrant_service.index_data(
 
900
  success=True,
901
  id=result["original_id"], # Trả về MongoDB ObjectId
902
  message=f"Đã index thành công document {result['original_id']} với {len(texts) if texts else 0} texts và {len(images) if images else 0} images (Qdrant UUID: {result['qdrant_id']})"
903
+ message=f"Đã index thành công document {result['original_id']} với {len(texts) if texts else 0} texts và {len(images) if images else 0} images (Qdrant UUID: {result['qdrant_id']})"
904
  )
905
 
906
+ except HTTPException:
907
+ raise
908
  except HTTPException:
909
  raise
910
  except Exception as e:
 
1130
  async def chat(request: ChatRequest):
1131
  """
1132
  Chat endpoint với Advanced RAG
1133
+ Chat endpoint với Advanced RAG
1134
 
1135
  Body:
1136
  - message: User message
 
1145
  - use_reranking: Enable reranking (default: true)
1146
  - use_compression: Enable context compression (default: true)
1147
  - score_threshold: Minimum relevance score (default: 0.5)
1148
+ - use_advanced_rag: Use advanced RAG pipeline (default: true)
1149
+ - use_query_expansion: Enable query expansion (default: true)
1150
+ - use_reranking: Enable reranking (default: true)
1151
+ - use_compression: Enable context compression (default: true)
1152
+ - score_threshold: Minimum relevance score (default: 0.5)
1153
 
1154
  Returns:
1155
  - response: Generated response
1156
  - context_used: Retrieved context documents
1157
  - timestamp: Response timestamp
1158
  - rag_stats: Statistics from RAG pipeline
1159
+ - rag_stats: Statistics from RAG pipeline
1160
  """
1161
  try:
1162
+ # ============================================
1163
+ # CAG Layer: Check Semantic Cache First
1164
+ # ============================================
1165
+ cache_hit = None
1166
+ if cag_service and request.use_rag:
1167
+ cache_hit = cag_service.check_cache(request.message)
1168
+
1169
+ if cache_hit:
1170
+ # Cache hit! Return cached response immediately
1171
+ return ChatResponse(
1172
+ response=cache_hit["response"],
1173
+ context_used=cache_hit["context_used"],
1174
+ timestamp=datetime.utcnow().isoformat(),
1175
+ rag_stats={
1176
+ **cache_hit.get("rag_stats", {}),
1177
+ "cache_hit": True,
1178
+ "cached_query": cache_hit["cached_query"],
1179
+ "similarity_score": cache_hit["similarity_score"],
1180
+ "cached_at": cache_hit["cached_at"]
1181
+ }
1182
+ )
1183
+
1184
+ # ============================================
1185
+ # RAG Pipeline (if cache miss)
1186
+ # ============================================
1187
  # Retrieve context if RAG enabled
1188
  context_used = []
1189
  rag_stats = None
1190
 
1191
+ rag_stats = None
1192
+
1193
  if request.use_rag:
1194
  if request.use_advanced_rag:
1195
+ # Initialize LLM client for query expansion
1196
+ hf_client = None
1197
+ if request.hf_token or hf_token:
1198
+ hf_client = InferenceClient(token=request.hf_token or hf_token)
1199
+
1200
+ # Use Advanced RAG Pipeline (Best Case 2025)
1201
  documents, stats = advanced_rag.hybrid_rag_pipeline(
1202
  query=request.message,
1203
  top_k=request.top_k,
1204
  score_threshold=request.score_threshold,
1205
  use_reranking=request.use_reranking,
1206
  use_compression=request.use_compression,
1207
+ use_query_expansion=request.use_query_expansion,
1208
+ max_context_tokens=500,
1209
+ hf_client=hf_client
1210
  )
1211
 
1212
  # Convert to dict format for compatibility
 
1240
  doc_text = doc["metadata"].get("text", "")
1241
  confidence = doc["confidence"]
1242
  context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
1243
+ # Build context text (basic format)
1244
+ context_text = "\n\nRelevant Context:\n"
1245
+ for i, doc in enumerate(context_used, 1):
1246
+ doc_text = doc["metadata"].get("text", "")
1247
+ confidence = doc["confidence"]
1248
+ context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
1249
 
1250
  # Build system message with context
1251
+ if request.use_rag and context_used:
1252
+ if request.use_advanced_rag:
1253
+ # Use advanced prompt builder
1254
+ system_message = advanced_rag.build_rag_prompt(
1255
+ query=request.message,
1256
+ context=context_text,
1257
+ system_message=request.system_message
1258
+ )
1259
+ else:
1260
+ # Basic prompt
1261
+ system_message = f"{request.system_message}\n{context_text}\n\nPlease use the above context to answer the user's question when relevant."
1262
+ # Build system message with context
1263
  if request.use_rag and context_used:
1264
  if request.use_advanced_rag:
1265
  # Use advanced prompt builder
 
1330
  "timestamp": datetime.utcnow()
1331
  }
1332
  chat_history_collection.insert_one(chat_data)
1333
+
1334
+ # ============================================
1335
+ # CAG: Save to Cache (if RAG was used)
1336
+ # ============================================
1337
+ if cag_service and request.use_rag and context_used and response:
1338
+ try:
1339
+ cag_service.save_to_cache(
1340
+ query=request.message,
1341
+ response=response,
1342
+ context_used=context_used,
1343
+ rag_stats=rag_stats
1344
+ )
1345
+ except Exception as cache_error:
1346
+ print(f"Warning: Failed to save to cache: {cache_error}")
1347
 
1348
  return ChatResponse(
1349
  response=response,
1350
  context_used=context_used,
1351
  timestamp=datetime.utcnow().isoformat(),
1352
  rag_stats=rag_stats
1353
+ timestamp=datetime.utcnow().isoformat(),
1354
+ rag_stats=rag_stats
1355
  )
1356
 
1357
  except Exception as e:
 
1750
  raise HTTPException(status_code=500, detail=f"Error uploading multimodal PDF: {str(e)}")
1751
 
1752
 
1753
@app.post("/upload-pdf", response_model=UploadPDFResponse)
async def upload_pdf(
    file: UploadFile = File(...),
    document_id: Optional[str] = Form(None),
    title: Optional[str] = Form(None),
    description: Optional[str] = Form(None),
    category: Optional[str] = Form(None)
):
    """
    Upload a PDF file and index it into the knowledge base.

    Body (multipart/form-data):
    - file: PDF file (required)
    - document_id: Custom document ID (optional, auto-generated if not provided)
    - title: Document title (optional)
    - description: Document description (optional)
    - category: Document category (optional, e.g., "user_guide", "faq")

    Returns:
    - success: True/False
    - document_id: Document ID
    - filename: Original filename
    - chunks_indexed: Number of chunks created
    - message: Status message

    Raises:
    - HTTPException 400: filename is missing or does not end in ".pdf"
      (case-insensitive), or the uploaded file is empty
    - HTTPException 500: any other failure while reading or indexing

    Example:
        curl -X POST "http://localhost:8000/upload-pdf" -F "file=@user_guide.pdf" -F "title=ChatbotRAG user guide" -F "category=user_guide"
    """
    try:
        # The client-supplied filename may be absent; treat that as a
        # non-PDF upload (400) instead of crashing into the generic 500 path.
        filename = file.filename or ""
        if not filename.lower().endswith('.pdf'):
            raise HTTPException(status_code=400, detail="Only PDF files are allowed")

        # Generate document ID if not provided (second-resolution timestamp)
        if not document_id:
            from datetime import datetime
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            document_id = f"pdf_{timestamp}"

        # Read PDF bytes; an empty body cannot be a valid PDF
        pdf_bytes = await file.read()
        if not pdf_bytes:
            raise HTTPException(status_code=400, detail="Uploaded PDF file is empty")

        # Only forward the optional fields the caller actually supplied
        optional_fields = (
            ('title', title),
            ('description', description),
            ('category', category),
        )
        metadata = {key: value for key, value in optional_fields if value}

        # Index PDF
        result = pdf_indexer.index_pdf_bytes(
            pdf_bytes=pdf_bytes,
            document_id=document_id,
            filename=filename,
            document_metadata=metadata
        )

        return UploadPDFResponse(
            success=True,
            document_id=result['document_id'],
            filename=result['filename'],
            chunks_indexed=result['chunks_indexed'],
            message=f"PDF '{file.filename}' đã được index thành công với {result['chunks_indexed']} chunks"
        )

    except HTTPException:
        # Deliberate 4xx responses must pass through unchanged
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error uploading PDF: {str(e)}") from e
1829
+
1830
+
1831
@app.get("/documents/pdf")
async def list_pdf_documents():
    """
    List all PDF documents in the knowledge base.

    Returns:
    - documents: List of PDF document records (without Mongo's `_id` field)
    - total: Number of records returned

    Raises:
    - HTTPException 500: the database query failed
    """
    pdf_only = {"type": "pdf"}
    # Projection that drops the `_id` field from every returned record
    without_mongo_id = {"_id": 0}
    try:
        cursor = documents_collection.find(pdf_only, without_mongo_id)
        pdf_docs = list(cursor)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")

    return {"documents": pdf_docs, "total": len(pdf_docs)}
1847
+
1848
+
1849
@app.delete("/documents/pdf/{document_id}")
async def delete_pdf_document(document_id: str):
    """
    Delete a PDF document and all of its chunks from the knowledge base.

    Args:
    - document_id: Document ID

    Returns:
    - success: True/False
    - message: Status message

    Raises:
    - HTTPException 404: no PDF document with this ID exists
    - HTTPException 500: the database lookup or delete failed
    """
    try:
        # Use one filter for both lookup and delete so a non-PDF record that
        # happens to share this document_id can never be removed by mistake.
        pdf_filter = {"document_id": document_id, "type": "pdf"}
        doc = documents_collection.find_one(pdf_filter)

        if not doc:
            raise HTTPException(status_code=404, detail=f"PDF document {document_id} not found")

        # Delete all chunks from Qdrant. This stays best-effort (a chunk may
        # already be gone), but failures are reported instead of silently
        # swallowed, and only Exception is caught so cancellation/interrupts
        # still propagate.
        chunk_ids = doc.get('chunk_ids') or []
        failed_chunks = 0
        for chunk_id in chunk_ids:
            try:
                qdrant_service.delete_by_id(chunk_id)
            except Exception as chunk_error:
                failed_chunks += 1
                print(f"Warning: Failed to delete chunk {chunk_id}: {chunk_error}")

        if failed_chunks:
            print(f"Warning: {failed_chunks}/{len(chunk_ids)} chunks of {document_id} could not be deleted")

        # Delete from MongoDB
        documents_collection.delete_one(pdf_filter)

        return {
            "success": True,
            "message": f"PDF document {document_id} and {len(chunk_ids)} chunks deleted"
        }

    except HTTPException:
        # Deliberate 4xx responses must pass through unchanged
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e
1888
+
1889
+
1890
@app.post("/upload-pdf-multimodal", response_model=UploadPDFResponse)
async def upload_pdf_multimodal(
    file: UploadFile = File(...),
    document_id: Optional[str] = Form(None),
    title: Optional[str] = Form(None),
    description: Optional[str] = Form(None),
    category: Optional[str] = Form(None)
):
    """
    Upload a PDF with text and image URLs (for user guides with screenshots).

    This endpoint is intended for PDFs containing:
    - Text instructions
    - Image URLs (http://... or https://...)
    - Markdown images: ![alt](url)
    - HTML images: <img src="url">

    The uploaded bytes are handed to the multimodal PDF indexer, which is
    expected to extract text, detect image URLs, and attach them to the
    corresponding text chunks.

    Body (multipart/form-data):
    - file: PDF file (required)
    - document_id: Custom document ID (optional, auto-generated if not provided)
    - title: Document title (optional)
    - description: Document description (optional)
    - category: Document category (optional, e.g., "user_guide", "tutorial")

    Returns:
    - success: True/False
    - document_id: Document ID
    - filename: Original filename
    - chunks_indexed: Number of chunks created
    - message: Status message (includes image count)

    Raises:
    - HTTPException 400: filename is missing or does not end in ".pdf"
      (case-insensitive), or the uploaded file is empty
    - HTTPException 500: any other failure while reading or indexing

    Example:
        curl -X POST "http://localhost:8000/upload-pdf-multimodal" -F "file=@user_guide_with_images.pdf" -F "title=Illustrated guide" -F "category=user_guide"
    """
    try:
        # The client-supplied filename may be absent; treat that as a
        # non-PDF upload (400) instead of crashing into the generic 500 path.
        filename = file.filename or ""
        if not filename.lower().endswith('.pdf'):
            raise HTTPException(status_code=400, detail="Only PDF files are allowed")

        # Generate document ID if not provided (second-resolution timestamp)
        if not document_id:
            from datetime import datetime
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            document_id = f"pdf_multimodal_{timestamp}"

        # Read PDF bytes; an empty body cannot be a valid PDF
        pdf_bytes = await file.read()
        if not pdf_bytes:
            raise HTTPException(status_code=400, detail="Uploaded PDF file is empty")

        # Always tag the record as multimodal, then add the optional fields
        # the caller actually supplied.
        metadata = {'type': 'multimodal'}
        optional_fields = (
            ('title', title),
            ('description', description),
            ('category', category),
        )
        metadata.update({key: value for key, value in optional_fields if value})

        # Index PDF with multimodal parser
        result = multimodal_pdf_indexer.index_pdf_bytes(
            pdf_bytes=pdf_bytes,
            document_id=document_id,
            filename=filename,
            document_metadata=metadata
        )

        return UploadPDFResponse(
            success=True,
            document_id=result['document_id'],
            filename=result['filename'],
            chunks_indexed=result['chunks_indexed'],
            message=f"PDF '{file.filename}' indexed successfully with {result['chunks_indexed']} chunks and {result.get('images_found', 0)} images"
        )

    except HTTPException:
        # Deliberate 4xx responses must pass through unchanged
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error uploading multimodal PDF: {str(e)}") from e
1990
+
1991
+
1992
  if __name__ == "__main__":
1993
  import uvicorn
1994
  uvicorn.run(
requirements.txt CHANGED
@@ -14,6 +14,9 @@ torchvision>=0.15.0
14
  pillow>=10.0.0
15
  numpy>=1.24.0
16
 
 
 
 
17
  # Vector Database
18
  qdrant-client>=1.12.1
19
  grpcio>=1.60.0
@@ -31,4 +34,3 @@ einops
31
  # PDF Processing
32
  pypdfium2>=4.30.0
33
 
34
-
 
14
  pillow>=10.0.0
15
  numpy>=1.24.0
16
 
17
+ # RAG & Reranking (Best Case 2025)
18
+ sentence-transformers>=2.0.0
19
+
20
  # Vector Database
21
  qdrant-client>=1.12.1
22
  grpcio>=1.60.0
 
34
  # PDF Processing
35
  pypdfium2>=4.30.0
36