tuan3335 committed on
Commit
c282f35
·
1 Parent(s): 6bef95f

feat: use LangChain StrOutputParser for clean answer output; optimize Wikipedia search query with AI

Browse files
Files changed (2) hide show
  1. agent.py +19 -27
  2. utils/wiki_tool.py +39 -14
agent.py CHANGED
@@ -84,6 +84,7 @@ class AIBrain:
84
  prompt = f"""<instruction>
85
  Analyze this question and determine the correct tool approach. Return ONLY valid JSON.
86
 
 
87
  - If the question is about an event in the past or future (e.g., "when was", "in what year", "has ever", "will happen", "history", "prediction"), choose "wiki".
88
  - If the question asks about a specific topic, person, place, object, or event (e.g., "who is", "what is", "where is", "when is", "why", "how"), choose "wiki".
89
  - If the data source is unclear or you are not sure, prefer "wiki".
@@ -143,42 +144,33 @@ Return this exact JSON format:
143
 
144
  def generate_answer(self, question: str, tool_results: Dict[str, Any]) -> str:
145
  """Generate final answer using Qwen3 with context"""
146
-
147
  if tool_results and tool_results.get("tool_results"):
148
  context = build_context_summary(
149
  tool_results.get("tool_results", []),
150
  tool_results.get("cached_data", {})
151
  )
152
  else:
153
- context = "No additional context available"
154
-
155
- prompt = f"""<instruction>
156
- Generate a comprehensive answer to the user's question using the provided context.
157
- </instruction>
158
-
159
- <question>{question}</question>
160
 
161
- <context>
162
- {context}
163
- </context>
164
 
165
- <output_rules>
166
- - Provide direct, accurate answers
167
- - Use context information when relevant
168
- - Be concise but complete
169
- - No thinking process in output
170
- - Professional tone
171
- </output_rules>
172
-
173
- Answer:"""
174
-
175
  response = self._generate_with_qwen3(prompt, 2048)
176
-
177
- # Clean up response
178
- if "Answer:" in response:
179
- response = response.split("Answer:")[-1].strip()
180
-
181
- return response
 
 
 
 
 
 
182
 
183
  # Initialize AI Brain globally
184
  ai_brain = AIBrain()
 
84
  prompt = f"""<instruction>
85
  Analyze this question and determine the correct tool approach. Return ONLY valid JSON.
86
 
87
+ - If the question is about a historical event, a specific person, place, object, or something that requires searching the internet (e.g., Wikipedia), you MUST choose "wiki".
88
  - If the question is about an event in the past or future (e.g., "when was", "in what year", "has ever", "will happen", "history", "prediction"), choose "wiki".
89
  - If the question asks about a specific topic, person, place, object, or event (e.g., "who is", "what is", "where is", "when is", "why", "how"), choose "wiki".
90
  - If the data source is unclear or you are not sure, prefer "wiki".
 
144
 
145
  def generate_answer(self, question: str, tool_results: Dict[str, Any]) -> str:
146
  """Generate final answer using Qwen3 with context"""
 
147
  if tool_results and tool_results.get("tool_results"):
148
  context = build_context_summary(
149
  tool_results.get("tool_results", []),
150
  tool_results.get("cached_data", {})
151
  )
152
  else:
153
+ context = ""
154
+ prompt = f"""
155
+ Answer the following question with only the answer. Do not explain, do not add any extra text, do not repeat the question, do not add punctuation or any prefix/suffix. Just output the answer as short and direct as possible. If the answer is not available, reply with 'No data'.
 
 
 
 
156
 
157
+ Context (if any): {context}
 
 
158
 
159
+ Question: {question}
160
+ """
 
 
 
 
 
 
 
 
161
  response = self._generate_with_qwen3(prompt, 2048)
162
+ # Dùng LangChain StrOutputParser để lấy phần text cuối cùng
163
+ parser = StrOutputParser()
164
+ answer = parser.parse(response)
165
+ answer = answer.strip()
166
+ # Remove common prefixes
167
+ for prefix in ["Answer:", "The answer is", "FINAL ANSWER:", "Final answer:", "final answer:"]:
168
+ if answer.lower().startswith(prefix.lower()):
169
+ answer = answer[len(prefix):].strip()
170
+ # Remove trailing period if only one word/number
171
+ if answer.endswith(".") and answer.count(" ") < 2:
172
+ answer = answer[:-1].strip()
173
+ return answer
174
 
175
  # Initialize AI Brain globally
176
  ai_brain = AIBrain()
utils/wiki_tool.py CHANGED
@@ -4,6 +4,15 @@ Wikipedia Tool - Tìm kiếm thông tin trên Wikipedia
4
 
5
  import wikipedia
6
  from typing import Dict, Any, Optional
 
 
 
 
 
 
 
 
 
7
 
8
  def search_wikipedia(query: str, lang: str = "en", summary_length: int = 2000) -> Dict[str, Any]:
9
  """
@@ -109,27 +118,43 @@ def extract_search_query_from_question(question: str) -> str:
109
  # Fallback: lấy toàn bộ câu hỏi
110
  return question.replace("?", "").strip()
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def search_wikipedia_from_question(question: str, lang: str = "en") -> Dict[str, Any]:
113
  """
114
- Tìm kiếm Wikipedia từ câu hỏi, tự động trích xuất query
115
-
116
- Args:
117
- question: Câu hỏi cần tìm kiếm
118
- lang: Ngôn ngữ Wikipedia
119
-
120
- Returns:
121
- Dict chứa thông tin Wikipedia
122
  """
123
- # Trích xuất query từ câu hỏi
124
- search_query = extract_search_query_from_question(question)
125
-
126
- print(f"🔍 Wikipedia search query extracted: '{search_query}' from question: '{question[:50]}...'")
127
-
128
  # Tìm kiếm
129
  result = search_wikipedia(search_query, lang)
130
  result["original_question"] = question
131
  result["extracted_query"] = search_query
132
-
133
  return result
134
 
135
  def get_multiple_wikipedia_results(query: str, lang: str = "en", num_results: int = 3) -> Dict[str, Any]:
 
4
 
5
  import wikipedia
6
  from typing import Dict, Any, Optional
7
+ import os
8
+ from huggingface_hub import InferenceClient
9
+
10
# Create the Hugging Face inference client used to optimize search queries.
# Without an HF_TOKEN in the environment no client is created and
# ai_client stays None.
HF_TOKEN = os.environ.get("HF_TOKEN")
ai_client = InferenceClient(provider="auto", api_key=HF_TOKEN) if HF_TOKEN else None
16
 
17
  def search_wikipedia(query: str, lang: str = "en", summary_length: int = 2000) -> Dict[str, Any]:
18
  """
 
118
  # Fallback: lấy toàn bộ câu hỏi
119
  return question.replace("?", "").strip()
120
 
121
def optimize_wiki_query_with_ai(question: str) -> str:
    """Use the AI client to derive a concise Wikipedia search query.

    Falls back to ``extract_search_query_from_question`` when no AI client
    is configured, when the request fails, or when the model returns no
    usable text.

    Args:
        question: The natural-language question to search for.

    Returns:
        A short search phrase (single line, without wrapping quotes).
    """
    import re  # local import: only needed for the <think> clean-up below

    if not ai_client:
        return extract_search_query_from_question(question)

    # "/no_think" is Qwen3's soft switch for disabling thinking mode; without
    # it the 32-token budget can be spent entirely on reasoning text.
    prompt = (
        "\n"
        "Given the following question, extract the best possible Wikipedia search query (a short phrase or entity name, not a full sentence). Only output the search query, nothing else.\n"
        "\n"
        f"Question: {question}\n"
        "/no_think\n"
    )
    try:
        completion = ai_client.chat.completions.create(
            model="Qwen/Qwen3-8B",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=32
        )
        # content may be None; normalise to an empty string.
        raw = completion.choices[0].message.content or ""
    except Exception as e:
        # Deliberate best-effort: any failure of the remote call degrades to
        # the rule-based extraction instead of breaking the search.
        print(f"[WikiTool] AI optimize query failed: {e}")
        return extract_search_query_from_question(question)

    # Drop complete <think>...</think> blocks, and everything after an
    # unterminated opening tag (output truncated by max_tokens).
    text = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL)
    text = text.split("<think>", 1)[0]

    # Keep only the first non-empty line and strip wrapping quotes/backticks
    # so the search receives a bare phrase.
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    query = lines[0].strip("\"'`").strip() if lines else ""

    # If the AI returned nothing usable, fall back to rule-based extraction.
    if not query:
        return extract_search_query_from_question(question)
    return query
146
+
147
  def search_wikipedia_from_question(question: str, lang: str = "en") -> Dict[str, Any]:
148
  """
149
+ Tìm kiếm Wikipedia từ câu hỏi, tự động optimize query bằng AI
 
 
 
 
 
 
 
150
  """
151
+ # Optimize query bằng AI
152
+ search_query = optimize_wiki_query_with_ai(question)
153
+ print(f"🔍 Wikipedia search query (AI optimized): '{search_query}' from question: '{question[:50]}...'")
 
 
154
  # Tìm kiếm
155
  result = search_wikipedia(search_query, lang)
156
  result["original_question"] = question
157
  result["extracted_query"] = search_query
 
158
  return result
159
 
160
  def get_multiple_wikipedia_results(query: str, lang: str = "en", num_results: int = 3) -> Dict[str, Any]: