feat: use LangChain StrOutputParser for clean answer output; optimize Wikipedia search query with AI
- agent.py  +19 -27
- utils/wiki_tool.py  +39 -14
agent.py  CHANGED

@@ -84,6 +84,7 @@ class AIBrain:
         prompt = f"""<instruction>
 Analyze this question and determine the correct tool approach. Return ONLY valid JSON.
 
+- If the question is about a historical event, a specific person, place, object, or something that requires searching the internet (e.g., Wikipedia), you MUST choose "wiki".
 - If the question is about an event in the past or future (e.g., "when was", "in what year", "has ever", "will happen", "history", "prediction"), choose "wiki".
 - If the question asks about a specific topic, person, place, object, or event (e.g., "who is", "what is", "where is", "when is", "why", "how"), choose "wiki".
 - If the data source is unclear or you are not sure, prefer "wiki".

@@ -143,42 +144,33 @@ Return this exact JSON format:
 
     def generate_answer(self, question: str, tool_results: Dict[str, Any]) -> str:
         """Generate final answer using Qwen3 with context"""
-
         if tool_results and tool_results.get("tool_results"):
             context = build_context_summary(
                 tool_results.get("tool_results", []),
                 tool_results.get("cached_data", {})
             )
         else:
-            context = "
-
-        prompt = f"""<instruction>
-Generate a comprehensive answer to the user's question using the provided context.
-</instruction>
-
-<question>{question}</question>
-
-<context>
-{context}
-</context>
-
-<output_rules>
-- Use context information when relevant
-- Be concise but complete
-- No thinking process in output
-- Professional tone
-</output_rules>
-
-Answer:"""
-
+            context = ""
+        prompt = f"""
+Answer the following question with only the answer. Do not explain, do not add any extra text, do not repeat the question, do not add punctuation or any prefix/suffix. Just output the answer as short and direct as possible. If the answer is not available, reply with 'No data'.
+
+Context (if any): {context}
+
+Question: {question}
+"""
         response = self._generate_with_qwen3(prompt, 2048)
+        # Use LangChain StrOutputParser to extract the final text
+        parser = StrOutputParser()
+        answer = parser.parse(response)
+        answer = answer.strip()
+        # Remove common prefixes
+        for prefix in ["Answer:", "The answer is", "FINAL ANSWER:", "Final answer:", "final answer:"]:
+            if answer.lower().startswith(prefix.lower()):
+                answer = answer[len(prefix):].strip()
+        # Remove trailing period if only one word/number
+        if answer.endswith(".") and answer.count(" ") < 2:
+            answer = answer[:-1].strip()
+        return answer
 
 # Initialize AI Brain globally
 ai_brain = AIBrain()
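For reference, StrOutputParser().parse() returns the model's text unchanged, so the actual cleanup in generate_answer is done by the prefix and trailing-period stripping that follows it. Below is a minimal sketch of that post-processing as a standalone helper; the clean_answer name is hypothetical and the langchain_core.output_parsers import path is assumed, neither is part of this commit.

from langchain_core.output_parsers import StrOutputParser  # assumed import path

def clean_answer(raw: str) -> str:
    # Hypothetical helper mirroring the post-processing added to generate_answer
    answer = StrOutputParser().parse(raw).strip()  # parse() is a pass-through for plain strings
    # Strip common answer prefixes the model may emit
    for prefix in ["Answer:", "The answer is", "FINAL ANSWER:", "Final answer:", "final answer:"]:
        if answer.lower().startswith(prefix.lower()):
            answer = answer[len(prefix):].strip()
    # Drop a trailing period on one- or two-word answers
    if answer.endswith(".") and answer.count(" ") < 2:
        answer = answer[:-1].strip()
    return answer

print(clean_answer("FINAL ANSWER: Paris."))  # -> Paris

In a typical LangChain pipeline the parser would sit at the end of a chain (for example prompt | llm | StrOutputParser()); here it is applied directly to the string returned by _generate_with_qwen3, where it acts as a pass-through.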
utils/wiki_tool.py  CHANGED

@@ -4,6 +4,15 @@ Wikipedia Tool - Search for information on Wikipedia
 
 import wikipedia
 from typing import Dict, Any, Optional
+import os
+from huggingface_hub import InferenceClient
+
+# Initialize the AI client used to optimize queries
+HF_TOKEN = os.environ.get("HF_TOKEN")
+if HF_TOKEN:
+    ai_client = InferenceClient(provider="auto", api_key=HF_TOKEN)
+else:
+    ai_client = None
 
 def search_wikipedia(query: str, lang: str = "en", summary_length: int = 2000) -> Dict[str, Any]:
     """

@@ -109,27 +118,43 @@ def extract_search_query_from_question(question: str) -> str:
     # Fallback: use the whole question
     return question.replace("?", "").strip()
 
+def optimize_wiki_query_with_ai(question: str) -> str:
+    """
+    Use AI to generate the best Wikipedia search query from the question
+    """
+    if not ai_client:
+        return extract_search_query_from_question(question)
+    prompt = f"""
+Given the following question, extract the best possible Wikipedia search query (a short phrase or entity name, not a full sentence). Only output the search query, nothing else.
+
+Question: {question}
+"""
+    try:
+        completion = ai_client.chat.completions.create(
+            model="Qwen/Qwen3-8B",
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=32
+        )
+        query = completion.choices[0].message.content.strip()
+        # If the AI returns an empty result, fall back
+        if not query:
+            return extract_search_query_from_question(question)
+        return query
+    except Exception as e:
+        print(f"[WikiTool] AI optimize query failed: {e}")
+        return extract_search_query_from_question(question)
+
 def search_wikipedia_from_question(question: str, lang: str = "en") -> Dict[str, Any]:
     """
-    Search Wikipedia from the question, automatically
-
-    Args:
-        question: The question to search for
-        lang: Wikipedia language
-
-    Returns:
-        Dict containing Wikipedia information
+    Search Wikipedia from the question, automatically optimizing the query with AI
     """
-    search_query = extract_search_query_from_question(question)
-
-    print(f"🔍 Wikipedia search query extracted: '{search_query}' from question: '{question[:50]}...'")
-
+    # Optimize the query with AI
+    search_query = optimize_wiki_query_with_ai(question)
+    print(f"🔍 Wikipedia search query (AI optimized): '{search_query}' from question: '{question[:50]}...'")
     # Search
     result = search_wikipedia(search_query, lang)
     result["original_question"] = question
     result["extracted_query"] = search_query
-
     return result
 
 def get_multiple_wikipedia_results(query: str, lang: str = "en", num_results: int = 3) -> Dict[str, Any]:
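A minimal usage sketch for the updated tool, assuming the Space runs with utils on the import path. When HF_TOKEN is set, the query is produced by Qwen/Qwen3-8B through InferenceClient; otherwise ai_client is None and the heuristic extract_search_query_from_question fallback is used. The example question is illustrative only.

from utils.wiki_tool import search_wikipedia_from_question

result = search_wikipedia_from_question("In what year was the Eiffel Tower completed?")
print(result["extracted_query"])    # short, entity-style query chosen by the AI optimizer (or the fallback)
print(result["original_question"])  # the raw question is carried through on the result dict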