final_agent_course / utils /wiki_tool.py
tuan3335's picture
fix: append /no_thinking to all Qwen prompts to disable thinking mode via prompt
ef2a762
"""
Wikipedia Tool - Search for information on Wikipedia
"""
import wikipedia
from typing import Dict, Any, Optional
import os
from huggingface_hub import InferenceClient
# Build the inference client used to optimize search queries. When the
# HF_TOKEN environment variable is missing or empty, no client is created
# and ai_client stays None.
HF_TOKEN = os.environ.get("HF_TOKEN")
ai_client = InferenceClient(provider="auto", api_key=HF_TOKEN) if HF_TOKEN else None
def _page_to_result(page, query_used: str, search_method: str,
                    summary_length: int, **extra: Any) -> Dict[str, Any]:
    """Build the success payload shared by every lookup strategy."""
    # hasattr() only shields against AttributeError, so any other failure
    # while reading ``categories`` used to abort an otherwise successful
    # lookup. Categories are optional metadata: degrade to an empty list.
    try:
        categories = list(page.categories[:5])
    except Exception:
        categories = []

    result = {
        "success": True,
        "title": page.title,
        "summary": page.summary[:summary_length] if page.summary else "No summary available",
        "url": page.url,
        "categories": categories,
        "query_used": query_used,
        "search_method": search_method,
    }
    result.update(extra)
    return result


def _first_loadable_page(titles) -> Optional[tuple]:
    """Return ``(index, page)`` for the first title that loads, else ``None``."""
    for index, title in enumerate(titles):
        try:
            return index, wikipedia.page(title)
        except (wikipedia.PageError, wikipedia.DisambiguationError):
            # This candidate is missing or ambiguous itself; try the next one.
            continue
    return None


def search_wikipedia(query: str, lang: str = "en", summary_length: int = 2000) -> Dict[str, Any]:
    """
    Look up a topic on Wikipedia.

    Strategy: try ``query`` as a page title first. If it is ambiguous, use
    the first disambiguation option that loads; if no page matches, run a
    full-text search and use the first search hit that loads.

    Args:
        query: Search keywords or page title.
        lang: Wikipedia language code (default: "en").
        summary_length: Maximum number of characters kept from the summary.

    Returns:
        A dict. On success it has ``success=True`` plus ``title``,
        ``summary``, ``url``, ``categories``, ``query_used`` and
        ``search_method`` ("direct", "disambiguation_first" or
        "search_first"), and ``other_options`` / ``other_results`` listing
        the alternative candidates for the two fallback methods. On failure
        it has ``success=False``, ``error``, ``query_used`` and
        ``search_method`` ("search_failed" or "error"). Never raises.
    """
    # An empty query can only produce an error; skip the network round trip.
    if not isinstance(query, str) or not query.strip():
        return {
            "success": False,
            "error": "Wikipedia search error: empty query",
            "query_used": query,
            "search_method": "error",
        }

    try:
        wikipedia.set_lang(lang)

        try:
            page = wikipedia.page(query)
        except wikipedia.DisambiguationError as e:
            # Several pages match: take the first option that actually loads.
            candidates = list(e.options[:5])
            found = _first_loadable_page(candidates)
            if found is None:
                raise
            index, page = found
            return _page_to_result(
                page,
                candidates[index],
                "disambiguation_first",
                summary_length,
                other_options=candidates[:index] + candidates[index + 1:],
            )
        except wikipedia.PageError:
            # No page with that title: fall back to a full-text search.
            search_results = list(wikipedia.search(query, results=5))
            found = _first_loadable_page(search_results)
            if found is None:
                return {
                    "success": False,
                    "error": f"No Wikipedia results found for: {query}",
                    "query_used": query,
                    "search_method": "search_failed",
                }
            index, page = found
            remaining = search_results[:index] + search_results[index + 1:]
            return _page_to_result(
                page,
                search_results[index],
                "search_first",
                summary_length,
                other_results=remaining[:4],
            )

        return _page_to_result(page, query, "direct", summary_length)
    except Exception as e:
        # Boundary for callers: report every failure as a result dict.
        return {
            "success": False,
            "error": f"Wikipedia search error: {str(e)}",
            "query_used": query,
            "search_method": "error",
        }
def extract_search_query_from_question(question: str) -> str:
    """
    Derive a short keyword query from a natural-language question.

    The question is lowercased and split on whitespace. Question marks are
    removed, and quotes/brackets and sentence punctuation are trimmed from
    the edges of each token. Interrogatives, common stop words and tokens
    of two characters or fewer are then dropped, and at most the first four
    remaining words are kept.

    Args:
        question: The question text.

    Returns:
        The space-joined keywords, or the question itself (question marks
        removed, whitespace trimmed) when no keyword survives filtering.
    """
    ignored = {
        # interrogatives
        "who", "what", "when", "where", "why", "how", "which", "whose",
        # stop words
        "is", "are", "was", "were", "the", "a", "an", "and", "or", "but",
        "in", "on", "at", "to", "for", "of", "with", "by",
    }

    # Trim punctuation only at token edges so that "python," matches like
    # "python" while tokens such as "c++" or ".net" stay intact.
    cleaned = (
        token.lstrip("\"'([{").rstrip(".,;:!\"')]}")
        for token in question.lower().replace("?", "").split()
    )
    keywords = [word for word in cleaned if len(word) > 2 and word not in ignored]

    if keywords:
        return " ".join(keywords[:4])  # keep at most four keywords

    # Fallback: nothing survived filtering, so use the whole question.
    return question.replace("?", "").strip()
def _clean_ai_query(raw: Optional[str]) -> str:
    """Reduce a raw model reply to a bare search phrase ('' if unusable)."""
    if not raw:
        return ""
    text = raw
    if "</think>" in text:
        # Keep only what follows the reasoning block; it may be an empty
        # "<think></think>" pair even when thinking is switched off.
        text = text.rpartition("</think>")[2]
    elif "<think>" in text:
        # Reasoning was cut off before any answer was produced.
        return ""
    for line in text.splitlines():
        candidate = line.strip().strip("\"'`").strip()
        if candidate:
            return candidate
    return ""


def optimize_wiki_query_with_ai(question: str) -> str:
    """
    Ask the AI model for the best Wikipedia search query for a question.

    Falls back to ``extract_search_query_from_question`` when no AI client
    is configured, when the request fails, or when the reply contains no
    usable query.

    Args:
        question: The natural-language question.

    Returns:
        A short search phrase.
    """
    if not ai_client:
        return extract_search_query_from_question(question)

    prompt = (
        "\n"
        "Given the following question, extract the best possible Wikipedia search query "
        "(a short phrase or entity name, not a full sentence). "
        "Only output the search query, nothing else.\n"
        f"Question: {question}\n"
    )
    try:
        completion = ai_client.chat.completions.create(
            model="Qwen/Qwen3-8B",
            # "/no_think" is the Qwen3 soft switch that disables thinking mode.
            messages=[{"role": "user", "content": prompt + "\n/no_think"}],
            max_tokens=32,
        )
        query = _clean_ai_query(completion.choices[0].message.content)
    except Exception as e:
        print(f"[WikiTool] AI optimize query failed: {e}")
        return extract_search_query_from_question(question)

    # An empty or unusable reply falls back to the heuristic extractor.
    return query or extract_search_query_from_question(question)
def search_wikipedia_from_question(question: str, lang: str = "en") -> Dict[str, Any]:
    """
    Search Wikipedia for a natural-language question.

    The question is first turned into a search query by
    ``optimize_wiki_query_with_ai``; the result of ``search_wikipedia`` for
    that query is returned with two extra keys, ``original_question`` and
    ``extracted_query``.
    """
    optimized = optimize_wiki_query_with_ai(question)
    preview = question[:50]
    print(f"🔍 Wikipedia search query (AI optimized): '{optimized}' from question: '{preview}...'")

    outcome = search_wikipedia(optimized, lang)
    outcome.update(original_question=question, extracted_query=optimized)
    return outcome
def get_multiple_wikipedia_results(query: str, lang: str = "en", num_results: int = 3) -> Dict[str, Any]:
    """
    Fetch several Wikipedia pages matching a search query.

    Args:
        query: Search keywords.
        lang: Wikipedia language code (default: "en").
        num_results: Maximum number of search hits to request.

    Returns:
        On success: ``success=True``, ``query``, ``results`` (a list of
        dicts with ``title``, ``summary`` capped at 500 characters, and
        ``url``) and ``total_found`` (the number of pages actually loaded).
        On failure: ``success=False``, ``error`` and ``query``. Never raises.
    """
    # An empty query can only produce an error; skip the network round trip.
    if not isinstance(query, str) or not query.strip():
        return {
            "success": False,
            "error": "Error getting multiple results: empty query",
            "query": query,
        }

    try:
        wikipedia.set_lang(lang)
        titles = wikipedia.search(query, results=num_results)

        results = []
        for title in titles:
            try:
                page = wikipedia.page(title)
                results.append({
                    "title": page.title,
                    "summary": page.summary[:500] if page.summary else "No summary",
                    "url": page.url,
                })
            except Exception:
                # Best effort: skip hits that cannot be loaded. Unlike a bare
                # ``except:``, this lets KeyboardInterrupt/SystemExit through.
                continue

        return {
            "success": True,
            "query": query,
            "results": results,
            "total_found": len(results),
        }
    except Exception as e:
        return {
            "success": False,
            "error": f"Error getting multiple results: {str(e)}",
            "query": query,
        }
# Manual smoke test, run only when the module is executed as a script.
if __name__ == "__main__":
    # Direct lookup by title.
    direct = search_wikipedia("Mercedes Sosa")
    if direct["success"]:
        print("Direct search result:", direct["title"])
    else:
        print("Direct search result:", direct["error"])

    # Lookup starting from a natural-language question.
    from_question = search_wikipedia_from_question("Who was Mercedes Sosa?")
    if from_question["success"]:
        print("Question-based result:", from_question["title"])
    else:
        print("Question-based result:", from_question["error"])

    # Several pages for one query.
    multiple = get_multiple_wikipedia_results("Python programming", num_results=2)
    if multiple["success"]:
        print("Multiple results:", len(multiple["results"]))
    else:
        print("Multiple results:", multiple["error"])