Spaces:
Sleeping
Sleeping
File size: 7,698 Bytes
92d2175 c282f35 92d2175 c282f35 ef2a762 c282f35 92d2175 c282f35 92d2175 c282f35 92d2175 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
"""
Wikipedia Tool - Search for information on Wikipedia
"""
import wikipedia
from typing import Dict, Any, Optional
import os
from huggingface_hub import InferenceClient
# Build the Hugging Face inference client used to optimize search queries.
# When HF_TOKEN is not set in the environment, ai_client stays None and
# callers are expected to check for that before using it.
HF_TOKEN = os.environ.get("HF_TOKEN")
ai_client = InferenceClient(provider="auto", api_key=HF_TOKEN) if HF_TOKEN else None
def _page_to_result(page: Any, summary_length: int, query_used: str, search_method: str) -> Dict[str, Any]:
    """Build the success payload for a resolved Wikipedia page object."""
    summary = page.summary
    # Read ``categories`` once; fall back to an empty list when it is missing.
    categories = getattr(page, "categories", None) or []
    return {
        "success": True,
        "title": page.title,
        "summary": summary[:summary_length] if summary else "No summary available",
        "url": page.url,
        "categories": categories[:5],
        "query_used": query_used,
        "search_method": search_method,
    }


def _first_resolvable_page(titles):
    """Return ``(page, title)`` for the first title that loads, else ``(None, None)``."""
    for title in titles:
        try:
            # These titles were returned by Wikipedia itself, so they are looked
            # up verbatim instead of being rewritten by auto-suggest.
            return wikipedia.page(title, auto_suggest=False), title
        except (wikipedia.DisambiguationError, wikipedia.PageError):
            continue
    return None, None


def search_wikipedia(query: str, lang: str = "en", summary_length: int = 2000) -> Dict[str, Any]:
    """
    Look up a Wikipedia page for ``query`` and return a summary payload.

    The lookup is attempted in this order:
      1. Direct page lookup for ``query`` (``search_method == "direct"``).
      2. If the title is ambiguous, the first disambiguation option that
         resolves to a page (``"disambiguation_first"``); the remaining
         options (up to 4) are returned under ``other_options``.
      3. If no such page exists, the first full-text search hit that
         resolves to a page (``"search_first"``); the remaining hits
         (up to 4) are returned under ``other_results``.

    Args:
        query: Search keywords or page title.
        lang: Wikipedia language code (default: "en"). Note that this is set
            globally on the ``wikipedia`` module.
        summary_length: Maximum number of characters kept from the summary.

    Returns:
        On success, a dict with ``success=True``, ``title``, ``summary``,
        ``url``, ``categories`` (at most 5), ``query_used`` and
        ``search_method``. On failure, a dict with ``success=False``,
        ``error``, ``query_used`` and ``search_method`` (``"search_failed"``
        or ``"error"``). This function does not raise.
    """
    # Reject empty/blank queries up front instead of spending a network call.
    if not query or not str(query).strip():
        return {
            "success": False,
            "error": "Wikipedia search error: empty query",
            "query_used": query,
            "search_method": "error",
        }

    try:
        wikipedia.set_lang(lang)

        try:
            page = wikipedia.page(query)
            return _page_to_result(page, summary_length, query, "direct")

        except wikipedia.DisambiguationError as e:
            # Several pages match: take the first option that actually loads.
            # Only the first few options are tried to bound network calls.
            options = list(e.options or [])
            page, chosen = _first_resolvable_page(options[:5])
            if page is None:
                # Nothing usable; let the outer handler report the ambiguity.
                raise
            result = _page_to_result(page, summary_length, chosen, "disambiguation_first")
            result["other_options"] = [title for title in options if title != chosen][:4]
            return result

        except wikipedia.PageError:
            # No page with that title: fall back to full-text search.
            search_results = wikipedia.search(query, results=5)
            page, chosen = _first_resolvable_page(search_results)
            if page is None:
                return {
                    "success": False,
                    "error": f"No Wikipedia results found for: {query}",
                    "query_used": query,
                    "search_method": "search_failed",
                }
            result = _page_to_result(page, summary_length, chosen, "search_first")
            result["other_results"] = [title for title in search_results if title != chosen][:4]
            return result

    except Exception as e:
        # Top-level boundary: report any failure as a payload, never raise.
        return {
            "success": False,
            "error": f"Wikipedia search error: {str(e)}",
            "query_used": query,
            "search_method": "error",
        }
def extract_search_query_from_question(question: str) -> str:
    """
    Derive a short keyword query from a natural-language question.

    The question is lower-cased and split on whitespace; question words,
    common stop words and tokens of two characters or fewer are dropped, and
    at most the first four remaining tokens are joined with spaces.

    Args:
        question: The user's question.

    Returns:
        The keyword query, or — when no keyword survives filtering — the
        original question with "?" removed and surrounding whitespace stripped.
    """
    question_words = frozenset(
        ["who", "what", "when", "where", "why", "how", "which", "whose"]
    )
    stop_words = frozenset(
        ["is", "are", "was", "were", "the", "a", "an", "and", "or", "but",
         "in", "on", "at", "to", "for", "of", "with", "by"]
    )
    # Sentence punctuation is stripped from token edges only, so that
    # "capital," matches like "capital" while "c++" or "node.js" stay intact.
    edge_punctuation = ".,;:!?\"'()[]{}"

    tokens = (
        raw.strip(edge_punctuation)
        for raw in question.lower().replace("?", "").split()
    )
    keywords = [
        token for token in tokens
        if len(token) > 2 and token not in question_words and token not in stop_words
    ]

    if keywords:
        return " ".join(keywords[:4])  # keep at most 4 keywords
    # Fallback: nothing survived filtering, so use the whole question.
    return question.replace("?", "").strip()
def _clean_ai_query(raw: Optional[str]) -> str:
    """Reduce a raw model reply to a bare search phrase ("" if unusable)."""
    text = (raw or "").strip()
    if "</think>" in text:
        # Keep only what follows the reasoning block.
        text = text.rsplit("</think>", 1)[1].strip()
    elif text.startswith("<think>"):
        # Reasoning was cut off by the token limit; there is no answer to use.
        return ""
    # The prompt asks for the query only, so take the first non-empty line.
    first_line = next((line.strip() for line in text.splitlines() if line.strip()), "")
    # Drop quotes/backticks the model may wrap around the phrase.
    return first_line.strip("\"'` ")


def optimize_wiki_query_with_ai(question: str) -> str:
    """
    Use the AI client to produce a Wikipedia search query for ``question``.

    Falls back to ``extract_search_query_from_question`` when no AI client is
    configured, when the model call fails, or when the model returns nothing
    usable.

    Args:
        question: The user's question.

    Returns:
        A short search phrase.
    """
    if not ai_client:
        return extract_search_query_from_question(question)

    prompt = (
        "\n"
        "Given the following question, extract the best possible Wikipedia search query "
        "(a short phrase or entity name, not a full sentence). "
        "Only output the search query, nothing else.\n"
        f"Question: {question}\n"
    )
    try:
        completion = ai_client.chat.completions.create(
            model="Qwen/Qwen3-8B",
            # "/no_think" is the Qwen3 soft switch that disables thinking mode;
            # with only 32 tokens, a reasoning block would crowd out the answer.
            messages=[{"role": "user", "content": prompt + "\n/no_think"}],
            max_tokens=32
        )
        query = _clean_ai_query(completion.choices[0].message.content)
        # Empty or unusable model output: fall back to the heuristic extractor.
        if not query:
            return extract_search_query_from_question(question)
        return query
    except Exception as e:
        # Best effort: any client/API failure degrades to the heuristic extractor.
        print(f"[WikiTool] AI optimize query failed: {e}")
        return extract_search_query_from_question(question)
def search_wikipedia_from_question(question: str, lang: str = "en") -> Dict[str, Any]:
    """
    Search Wikipedia for a natural-language question.

    The question is first turned into a search query by
    ``optimize_wiki_query_with_ai``; that query is passed to
    ``search_wikipedia``. The returned dict is the search result with two
    extra keys: ``original_question`` and ``extracted_query``.
    """
    optimized = optimize_wiki_query_with_ai(question)
    preview = question[:50]  # only the first 50 characters are logged
    print(f"🔍 Wikipedia search query (AI optimized): '{optimized}' from question: '{preview}...'")

    outcome = search_wikipedia(optimized, lang)
    outcome.update(original_question=question, extracted_query=optimized)
    return outcome
def get_multiple_wikipedia_results(
    query: str,
    lang: str = "en",
    num_results: int = 3,
    summary_length: int = 500,
) -> Dict[str, Any]:
    """
    Fetch summaries for several Wikipedia search hits.

    Args:
        query: Search keywords.
        lang: Wikipedia language code (default: "en"). Note that this is set
            globally on the ``wikipedia`` module.
        num_results: Maximum number of search hits to request.
        summary_length: Maximum number of characters kept from each summary.

    Returns:
        On success, a dict with ``success=True``, ``query``, ``results`` (a
        list of ``{"title", "summary", "url"}`` dicts) and ``total_found``
        (the number of hits that could be loaded, which may be lower than
        ``num_results``). On failure, a dict with ``success=False``,
        ``error`` and ``query``. This function does not raise.
    """
    # Reject empty/blank queries up front instead of spending a network call.
    if not query or not str(query).strip():
        return {
            "success": False,
            "error": "Error getting multiple results: empty query",
            "query": query,
        }

    try:
        wikipedia.set_lang(lang)
        search_results = wikipedia.search(query, results=num_results)

        results = []
        for result_title in search_results:
            try:
                # Search hits are exact titles, so look them up verbatim.
                page = wikipedia.page(result_title, auto_suggest=False)
                summary = page.summary
                results.append({
                    "title": page.title,
                    "summary": summary[:summary_length] if summary else "No summary",
                    "url": page.url,
                })
            except Exception:
                # Best effort: skip hits that cannot be loaded (ambiguous,
                # missing, transient failure) but let KeyboardInterrupt and
                # SystemExit propagate.
                continue

        return {
            "success": True,
            "query": query,
            "results": results,
            "total_found": len(results),
        }
    except Exception as e:
        # Top-level boundary: report the failure as a payload.
        return {
            "success": False,
            "error": f"Error getting multiple results: {str(e)}",
            "query": query,
        }
# Manual smoke test: run this module directly to exercise each entry point.
# Each line prints either the expected field or the reported error.
if __name__ == "__main__":
    # Direct title lookup
    result1 = search_wikipedia("Mercedes Sosa")
    print("Direct search result:", result1["title"] if result1["success"] else result1["error"])

    # Question-based lookup (query derived from the question)
    result2 = search_wikipedia_from_question("Who was Mercedes Sosa?")
    print("Question-based result:", result2["title"] if result2["success"] else result2["error"])

    # Multiple search hits
    result3 = get_multiple_wikipedia_results("Python programming", num_results=2)
    print("Multiple results:", len(result3["results"]) if result3["success"] else result3["error"])