feat: use LangChain StrOutputParser for clean answer output; optimize Wikipedia search query with AI
- agent.py  +19 -27
- utils/wiki_tool.py  +39 -14
agent.py  CHANGED

@@ -84,6 +84,7 @@ class AIBrain:
         prompt = f"""<instruction>
 Analyze this question and determine the correct tool approach. Return ONLY valid JSON.
 
+- If the question is about a historical event, a specific person, place, object, or something that requires searching the internet (e.g., Wikipedia), you MUST choose "wiki".
 - If the question is about an event in the past or future (e.g., "when was", "in what year", "has ever", "will happen", "history", "prediction"), choose "wiki".
 - If the question asks about a specific topic, person, place, object, or event (e.g., "who is", "what is", "where is", "when is", "why", "how"), choose "wiki".
 - If the data source is unclear or you are not sure, prefer "wiki".

@@ -143,42 +144,33 @@ Return this exact JSON format:
 
     def generate_answer(self, question: str, tool_results: Dict[str, Any]) -> str:
         """Generate final answer using Qwen3 with context"""
-
         if tool_results and tool_results.get("tool_results"):
             context = build_context_summary(
                 tool_results.get("tool_results", []),
                 tool_results.get("cached_data", {})
             )
         else:
-            context = "
-
-        prompt = f"""<instruction>
-Generate a comprehensive answer to the user's question using the provided context.
-</instruction>
-
-<question>{question}</question>
-
-<context>
-{context}
-</context>
-
-<output_rules>
-- Use context information when relevant
-- Be concise but complete
-- No thinking process in output
-- Professional tone
-</output_rules>
-
-Answer:"""
-
+            context = ""
+        prompt = f"""
+Answer the following question with only the answer. Do not explain, do not add any extra text, do not repeat the question, do not add punctuation or any prefix/suffix. Just output the answer as short and direct as possible. If the answer is not available, reply with 'No data'.
+
+Context (if any): {context}
+
+Question: {question}
+"""
         response = self._generate_with_qwen3(prompt, 2048)
+        # Use LangChain StrOutputParser to extract the final text
+        parser = StrOutputParser()
+        answer = parser.parse(response)
+        answer = answer.strip()
+        # Remove common prefixes
+        for prefix in ["Answer:", "The answer is", "FINAL ANSWER:", "Final answer:", "final answer:"]:
+            if answer.lower().startswith(prefix.lower()):
+                answer = answer[len(prefix):].strip()
+        # Remove trailing period if only one word/number
+        if answer.endswith(".") and answer.count(" ") < 2:
+            answer = answer[:-1].strip()
+        return answer
 
 # Initialize AI Brain globally
 ai_brain = AIBrain()
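For reference, StrOutputParser().parse() returns the model's text unchanged, so the actual cleanup in generate_answer is done by the prefix and trailing-period stripping that follows it. Below is a minimal sketch of that post-processing as a standalone helper; the clean_answer name is hypothetical and the langchain_core.output_parsers import path is assumed, neither is part of this commit.

from langchain_core.output_parsers import StrOutputParser  # assumed import path

def clean_answer(raw: str) -> str:
    # Hypothetical helper mirroring the post-processing added to generate_answer
    answer = StrOutputParser().parse(raw).strip()  # parse() is a pass-through for plain strings
    # Strip common answer prefixes the model may emit
    for prefix in ["Answer:", "The answer is", "FINAL ANSWER:", "Final answer:", "final answer:"]:
        if answer.lower().startswith(prefix.lower()):
            answer = answer[len(prefix):].strip()
    # Drop a trailing period on one- or two-word answers
    if answer.endswith(".") and answer.count(" ") < 2:
        answer = answer[:-1].strip()
    return answer

print(clean_answer("FINAL ANSWER: Paris."))  # -> Paris

In a typical LangChain pipeline the parser would sit at the end of a chain (for example prompt | llm | StrOutputParser()); here it is applied directly to the string returned by _generate_with_qwen3, where it acts as a pass-through.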
utils/wiki_tool.py  CHANGED

@@ -4,6 +4,15 @@ Wikipedia Tool - Search for information on Wikipedia
 
 import wikipedia
 from typing import Dict, Any, Optional
+import os
+from huggingface_hub import InferenceClient
+
+# Initialize the AI client used to optimize queries
+HF_TOKEN = os.environ.get("HF_TOKEN")
+if HF_TOKEN:
+    ai_client = InferenceClient(provider="auto", api_key=HF_TOKEN)
+else:
+    ai_client = None
 
 def search_wikipedia(query: str, lang: str = "en", summary_length: int = 2000) -> Dict[str, Any]:
     """

@@ -109,27 +118,43 @@ def extract_search_query_from_question(question: str) -> str:
     # Fallback: use the whole question
     return question.replace("?", "").strip()
 
+def optimize_wiki_query_with_ai(question: str) -> str:
+    """
+    Use AI to generate the best Wikipedia search query from the question
+    """
+    if not ai_client:
+        return extract_search_query_from_question(question)
+    prompt = f"""
+Given the following question, extract the best possible Wikipedia search query (a short phrase or entity name, not a full sentence). Only output the search query, nothing else.
+
+Question: {question}
+"""
+    try:
+        completion = ai_client.chat.completions.create(
+            model="Qwen/Qwen3-8B",
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=32
+        )
+        query = completion.choices[0].message.content.strip()
+        # If the AI returns an empty result, fall back
+        if not query:
+            return extract_search_query_from_question(question)
+        return query
+    except Exception as e:
+        print(f"[WikiTool] AI optimize query failed: {e}")
+        return extract_search_query_from_question(question)
+
 def search_wikipedia_from_question(question: str, lang: str = "en") -> Dict[str, Any]:
     """
-    Search Wikipedia from the question, automatically
-
-    Args:
-        question: The question to search for
-        lang: Wikipedia language
-
-    Returns:
-        Dict containing Wikipedia information
+    Search Wikipedia from the question, automatically optimizing the query with AI
     """
-    search_query = extract_search_query_from_question(question)
-
-    print(f"🔍 Wikipedia search query extracted: '{search_query}' from question: '{question[:50]}...'")
-
+    # Optimize the query with AI
+    search_query = optimize_wiki_query_with_ai(question)
+    print(f"🔍 Wikipedia search query (AI optimized): '{search_query}' from question: '{question[:50]}...'")
     # Search
     result = search_wikipedia(search_query, lang)
     result["original_question"] = question
     result["extracted_query"] = search_query
-
     return result
 
 def get_multiple_wikipedia_results(query: str, lang: str = "en", num_results: int = 3) -> Dict[str, Any]:
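A minimal usage sketch for the updated tool, assuming the Space runs with utils on the import path. When HF_TOKEN is set, the query is produced by Qwen/Qwen3-8B through InferenceClient; otherwise ai_client is None and the heuristic extract_search_query_from_question fallback is used. The example question is illustrative only.

from utils.wiki_tool import search_wikipedia_from_question

result = search_wikipedia_from_question("In what year was the Eiffel Tower completed?")
print(result["extracted_query"])    # short, entity-style query chosen by the AI optimizer (or the fallback)
print(result["original_question"])  # the raw question is carried through on the result dict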