Spaces:

tuanhqv123
/

final_agent_course

Sleeping

App Files Files Community

tuan3335 commited on Jun 26

Commit

a9b5cb5

1 Parent(s): 1cf80b8

Add structured output with Pydantic, fix tool selection logic, add YouTube cookies support, disable thinking mode

Browse files

Files changed (6) hide show

__pycache__/app.cpython-312.pyc +0 -0
agent.py +92 -71
cookies.txt +0 -0
requirements.txt +3 -0
utils/tool_orchestrator.py +12 -13
utils/youtube_tool.py +12 -1

__pycache__/app.cpython-312.pyc ADDED Viewed

Binary file (6.18 kB). View file

agent.py CHANGED Viewed

@@ -30,6 +30,9 @@ from huggingface_hub import InferenceClient
 # Groq imports for fallback
 from groq import Groq
 # Utils system imports
 from utils import (
     process_question_with_tools,
@@ -66,6 +69,25 @@ class AgentState(TypedDict):
     final_answer: str
     processing_complete: bool
 # =============================================================================
 # AI BRAIN WITH LANGCHAIN
 # =============================================================================
@@ -93,6 +115,30 @@ class LangChainQwen3Brain:
         print("🧠 LangChain Hybrid Brain initialized (HF + Groq fallback)")
     def _invoke_model(self, messages: List[Dict[str, str]]) -> str:
         """Invoke model with messages - try HF first, fallback to Groq"""
@@ -120,63 +166,48 @@ class LangChainQwen3Brain:
                 return completion.choices[0].message.content
             except Exception as groq_error:
                 return f"AI Error: Both HF ({str(hf_error)[:50]}) and Groq ({str(groq_error)[:50]}) failed"
     def analyze_question(self, question: str, task_id: str = "") -> Dict[str, Any]:
-        """AI analyzes question and decides approach"""
-        system_prompt = get_system_prompt("main_agent")
-        analysis_prompt = f"""
 Analyze this question and decide the approach:
 Question: "{question}"
 Task ID: "{task_id}"
-Provide your analysis in JSON format:
-{{
-    "question_type": "youtube|image|audio|wiki|file|text|math",
-    "needs_tools": true/false,
-    "reasoning": "your reasoning",
-    "confidence": "high|medium|low",
-    "can_answer_directly": true/false,
-    "suggested_approach": "brief description"
-}}
-Important:
-- If task_id is provided, likely has file attachment
-- Look for URLs, especially YouTube
-- Consider if question seems reversed/malformed
-- Be intelligent about what tools are actually needed
 """
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": analysis_prompt}
-        ]
-        response = self._invoke_model(messages)
-        # Try to parse JSON
-        try:
-            # Extract JSON from response
-            import re
-            json_match = re.search(r'\{.*\}', response, re.DOTALL)
-            if json_match:
-                analysis = json.loads(json_match.group())
-                return analysis
-            else:
-                raise ValueError("No JSON found")
-        except:
-            # Fallback analysis
-            question_type = analyze_question_type(question)
-            return {
-                "question_type": question_type,
-                "needs_tools": bool(task_id) or question_type != "text",
-                "reasoning": "JSON parsing failed, using fallback analysis",
-                "confidence": "medium",
-                "can_answer_directly": question_type == "text" and not task_id,
-                "suggested_approach": f"Use {question_type} processing"
-            }
     def generate_final_answer(self, question: str, tool_results: Dict[str, Any], context: str = "") -> str:
         """Generate final answer using LangChain"""
@@ -194,7 +225,7 @@ Important:
             "final_answer",
             question=question,
             context_summary=context_summary
-        )
         messages = [
             {"role": "system", "content": get_system_prompt("reasoning_agent")},
@@ -204,9 +235,13 @@ Important:
         return self._invoke_model(messages)
     def decide_on_reversed_text(self, original: str, reversed: str) -> Dict[str, Any]:
-        """AI decides which version of text to use"""
-        decision_prompt = f"""
 You are analyzing two versions of the same text to determine which makes more sense:
 Original: "{original}"
@@ -215,28 +250,14 @@ Reversed: "{reversed}"
 Analyze both versions and decide which one is more likely to be the correct question.
 Consider grammar, word order, and meaning.
-Respond in JSON format:
-{{
-    "chosen_version": "original|reversed",
-    "reasoning": "your reasoning",
-    "confidence": "high|medium|low"
-}}
 """
-        messages = [
-            {"role": "system", "content": "You are a text analysis expert."},
-            {"role": "user", "content": decision_prompt}
-        ]
-        response = self._invoke_model(messages)
-        try:
-            import re
-            json_match = re.search(r'\{.*\}', response, re.DOTALL)
-            if json_match:
-                return json.loads(json_match.group())
-        except:
-            pass
         # Fallback decision
         return {

 # Groq imports for fallback
 from groq import Groq
+# Pydantic for structured output
+from pydantic import BaseModel, Field
 # Utils system imports
 from utils import (
     process_question_with_tools,
     final_answer: str
     processing_complete: bool
+# =============================================================================
+# PYDANTIC SCHEMAS FOR STRUCTURED OUTPUT
+# =============================================================================
+class QuestionAnalysis(BaseModel):
+    """Schema for AI question analysis"""
+    question_type: str = Field(description="Type: youtube|image|audio|wiki|file|text|math")
+    needs_tools: bool = Field(description="Whether tools are needed")
+    reasoning: str = Field(description="AI reasoning for the decision")
+    confidence: str = Field(description="Confidence level: high|medium|low")
+    can_answer_directly: bool = Field(description="Can answer without tools")
+    suggested_approach: str = Field(description="Brief description of approach")
+class TextDecision(BaseModel):
+    """Schema for reversed text decision"""
+    chosen_version: str = Field(description="original|reversed")
+    reasoning: str = Field(description="Reasoning for the choice")
+    confidence: str = Field(description="Confidence level: high|medium|low")
 # =============================================================================
 # AI BRAIN WITH LANGCHAIN
 # =============================================================================
         print("🧠 LangChain Hybrid Brain initialized (HF + Groq fallback)")
+    def _create_structured_model(self, schema: BaseModel):
+        """Create model with structured output"""
+        try:
+            # Try HuggingFace with structured output
+            from langchain_huggingface import ChatHuggingFace
+            hf_model = ChatHuggingFace(
+                llm=self.hf_client,
+                model_id=self.hf_model
+            )
+            return hf_model.with_structured_output(schema)
+        except Exception as hf_error:
+            print(f"⚠️ HF structured output failed: {str(hf_error)[:50]}...")
+            try:
+                # Fallback to Groq with structured output
+                from langchain_groq import ChatGroq
+                groq_model = ChatGroq(
+                    api_key=os.environ.get("GROQ_API_KEY", ""),
+                    model=self.groq_model
+                )
+                return groq_model.with_structured_output(schema)
+            except Exception as groq_error:
+                print(f"⚠️ Both structured output failed")
+                return None
     def _invoke_model(self, messages: List[Dict[str, str]]) -> str:
         """Invoke model with messages - try HF first, fallback to Groq"""
                 return completion.choices[0].message.content
             except Exception as groq_error:
                 return f"AI Error: Both HF ({str(hf_error)[:50]}) and Groq ({str(groq_error)[:50]}) failed"
     def analyze_question(self, question: str, task_id: str = "") -> Dict[str, Any]:
+        """AI analyzes question and decides approach with structured output"""
+        # Create structured model
+        structured_model = self._create_structured_model(QuestionAnalysis)
+        if structured_model:
+            analysis_prompt = f"""
 Analyze this question and decide the approach:
 Question: "{question}"
 Task ID: "{task_id}"
+Important rules:
+- If question asks about Mercedes Sosa albums, Wikipedia, historical facts -> use "wiki"
+- If YouTube URL present -> use "youtube"
+- If mentions image, photo, chess position -> use "image"
+- If mentions audio, voice, mp3 -> use "audio"
+- If mentions file attachment, Excel, CSV -> use "file"
+- For math, tables, logic problems -> use "text" but needs_tools=false
+- Be accurate about question_type to trigger correct tools
+/no_thinking
 """
+            try:
+                result = structured_model.invoke(analysis_prompt)
+                return result.dict()
+            except Exception as e:
+                print(f"⚠️ Structured analysis failed: {str(e)[:50]}...")
+        # Fallback analysis
+        question_type = analyze_question_type(question)
+        return {
+            "question_type": question_type,
+            "needs_tools": bool(task_id) or question_type in ["wiki", "youtube", "image", "audio", "file"],
+            "reasoning": "Fallback analysis - structured output failed",
+            "confidence": "medium",
+            "can_answer_directly": question_type == "text" and not task_id,
+            "suggested_approach": f"Use {question_type} processing"
+        }
     def generate_final_answer(self, question: str, tool_results: Dict[str, Any], context: str = "") -> str:
         """Generate final answer using LangChain"""
             "final_answer",
             question=question,
             context_summary=context_summary
+        ) + "\n\n/no_thinking"
         messages = [
             {"role": "system", "content": get_system_prompt("reasoning_agent")},
         return self._invoke_model(messages)
     def decide_on_reversed_text(self, original: str, reversed: str) -> Dict[str, Any]:
+        """AI decides which version of text to use with structured output"""
+        # Create structured model
+        structured_model = self._create_structured_model(TextDecision)
+        if structured_model:
+            decision_prompt = f"""
 You are analyzing two versions of the same text to determine which makes more sense:
 Original: "{original}"
 Analyze both versions and decide which one is more likely to be the correct question.
 Consider grammar, word order, and meaning.
+/no_thinking
 """
+            try:
+                result = structured_model.invoke(decision_prompt)
+                return result.dict()
+            except Exception as e:
+                print(f"⚠️ Structured decision failed: {str(e)[:50]}...")
         # Fallback decision
         return {

cookies.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -13,6 +13,9 @@ yt-dlp>=2024.12.23
 langchain==0.3.13
 langchain-core==0.3.29
 langgraph==0.2.61
 # Transformers for multimodal models
 transformers>=4.44.0

 langchain==0.3.13
 langchain-core==0.3.29
 langgraph==0.2.61
+langchain-huggingface>=0.1.0
+langchain-groq>=0.2.0
+pydantic>=2.0.0
 # Transformers for multimodal models
 transformers>=4.44.0

utils/tool_orchestrator.py CHANGED Viewed

@@ -71,30 +71,29 @@ class ToolOrchestrator:
     def determine_tools_to_run(self, question_type: str, has_file: bool, has_urls: bool) -> List[str]:
         """
-        Xác định tools cần chạy dựa trên question type
         """
         tools_to_run = []
-        # Luôn chạy text processor nếu cần
-        if question_type in ["text", "unknown"]:
-            tools_to_run.append("text_processor")
-        # Tools theo loại câu hỏi
-        if question_type == "youtube" or "youtube" in str(has_urls):
             tools_to_run.append("youtube_tool")
-        elif question_type == "image" or has_file:
             tools_to_run.append("image_ocr")
-        elif question_type == "audio" or has_file:
             tools_to_run.append("audio_transcript")
         elif question_type == "wiki":
             tools_to_run.append("wiki_search")
-        elif question_type == "file" or has_file:
             tools_to_run.append("file_reader")
-        # Fallback: nếu không xác định được, thử wiki search
-        if not tools_to_run or question_type == "unknown":
-            tools_to_run.append("wiki_search")
         return tools_to_run
     def run_tools_sequence(self, tools_list: List[str], question: str, task_id: str = "") -> List[ToolResult]:

     def determine_tools_to_run(self, question_type: str, has_file: bool, has_urls: bool) -> List[str]:
         """
+        Xác định tools cần chạy dựa trên question type - FIXED LOGIC
         """
         tools_to_run = []
+        # Tools theo loại câu hỏi từ AI analysis
+        if question_type == "youtube":
             tools_to_run.append("youtube_tool")
+        elif question_type == "image":
             tools_to_run.append("image_ocr")
+        elif question_type == "audio":
             tools_to_run.append("audio_transcript")
         elif question_type == "wiki":
             tools_to_run.append("wiki_search")
+        elif question_type == "file":
             tools_to_run.append("file_reader")
+        elif question_type == "text":
+            tools_to_run.append("text_processor")
+        # Fallback cho math hoặc unknown
+        if question_type in ["math", "unknown"] or not tools_to_run:
+            tools_to_run.append("text_processor")
+        print(f"🎯 Tools to run: {tools_to_run}")
         return tools_to_run
     def run_tools_sequence(self, tools_list: List[str], question: str, task_id: str = "") -> List[ToolResult]:

utils/youtube_tool.py CHANGED Viewed

@@ -71,8 +71,12 @@ def get_youtube_content(question: str) -> Dict[str, Any]:
     print(f"Found YouTube URL: {youtube_url}")
     try:
-        # Sử dụng yt-dlp để lấy metadata an toàn
         import yt_dlp
         ydl_opts = {
             'writesubtitles': True,
@@ -83,6 +87,13 @@ def get_youtube_content(question: str) -> Dict[str, Any]:
             'no_warnings': True
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             info = ydl.extract_info(youtube_url, download=False)

     print(f"Found YouTube URL: {youtube_url}")
     try:
+        # Sử dụng yt-dlp để lấy metadata an toàn với cookies
         import yt_dlp
+        import os
+        # Path to cookies file
+        cookies_path = "cookies.txt"
         ydl_opts = {
             'writesubtitles': True,
             'no_warnings': True
         }
+        # Add cookies if file exists
+        if os.path.exists(cookies_path):
+            ydl_opts['cookiefile'] = cookies_path
+            print(f"🍪 Using cookies from {cookies_path}")
+        else:
+            print("⚠️ No cookies.txt found, trying without cookies")
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             info = ydl.extract_info(youtube_url, download=False)