tuan3335 committed on
Commit
040a6c6
·
1 Parent(s): 87a9461

use langchain

Browse files
=2.0.0 ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Collecting torch
2
+ Downloading torch-2.7.1-cp312-none-macosx_11_0_arm64.whl.metadata (29 kB)
3
+ Requirement already satisfied: filelock in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (3.16.1)
4
+ Requirement already satisfied: typing-extensions>=4.10.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (4.13.2)
5
+ Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (75.1.0)
6
+ Requirement already satisfied: sympy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (1.13.3)
7
+ Requirement already satisfied: networkx in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (3.3)
8
+ Requirement already satisfied: jinja2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (3.1.4)
9
+ Requirement already satisfied: fsspec in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (2024.3.1)
10
+ Requirement already satisfied: mpmath<1.4,>=1.1.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from sympy>=1.13.3->torch) (1.3.0)
11
+ Requirement already satisfied: MarkupSafe>=2.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from jinja2->torch) (2.1.5)
12
+ Downloading torch-2.7.1-cp312-none-macosx_11_0_arm64.whl (68.6 MB)
13
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 68.6/68.6 MB 53.9 MB/s eta 0:00:00
14
+ Installing collected packages: torch
15
+ Successfully installed torch-2.7.1
agent.py CHANGED
@@ -1,11 +1,6 @@
1
  """
2
- AI AGENT WITH LANGGRAPH + UTILS SYSTEM
3
-
4
- Architecture:
5
- - LangChain/LangGraph workflow với AI-driven routing
6
- - Qwen3-8B làm main reasoning engine
7
- - Utils system cung cấp tools
8
- - AI tự quyết định tools và logic xử lý
9
  """
10
 
11
  import os
@@ -14,26 +9,20 @@ import time
14
  from typing import Dict, Any, List, Optional, Annotated
15
  from dotenv import load_dotenv
16
 
17
- # LangChain imports
18
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
19
  from langchain_core.prompts import ChatPromptTemplate
20
  from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
21
 
22
- # LangGraph imports
23
  from langgraph.graph import StateGraph, END
24
  from langgraph.graph.message import add_messages
25
  from typing_extensions import TypedDict
26
 
27
- # HuggingFace imports
28
- from huggingface_hub import InferenceClient
29
-
30
- # Groq imports for fallback
31
- from groq import Groq
32
-
33
- # Pydantic for structured output
34
  from pydantic import BaseModel, Field
35
 
36
- # Utils system imports
 
 
 
37
  from utils import (
38
  process_question_with_tools,
39
  get_agent_state,
@@ -45,407 +34,274 @@ from utils import (
45
  analyze_question_type
46
  )
47
 
48
- # Load environment
49
  load_dotenv()
50
 
51
- # =============================================================================
52
- # LANGGRAPH STATE DEFINITION
53
- # =============================================================================
54
-
55
  class AgentState(TypedDict):
56
- """LangGraph state for AI agent"""
57
  messages: Annotated[List, add_messages]
58
  question: str
59
  task_id: str
60
-
61
- # AI Analysis
62
  ai_analysis: Dict[str, Any]
63
  should_use_tools: bool
64
-
65
- # Tool processing
66
  tool_processing_result: Dict[str, Any]
67
-
68
- # Final response
69
  final_answer: str
70
  processing_complete: bool
71
 
72
- # =============================================================================
73
- # PYDANTIC SCHEMAS FOR STRUCTURED OUTPUT
74
- # =============================================================================
75
-
76
  class QuestionAnalysis(BaseModel):
77
- """Schema for AI question analysis"""
78
  question_type: str = Field(description="Type: youtube|image|audio|wiki|file|text|math")
79
  needs_tools: bool = Field(description="Whether tools are needed")
80
  reasoning: str = Field(description="AI reasoning for the decision")
81
  confidence: str = Field(description="Confidence level: high|medium|low")
82
- can_answer_directly: bool = Field(description="Can answer without tools")
83
- suggested_approach: str = Field(description="Brief description of approach")
84
 
85
- class TextDecision(BaseModel):
86
- """Schema for reversed text decision"""
87
- chosen_version: str = Field(description="original|reversed")
88
- reasoning: str = Field(description="Reasoning for the choice")
89
- confidence: str = Field(description="Confidence level: high|medium|low")
90
-
91
- # =============================================================================
92
- # AI BRAIN WITH LANGCHAIN
93
- # =============================================================================
94
-
95
- class LangChainQwen3Brain:
96
- """AI Brain using LangChain + HuggingFace with Groq fallback"""
97
-
98
  def __init__(self):
99
- # Primary: HuggingFace
100
- self.hf_client = InferenceClient(
101
- provider="auto",
102
- api_key=os.environ.get("HF_TOKEN", "")
103
- )
104
- self.hf_model = "Qwen/Qwen3-8B"
105
 
106
- # Fallback: Groq
107
- self.groq_client = Groq(
108
- api_key=os.environ.get("GROQ_API_KEY", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  )
110
- self.groq_model = "llama3-8b-8192"
111
 
112
- # Setup parsers
113
- self.json_parser = JsonOutputParser()
114
- self.str_parser = StrOutputParser()
115
 
116
- print("🧠 LangChain Hybrid Brain initialized (HF + Groq fallback)")
117
-
118
- def _create_structured_model(self, schema: BaseModel):
119
- """Create model with structured output"""
120
- try:
121
- # Try HuggingFace with structured output
122
- from langchain_huggingface import ChatHuggingFace
123
- hf_model = ChatHuggingFace(
124
- llm=self.hf_client,
125
- model_id=self.hf_model
126
- )
127
- return hf_model.with_structured_output(schema)
128
- except Exception as hf_error:
129
- print(f"⚠️ HF structured output failed: {str(hf_error)[:50]}...")
130
- try:
131
- # Fallback to Groq with structured output
132
- from langchain_groq import ChatGroq
133
- groq_model = ChatGroq(
134
- api_key=os.environ.get("GROQ_API_KEY", ""),
135
- model=self.groq_model
136
- )
137
- return groq_model.with_structured_output(schema)
138
- except Exception as groq_error:
139
- print(f"⚠️ Both structured output failed")
140
- return None
141
-
142
- def _invoke_model(self, messages: List[Dict[str, str]]) -> str:
143
- """Invoke model with messages - try HF first, fallback to Groq"""
144
 
145
- # Try HuggingFace first
 
 
 
146
  try:
147
- completion = self.hf_client.chat.completions.create(
148
- model=self.hf_model,
149
- messages=messages,
150
- max_tokens=2048,
151
- temperature=0.7
 
 
 
 
152
  )
153
- return completion.choices[0].message.content
154
- except Exception as hf_error:
155
- print(f"⚠️ HuggingFace failed: {str(hf_error)[:100]}...")
156
- print("πŸ”„ Falling back to Groq...")
157
 
158
- # Fallback to Groq
 
 
 
 
 
 
 
 
 
 
 
159
  try:
160
- completion = self.groq_client.chat.completions.create(
161
- model=self.groq_model,
162
- messages=messages,
163
- max_tokens=2048,
164
- temperature=0.7
165
- )
166
- return completion.choices[0].message.content
167
- except Exception as groq_error:
168
- return f"AI Error: Both HF ({str(hf_error)[:50]}) and Groq ({str(groq_error)[:50]}) failed"
169
-
170
  def analyze_question(self, question: str, task_id: str = "") -> Dict[str, Any]:
171
- """AI analyzes question and decides approach with structured output"""
172
 
173
- # Create structured model
174
- structured_model = self._create_structured_model(QuestionAnalysis)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
- if structured_model:
177
- analysis_prompt = f"""
178
- Analyze this question and decide the approach:
179
-
180
- Question: "{question}"
181
- Task ID: "{task_id}"
182
-
183
- Important rules:
184
- - If question asks about Mercedes Sosa albums, Wikipedia, historical facts -> use "wiki"
185
- - If YouTube URL present -> use "youtube"
186
- - If mentions image, photo, chess position -> use "image"
187
- - If mentions audio, voice, mp3 -> use "audio"
188
- - If mentions file attachment, Excel, CSV -> use "file"
189
- - For math, tables, logic problems -> use "text" but needs_tools=false
190
- - Be accurate about question_type to trigger correct tools
191
-
192
- /no_thinking
193
- """
194
 
195
- try:
196
- result = structured_model.invoke(analysis_prompt)
197
- return result.dict()
198
- except Exception as e:
199
- print(f"⚠️ Structured analysis failed: {str(e)[:50]}...")
200
-
201
- # Fallback analysis
202
- question_type = analyze_question_type(question)
203
- return {
204
- "question_type": question_type,
205
- "needs_tools": bool(task_id) or question_type in ["wiki", "youtube", "image", "audio", "file"],
206
- "reasoning": "Fallback analysis - structured output failed",
207
- "confidence": "medium",
208
- "can_answer_directly": question_type == "text" and not task_id,
209
- "suggested_approach": f"Use {question_type} processing"
210
- }
 
 
 
 
 
 
 
 
 
 
211
 
212
- def generate_final_answer(self, question: str, tool_results: Dict[str, Any], context: str = "") -> str:
213
- """Generate final answer using LangChain"""
214
 
215
- # Build context summary
216
  if tool_results and tool_results.get("tool_results"):
217
- context_summary = build_context_summary(
218
  tool_results.get("tool_results", []),
219
  tool_results.get("cached_data", {})
220
  )
221
  else:
222
- context_summary = context or "No additional context available"
223
-
224
- answer_prompt = get_response_prompt(
225
- "final_answer",
226
- question=question,
227
- context_summary=context_summary
228
- ) + "\n\n/no_thinking"
229
-
230
- messages = [
231
- {"role": "system", "content": get_system_prompt("reasoning_agent")},
232
- {"role": "user", "content": answer_prompt}
233
- ]
234
-
235
- return self._invoke_model(messages)
236
-
237
- def decide_on_reversed_text(self, original: str, reversed: str) -> Dict[str, Any]:
238
- """AI decides which version of text to use with structured output"""
239
 
240
- # Create structured model
241
- structured_model = self._create_structured_model(TextDecision)
242
-
243
- if structured_model:
244
- decision_prompt = f"""
245
- You are analyzing two versions of the same text to determine which makes more sense:
246
 
247
- Original: "{original}"
248
- Reversed: "{reversed}"
249
 
250
- Analyze both versions and decide which one is more likely to be the correct question.
251
- Consider grammar, word order, and meaning.
 
252
 
253
- /no_thinking
254
- """
255
-
256
- try:
257
- result = structured_model.invoke(decision_prompt)
258
- return result.dict()
259
- except Exception as e:
260
- print(f"⚠️ Structured decision failed: {str(e)[:50]}...")
261
-
262
- # Fallback decision
263
- return {
264
- "chosen_version": "reversed" if len(reversed.split()) > 3 else "original",
265
- "reasoning": "Fallback decision based on text structure",
266
- "confidence": "low"
267
- }
268
 
269
- # =============================================================================
270
- # LANGGRAPH NODES
271
- # =============================================================================
 
 
 
 
 
 
272
 
273
- # Initialize AI brain
274
- ai_brain = LangChainQwen3Brain()
275
 
276
  def analyze_question_node(state: AgentState) -> AgentState:
277
- """AI analyzes the question and decides approach"""
278
  question = state["question"]
279
  task_id = state.get("task_id", "")
280
 
281
- print(f"πŸ” AI analyzing question: {question[:50]}...")
282
-
283
- # Get AI analysis
284
  analysis = ai_brain.analyze_question(question, task_id)
285
- state["ai_analysis"] = analysis
286
 
287
- # Determine if tools are needed
288
  state["should_use_tools"] = analysis.get("needs_tools", True)
289
 
290
- print(f"πŸ“Š AI Analysis:")
291
- print(f" Type: {analysis.get('question_type', 'unknown')}")
292
- print(f" Needs tools: {analysis.get('needs_tools', True)}")
293
- print(f" Confidence: {analysis.get('confidence', 'medium')}")
294
- print(f" Reasoning: {analysis.get('reasoning', 'No reasoning provided')}")
295
-
296
  return state
297
 
298
  def process_with_tools_node(state: AgentState) -> AgentState:
299
- """Process question using utils tool system"""
300
  question = state["question"]
301
  task_id = state.get("task_id", "")
302
 
303
- print(f"πŸ”§ Processing with tools...")
 
 
304
 
305
- try:
306
- # Use utils tool orchestrator
307
- result = process_question_with_tools(question, task_id)
308
- state["tool_processing_result"] = result
309
-
310
- print(f"βœ… Tool processing completed:")
311
- print(f" Question type: {result.get('question_type', 'unknown')}")
312
- print(f" Successful tools: {result.get('successful_tools', [])}")
313
- print(f" Failed tools: {result.get('failed_tools', [])}")
314
-
315
- except Exception as e:
316
- print(f"❌ Tool processing failed: {str(e)}")
317
- state["tool_processing_result"] = {
318
- "error": str(e),
319
- "processed_question": question,
320
- "question_type": "error",
321
- "tools_used": [],
322
- "successful_tools": [],
323
- "failed_tools": [],
324
- "tool_results": [],
325
- "cached_data": {}
326
- }
327
 
328
  return state
329
 
330
  def answer_directly_node(state: AgentState) -> AgentState:
331
- """Answer question directly without tools"""
332
  question = state["question"]
333
 
334
- print(f"πŸ’­ AI answering directly...")
335
-
336
- # Generate direct answer
337
- direct_prompt = f"""
338
- Answer this question directly based on your knowledge:
339
-
340
- Question: {question}
341
-
342
- Provide a clear, accurate, and helpful answer.
343
- """
344
-
345
- messages = [
346
- {"role": "system", "content": get_system_prompt("reasoning_agent")},
347
- {"role": "user", "content": direct_prompt}
348
- ]
349
-
350
- answer = ai_brain._invoke_model(messages)
351
  state["final_answer"] = answer
352
  state["processing_complete"] = True
353
 
354
  return state
355
 
356
  def generate_final_answer_node(state: AgentState) -> AgentState:
357
- """Generate final answer using AI + tool results"""
358
  question = state["question"]
359
  tool_results = state.get("tool_processing_result", {})
360
 
361
- print(f"🎯 Generating final answer...")
362
-
363
- # Generate comprehensive answer
364
- answer = ai_brain.generate_final_answer(question, tool_results)
365
-
366
  state["final_answer"] = answer
367
  state["processing_complete"] = True
368
 
369
- print(f"βœ… Final answer generated")
370
-
371
  return state
372
 
373
- # =============================================================================
374
- # LANGGRAPH WORKFLOW
375
- # =============================================================================
376
-
377
  def create_agent_workflow():
378
- """Create LangGraph workflow"""
379
-
380
  workflow = StateGraph(AgentState)
381
 
382
  # Add nodes
383
- workflow.add_node("analyze", analyze_question_node)
384
- workflow.add_node("use_tools", process_with_tools_node)
385
- workflow.add_node("direct_answer", answer_directly_node)
386
- workflow.add_node("generate_answer", generate_final_answer_node)
387
 
388
- # Routing logic
389
  def should_use_tools(state: AgentState) -> str:
390
- """AI-driven routing decision"""
391
- should_use = state.get("should_use_tools", True)
392
- can_answer_directly = state.get("ai_analysis", {}).get("can_answer_directly", False)
393
-
394
- if can_answer_directly and not should_use:
395
- print("πŸš€ AI decided to answer directly")
396
- return "direct_answer"
397
- else:
398
- print("πŸ”§ AI decided to use tools")
399
- return "use_tools"
400
-
401
- # Add conditional edges
402
- workflow.add_conditional_edges(
403
- "analyze",
404
- should_use_tools,
405
- {
406
- "use_tools": "use_tools",
407
- "direct_answer": "direct_answer"
408
- }
409
- )
410
-
411
- # Connect tool processing to final answer
412
- workflow.add_edge("use_tools", "generate_answer")
413
-
414
- # End edges
415
- workflow.add_edge("direct_answer", END)
416
- workflow.add_edge("generate_answer", END)
417
 
418
- # Set entry point
419
- workflow.set_entry_point("analyze")
 
 
 
 
420
 
421
  return workflow.compile()
422
 
423
- # =============================================================================
424
- # MAIN AGENT CLASS
425
- # =============================================================================
426
-
427
  class LangGraphUtilsAgent:
428
- """Main AI Agent using LangGraph + Utils system"""
429
-
430
  def __init__(self):
431
- self.workflow = create_agent_workflow()
432
- self.ai_brain = ai_brain
433
-
434
- print("πŸ€– LangGraph Utils Agent initialized!")
435
- print("🧠 AI Brain: LangChain + HuggingFace with Groq fallback")
436
- print("πŸ”§ Tools: YouTube, Image OCR, Audio Transcript, Wikipedia, File Reader, Text Processor")
437
- print("⚑ Features: AI-driven routing, Smart tool selection, Multimodal processing")
438
 
439
  def process_question(self, question: str, task_id: str = "") -> str:
440
- """Main entry point for processing questions"""
441
  try:
442
- print(f"\nπŸš€ Processing question: {question}")
443
- print(f"πŸ“„ Task ID: {task_id or 'None'}")
444
-
445
- # Reset agent state for new question
446
- reset_agent_state()
447
 
448
- # Initialize LangGraph state
449
  initial_state = {
450
  "messages": [HumanMessage(content=question)],
451
  "question": question,
@@ -457,38 +313,29 @@ class LangGraphUtilsAgent:
457
  "processing_complete": False
458
  }
459
 
460
- # Execute workflow
461
- print("\nπŸ”„ Starting LangGraph workflow...")
462
  start_time = time.time()
 
 
463
 
464
- final_state = self.workflow.invoke(initial_state)
465
-
466
- execution_time = time.time() - start_time
467
- print(f"\n⏱️ Total execution time: {execution_time:.2f} seconds")
468
 
469
- # Return final answer
470
- answer = final_state.get("final_answer", "No answer generated")
471
-
472
- print(f"\nβœ… Question processed successfully!")
473
- return answer
474
 
475
  except Exception as e:
476
- error_msg = f"Agent processing error: {str(e)}"
477
- print(f"\n❌ {error_msg}")
478
- import traceback
479
- traceback.print_exc()
480
- return error_msg
481
-
482
- # =============================================================================
483
- # GLOBAL AGENT INSTANCE
484
- # =============================================================================
485
 
486
- # Create global agent
487
  agent = LangGraphUtilsAgent()
488
 
489
  def process_question(question: str, task_id: str = "") -> str:
490
- """Global function for processing questions"""
491
- return agent.process_question(question, task_id)
 
 
 
492
 
493
  # =============================================================================
494
  # TESTING
@@ -534,4 +381,13 @@ if __name__ == "__main__":
534
 
535
  print(f"\n{'-'*60}")
536
 
537
- print("\nβœ… All tests completed!")
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ AI AGENT WITH LANGGRAPH + HUGGINGFACE INTEGRATION
3
+ Clean architecture with LangChain HuggingFace Pipeline
 
 
 
 
 
4
  """
5
 
6
  import os
 
9
  from typing import Dict, Any, List, Optional, Annotated
10
  from dotenv import load_dotenv
11
 
 
12
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
13
  from langchain_core.prompts import ChatPromptTemplate
14
  from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
15
 
 
16
  from langgraph.graph import StateGraph, END
17
  from langgraph.graph.message import add_messages
18
  from typing_extensions import TypedDict
19
 
 
 
 
 
 
 
 
20
  from pydantic import BaseModel, Field
21
 
22
+ # LangChain HuggingFace Integration
23
+ from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace, HuggingFaceEndpoint
24
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
25
+
26
  from utils import (
27
  process_question_with_tools,
28
  get_agent_state,
 
34
  analyze_question_type
35
  )
36
 
 
37
  load_dotenv()
38
 
 
 
 
 
39
  class AgentState(TypedDict):
 
40
  messages: Annotated[List, add_messages]
41
  question: str
42
  task_id: str
 
 
43
  ai_analysis: Dict[str, Any]
44
  should_use_tools: bool
 
 
45
  tool_processing_result: Dict[str, Any]
 
 
46
  final_answer: str
47
  processing_complete: bool
48
 
 
 
 
 
49
  class QuestionAnalysis(BaseModel):
 
50
  question_type: str = Field(description="Type: youtube|image|audio|wiki|file|text|math")
51
  needs_tools: bool = Field(description="Whether tools are needed")
52
  reasoning: str = Field(description="AI reasoning for the decision")
53
  confidence: str = Field(description="Confidence level: high|medium|low")
 
 
54
 
55
+ class AIBrain:
 
 
 
 
 
 
 
 
 
 
 
 
56
  def __init__(self):
57
+ self.model_name = "Qwen/Qwen3-8B"
 
 
 
 
 
58
 
59
+ print("🧠 Initializing Qwen3-8B with LangChain HuggingFace...")
60
+
61
+ # Load tokenizer with thinking disabled
62
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
63
+
64
+ # Create text generation pipeline with Qwen3
65
+ self.hf_pipeline = pipeline(
66
+ "text-generation",
67
+ model=self.model_name,
68
+ tokenizer=self.tokenizer,
69
+ torch_dtype="auto",
70
+ device_map="auto",
71
+ max_new_tokens=2048,
72
+ temperature=0.7,
73
+ top_p=0.9,
74
+ do_sample=True,
75
+ pad_token_id=self.tokenizer.eos_token_id if self.tokenizer.eos_token_id else self.tokenizer.pad_token_id
76
  )
 
77
 
78
+ # Wrap with LangChain HuggingFacePipeline
79
+ self.llm = HuggingFacePipeline(pipeline=self.hf_pipeline)
 
80
 
81
+ # Create ChatHuggingFace for chat interface
82
+ self.chat_model = ChatHuggingFace(llm=self.llm)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
+ print("✅ Qwen3 AI Brain with LangChain HuggingFace initialized")
85
+
86
+ def _generate_with_qwen3(self, prompt: str, max_tokens: int = 2048) -> str:
87
+ """Generate text with Qwen3 via LangChain - thinking disabled"""
88
  try:
89
+ # Prepare messages for chat template with thinking DISABLED
90
+ messages = [{"role": "user", "content": prompt}]
91
+
92
+ # Apply chat template with enable_thinking=False
93
+ text = self.tokenizer.apply_chat_template(
94
+ messages,
95
+ tokenize=False,
96
+ add_generation_prompt=True,
97
+ enable_thinking=False # CRITICAL: Disable thinking mode
98
  )
 
 
 
 
99
 
100
+ # Use LangChain HuggingFace pipeline for generation
101
+ response = self.llm.invoke(text)
102
+
103
+ # Clean up response - remove input prompt
104
+ if text in response:
105
+ response = response.replace(text, "").strip()
106
+
107
+ return response
108
+
109
+ except Exception as e:
110
+ print(f"⚠️ Qwen3 generation error: {str(e)}")
111
+ # Fallback to direct pipeline call
112
  try:
113
+ result = self.hf_pipeline(prompt, max_new_tokens=max_tokens)
114
+ return result[0]['generated_text'].replace(prompt, "").strip()
115
+ except Exception as e2:
116
+ return f"AI generation failed: {str(e2)}"
117
+
 
 
 
 
 
118
  def analyze_question(self, question: str, task_id: str = "") -> Dict[str, Any]:
119
+ """Analyze question type using Qwen3 with strict JSON output"""
120
 
121
+ prompt = f"""<instruction>
122
+ Analyze this question and determine the correct tool approach. Return ONLY valid JSON.
123
+ </instruction>
124
+
125
+ <question>{question}</question>
126
+ <task_id>{task_id}</task_id>
127
+
128
+ <classification_rules>
129
+ - YouTube URLs (youtube.com, youtu.be): "youtube"
130
+ - Images, photos, chess positions, visual content: "image"
131
+ - Audio files, voice, sound, mp3: "audio"
132
+ - Excel, CSV, documents, file uploads: "file"
133
+ - Wikipedia searches, historical facts, people info: "wiki"
134
+ - Math calculations, logic, text analysis: "text"
135
+ </classification_rules>
136
+
137
+ Return this exact JSON format:
138
+ {{
139
+ "question_type": "youtube|image|audio|wiki|file|text",
140
+ "needs_tools": true,
141
+ "reasoning": "Brief explanation of classification",
142
+ "confidence": "high"
143
+ }}"""
144
 
145
+ try:
146
+ response = self._generate_with_qwen3(prompt, 512)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
+ # Extract JSON from response
149
+ import re
150
+ json_pattern = r'\{[^{}]*\}'
151
+ json_match = re.search(json_pattern, response)
152
+
153
+ if json_match:
154
+ result = json.loads(json_match.group())
155
+
156
+ # Validate required fields
157
+ required_fields = ["question_type", "needs_tools", "reasoning", "confidence"]
158
+ if all(field in result for field in required_fields):
159
+ return result
160
+
161
+ raise ValueError("Invalid JSON structure in response")
162
+
163
+ except Exception as e:
164
+ print(f"⚠️ Qwen3 analysis failed: {str(e)[:100]}...")
165
+
166
+ # Fallback analysis
167
+ question_type = analyze_question_type(question)
168
+ return {
169
+ "question_type": question_type,
170
+ "needs_tools": question_type in ["wiki", "youtube", "image", "audio", "file"],
171
+ "reasoning": f"Fallback classification: detected {question_type}",
172
+ "confidence": "medium"
173
+ }
174
 
175
+ def generate_answer(self, question: str, tool_results: Dict[str, Any]) -> str:
176
+ """Generate final answer using Qwen3 with context"""
177
 
 
178
  if tool_results and tool_results.get("tool_results"):
179
+ context = build_context_summary(
180
  tool_results.get("tool_results", []),
181
  tool_results.get("cached_data", {})
182
  )
183
  else:
184
+ context = "No additional context available"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
+ prompt = f"""<instruction>
187
+ Generate a comprehensive answer to the user's question using the provided context.
188
+ </instruction>
 
 
 
189
 
190
+ <question>{question}</question>
 
191
 
192
+ <context>
193
+ {context}
194
+ </context>
195
 
196
+ <output_rules>
197
+ - Provide direct, accurate answers
198
+ - Use context information when relevant
199
+ - Be concise but complete
200
+ - No thinking process in output
201
+ - Professional tone
202
+ </output_rules>
 
 
 
 
 
 
 
 
203
 
204
+ Answer:"""
205
+
206
+ response = self._generate_with_qwen3(prompt, 2048)
207
+
208
+ # Clean up response
209
+ if "Answer:" in response:
210
+ response = response.split("Answer:")[-1].strip()
211
+
212
+ return response
213
 
214
+ # Initialize AI Brain globally
215
+ ai_brain = AIBrain()
216
 
217
  def analyze_question_node(state: AgentState) -> AgentState:
218
+ """Analyze question using Qwen3 AI Brain"""
219
  question = state["question"]
220
  task_id = state.get("task_id", "")
221
 
222
+ print("🔍 Analyzing question with Qwen3...")
 
 
223
  analysis = ai_brain.analyze_question(question, task_id)
 
224
 
225
+ state["ai_analysis"] = analysis
226
  state["should_use_tools"] = analysis.get("needs_tools", True)
227
 
228
+ print(f"📊 Type: {analysis.get('question_type')} | Tools: {analysis.get('needs_tools')} | Confidence: {analysis.get('confidence')}")
 
 
 
 
 
229
  return state
230
 
231
  def process_with_tools_node(state: AgentState) -> AgentState:
232
+ """Process question with appropriate tools"""
233
  question = state["question"]
234
  task_id = state.get("task_id", "")
235
 
236
+ print("🔧 Processing with specialized tools...")
237
+ tool_results = process_question_with_tools(question, task_id)
238
+ state["tool_processing_result"] = tool_results
239
 
240
+ successful_tools = [result.tool_name for result in tool_results.get("tool_results", []) if result.success]
241
+ if successful_tools:
242
+ print(f"✅ Successful tools: {successful_tools}")
243
+ else:
244
+ print("⚠️ No tools succeeded")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  return state
247
 
248
  def answer_directly_node(state: AgentState) -> AgentState:
249
+ """Answer directly without tools using Qwen3"""
250
  question = state["question"]
251
 
252
+ print("💭 Generating direct answer with Qwen3...")
253
+ answer = ai_brain.generate_answer(question, {})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  state["final_answer"] = answer
255
  state["processing_complete"] = True
256
 
257
  return state
258
 
259
  def generate_final_answer_node(state: AgentState) -> AgentState:
260
+ """Generate final answer combining tool results and AI analysis"""
261
  question = state["question"]
262
  tool_results = state.get("tool_processing_result", {})
263
 
264
+ print("🎯 Generating final answer with context...")
265
+ answer = ai_brain.generate_answer(question, tool_results)
 
 
 
266
  state["final_answer"] = answer
267
  state["processing_complete"] = True
268
 
 
 
269
  return state
270
 
 
 
 
 
271
  def create_agent_workflow():
272
+ """Create LangGraph workflow for question processing"""
 
273
  workflow = StateGraph(AgentState)
274
 
275
  # Add nodes
276
+ workflow.add_node("analyze_question", analyze_question_node)
277
+ workflow.add_node("process_with_tools", process_with_tools_node)
278
+ workflow.add_node("answer_directly", answer_directly_node)
279
+ workflow.add_node("generate_final_answer", generate_final_answer_node)
280
 
281
+ # Define routing logic
282
  def should_use_tools(state: AgentState) -> str:
283
+ return "process_with_tools" if state.get("should_use_tools", True) else "answer_directly"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
+ # Set up the flow
286
+ workflow.set_entry_point("analyze_question")
287
+ workflow.add_conditional_edges("analyze_question", should_use_tools)
288
+ workflow.add_edge("process_with_tools", "generate_final_answer")
289
+ workflow.add_edge("answer_directly", END)
290
+ workflow.add_edge("generate_final_answer", END)
291
 
292
  return workflow.compile()
293
 
 
 
 
 
294
  class LangGraphUtilsAgent:
 
 
295
  def __init__(self):
296
+ self.app = create_agent_workflow()
297
+ print("🚀 LangGraph Agent with Qwen3 + Utils System ready")
 
 
 
 
 
298
 
299
  def process_question(self, question: str, task_id: str = "") -> str:
300
+ """Process question through the workflow"""
301
  try:
302
+ print(f"\n🎯 Processing: {question[:100]}...")
 
 
 
 
303
 
304
+ # Initialize state
305
  initial_state = {
306
  "messages": [HumanMessage(content=question)],
307
  "question": question,
 
313
  "processing_complete": False
314
  }
315
 
316
+ # Run workflow
 
317
  start_time = time.time()
318
+ result = self.app.invoke(initial_state)
319
+ elapsed_time = time.time() - start_time
320
 
321
+ final_answer = result.get("final_answer", "No answer generated")
322
+ print(f"✅ Completed in {elapsed_time:.2f}s")
 
 
323
 
324
+ return final_answer
 
 
 
 
325
 
326
  except Exception as e:
327
+ print(f"❌ Agent error: {str(e)}")
328
+ return f"I apologize, but I encountered an error processing your question: {str(e)}"
 
 
 
 
 
 
 
329
 
330
+ # Global agent instance
331
  agent = LangGraphUtilsAgent()
332
 
333
  def process_question(question: str, task_id: str = "") -> str:
334
+ """Main entry point for question processing"""
335
+ if not question or not question.strip():
336
+ return "Please provide a valid question."
337
+
338
+ return agent.process_question(question.strip(), task_id)
339
 
340
  # =============================================================================
341
  # TESTING
 
381
 
382
  print(f"\n{'-'*60}")
383
 
384
+ print("\n✅ All tests completed!")
385
+
386
+ # Initialize Qwen3 with thinking mode disabled
387
+ primary_brain = HuggingFaceEndpoint(
388
+ repo_id=primary_model,
389
+ temperature=0.7,
390
+ max_new_tokens=300,
391
+ huggingfacehub_api_token=os.getenv("HF_API_KEY"),
392
+ model_kwargs={"enable_thinking": False, "thinking_prompt": "/no_thinking"}
393
+ )
requirements.txt CHANGED
@@ -1,26 +1,38 @@
1
- gradio==4.44.0
2
- huggingface-hub==0.33.1
3
- groq==0.13.0
4
- python-dotenv==1.0.0
5
- requests==2.31.0
6
- wikipedia==1.4.0
7
- pandas==2.0.3
8
- openpyxl==3.1.2
9
- Pillow==10.0.0
10
- yt-dlp>=2024.12.23
11
-
12
- # LangChain + LangGraph for AI-driven workflows
13
- langchain==0.3.13
14
- langchain-core==0.3.29
15
- langgraph==0.2.61
16
  langchain-huggingface>=0.1.0
17
- langchain-groq>=0.2.0
 
 
 
 
 
 
 
 
 
 
 
18
  pydantic>=2.0.0
19
 
20
- # Transformers for multimodal models
21
- transformers>=4.44.0
22
- torch>=2.0.0
23
- accelerate>=0.21.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # Optional dependencies for enhanced functionality
26
  # flash-attn>=2.0.0 # For flash attention (requires CUDA)
 
1
+ # AI Core Dependencies - LangChain HuggingFace Integration
2
+ langchain>=0.3.0
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  langchain-huggingface>=0.1.0
4
+ langchain-groq>=0.2.0
5
+ langgraph>=0.2.0
6
+
7
+ # HuggingFace Core
8
+ transformers>=4.51.0
9
+
10
+ # Tool Dependencies
11
+ groq>=0.11.0
12
+ wikipedia>=1.4.0
13
+ requests>=2.31.0
14
+ pillow>=10.0.0
15
+ python-dotenv>=1.0.0
16
  pydantic>=2.0.0
17
 
18
+ # YouTube Tools
19
+ yt-dlp>=2024.1.0
20
+ pytube>=15.0.0
21
+ youtube-transcript-api>=0.6.0
22
+
23
+ # File Processing
24
+ pandas>=2.0.0
25
+ openpyxl>=3.1.0
26
+
27
+ # Audio/Image Processing
28
+ openai-whisper>=20231117
29
+ opencv-python>=4.8.0
30
+
31
+ # Optional optimizations
32
+ typing-extensions>=4.8.0
33
+
34
+ # LangChain + LangGraph for AI-driven workflows
35
+ langchain-core==0.3.29
36
 
37
  # Optional dependencies for enhanced functionality
38
  # flash-attn>=2.0.0 # For flash attention (requires CUDA)
utils/__init__.py CHANGED
@@ -2,8 +2,8 @@
2
  Utils package for AI Agent tools
3
  """
4
 
5
- # Individual tools
6
- from .youtube_tool import get_youtube_content
7
  from .text_tool import reverse_text_if_needed
8
  from .image_tool import ocr_image_with_nanonets
9
  from .audio_tool import transcribe_audio_groq
@@ -30,7 +30,7 @@ from .tool_orchestrator import (
30
 
31
  __all__ = [
32
  # Individual tools
33
- "get_youtube_content",
34
  "reverse_text_if_needed",
35
  "ocr_image_with_nanonets",
36
  "transcribe_audio_groq",
 
2
  Utils package for AI Agent tools
3
  """
4
 
5
+ # Individual tools
6
+ from .youtube_tool import YouTubeTool
7
  from .text_tool import reverse_text_if_needed
8
  from .image_tool import ocr_image_with_nanonets
9
  from .audio_tool import transcribe_audio_groq
 
30
 
31
  __all__ = [
32
  # Individual tools
33
+ "YouTubeTool",
34
  "reverse_text_if_needed",
35
  "ocr_image_with_nanonets",
36
  "transcribe_audio_groq",
utils/audio_tool.py CHANGED
@@ -1,11 +1,139 @@
1
  """
2
- Audio Tool - Transcribe audio vα»›i Groq Whisper API
 
3
  """
4
 
5
  import os
6
  import tempfile
7
  import requests
8
- from typing import Optional
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def download_audio_file(task_id: str) -> Optional[str]:
11
  """
@@ -58,7 +186,6 @@ def transcribe_audio_groq(task_id: str = "", audio_path: str = "", language: str
58
 
59
  try:
60
  # Initialize Groq client
61
- from groq import Groq
62
  groq_api_key = os.environ.get("GROQ_API_KEY")
63
 
64
  if not groq_api_key:
 
1
  """
2
+ AUDIO PROCESSING TOOL - Groq Audio Only
3
+ Handles audio file transcription using Groq Whisper API
4
  """
5
 
6
  import os
7
  import tempfile
8
  import requests
9
+ from typing import Dict, Any, Optional
10
+ from groq import Groq
11
+ from .state_manager import get_agent_state
12
+
13
class AudioTool:
    """Transcribe audio through the Groq Whisper API.

    Accepts an http(s) URL, an existing local file path, or a base64 string,
    normalises it to a file on disk, sends it to Groq and caches the result
    in the shared agent state under ``cached_data["audio_analysis"]``.
    """

    def __init__(self):
        """Create the Groq client from ``GROQ_API_KEY`` and select the model."""
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY", ""))
        self.model = "whisper-large-v3"
        print("🎡 Audio Tool (Groq Whisper) initialized")

    def process_audio(self, audio_input: str, **kwargs) -> Dict[str, Any]:
        """Transcribe ``audio_input`` and return a standard result dict.

        Args:
            audio_input: URL, local file path, or base64-encoded audio.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            ``{"success": True, "data": ..., "summary": ...}`` on success, or
            the dict produced by ``_error_result`` on any failure.
        """
        audio_path = None
        try:
            audio_path = self._prepare_audio_file(audio_input)
            if not audio_path:
                return self._error_result("Could not prepare audio file")

            # Transcribe using Groq Whisper
            transcript = self._transcribe_with_groq(audio_path)

            result = {
                "transcript": transcript,
                "source": audio_input,
                "model": self.model,
                "tool": "groq_whisper"
            }

            # Update agent state
            state = get_agent_state()
            state.cached_data["audio_analysis"] = result

            return {
                "success": True,
                "data": result,
                "summary": f"Audio transcribed: {transcript[:100]}..."
            }

        except Exception as e:
            error_msg = f"Audio processing failed: {str(e)}"
            print(f"❌ {error_msg}")
            return self._error_result(error_msg)

        finally:
            # Cleanup runs on every exit path so a failed transcription no
            # longer leaves the downloaded/decoded file behind. The predicate
            # (path lives under the system temp dir) is unchanged.
            if audio_path and audio_path.startswith(tempfile.gettempdir()):
                try:
                    os.unlink(audio_path)
                except OSError:
                    # Best effort: a cleanup failure must not mask the result.
                    pass

    def _prepare_audio_file(self, audio_input: str) -> Optional[str]:
        """Resolve ``audio_input`` to a readable file path.

        Returns:
            A path on disk, or ``None`` when the input is empty, is not
            recognised, or preparing it raised an error.
        """
        try:
            # Reject empty input early; otherwise "" would count as valid
            # base64 and be written out as an empty audio file.
            if not audio_input or not audio_input.strip():
                return None

            # If it's a URL, download it
            if audio_input.startswith(('http://', 'https://')):
                return self._download_audio(audio_input)

            # If it's a local file path
            if os.path.exists(audio_input):
                return audio_input

            # If it's base64, decode it
            if self._is_base64(audio_input):
                return self._decode_base64_audio(audio_input)

            return None

        except Exception as e:
            print(f"⚠️ Audio prep error: {str(e)}")
            return None

    @staticmethod
    def _guess_suffix(url: str) -> str:
        """Return a temp-file suffix taken from the URL path, default ``.mp3``.

        Only the path component is inspected, so dots in the host name or the
        query string cannot produce suffixes such as ``.com/audio``.
        """
        from urllib.parse import urlparse

        extension = os.path.splitext(urlparse(url).path)[1]
        if len(extension) > 1 and extension[1:].isalnum():
            return extension
        return '.mp3'

    def _download_audio(self, url: str) -> str:
        """Download ``url`` to a temp file and return the file's path.

        Raises:
            requests.RequestException: On network errors, timeouts or a
                non-2xx response. A partially written temp file is removed
                before the error propagates.
        """
        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=self._guess_suffix(url))
        try:
            # A timeout keeps a stalled server from hanging the agent forever.
            with tmp_file, requests.get(url, stream=True, timeout=30) as response:
                response.raise_for_status()
                for chunk in response.iter_content(chunk_size=8192):
                    tmp_file.write(chunk)
        except Exception:
            try:
                os.unlink(tmp_file.name)
            except OSError:
                pass
            raise
        return tmp_file.name

    def _is_base64(self, s: str) -> bool:
        """Return True when ``s`` is non-empty, canonical base64 text."""
        import base64

        if isinstance(s, str):
            try:
                s_bytes = s.encode('ascii')
            except UnicodeEncodeError:
                return False
        elif isinstance(s, bytes):
            s_bytes = s
        else:
            return False

        if not s_bytes:
            return False

        try:
            # Round-trip comparison rejects padding or alphabet irregularities.
            return base64.b64encode(base64.b64decode(s_bytes, validate=True)) == s_bytes
        except ValueError:  # binascii.Error is a ValueError subclass
            return False

    def _decode_base64_audio(self, b64_string: str) -> str:
        """Decode base64 audio into a ``.mp3`` temp file and return its path."""
        import base64

        audio_data = base64.b64decode(b64_string)
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file:
            tmp_file.write(audio_data)
            return tmp_file.name

    def _transcribe_with_groq(self, audio_path: str) -> str:
        """Send the file at ``audio_path`` to Groq and return the transcript."""
        with open(audio_path, "rb") as audio_file:
            transcript = self.client.audio.transcriptions.create(
                file=audio_file,
                model=self.model,
                language="en",  # Language is fixed to English here
                response_format="text"
            )

        # The client may hand back a plain string or an object with ``.text``.
        return transcript if isinstance(transcript, str) else transcript.text

    def _error_result(self, error_msg: str) -> Dict[str, Any]:
        """Build the standard failure dict for ``error_msg``."""
        return {
            "success": False,
            "error": error_msg,
            "data": None,
            "summary": f"Audio processing failed: {error_msg}"
        }
137
 
138
  def download_audio_file(task_id: str) -> Optional[str]:
139
  """
 
186
 
187
  try:
188
  # Initialize Groq client
 
189
  groq_api_key = os.environ.get("GROQ_API_KEY")
190
 
191
  if not groq_api_key:
utils/text_tool.py CHANGED
@@ -1,100 +1,43 @@
1
  """
2
- Text Tool - Xử lý cÒu hỏi viết ngược
3
  """
4
 
5
  from typing import Dict, Any
6
 
7
- def reverse_text(text: str) -> str:
8
- """
9
- Đảo ngược text
10
- """
11
- return text[::-1]
12
-
13
  def is_likely_reversed(text: str) -> bool:
14
- """
15
- Kiểm tra xem text cΓ³ khαΊ£ nΔƒng bα»‹ viαΊΏt ngược khΓ΄ng
16
- Dα»±a trΓͺn cΓ‘c dαΊ₯u hiệu:
17
- - CΓ’u kαΊΏt thΓΊc bαΊ±ng dαΊ₯u phαΊ©y thay vΓ¬ dαΊ₯u chαΊ₯m hỏi
18
- - Có từ "rewsna" (answer ngược)
19
- - CΓ³ tα»« kαΊΏt thΓΊc bαΊ±ng cΓ‘c kΓ½ tα»± Δ‘αΊ·c biệt
20
- """
21
- # DαΊ₯u hiệu cΓ’u hỏi viαΊΏt ngược
22
- reverse_indicators = [
23
- "rewsna", # "answer" ngược
24
- "noitseuq", # "question" ngược
25
- "ecnetnes", # "sentence" ngược
26
- "dnatsrednu", # "understand" ngược
27
- "etirw", # "write" ngược
28
- text.strip().endswith(","), # KαΊΏt thΓΊc bαΊ±ng dαΊ₯u phαΊ©y
29
- text.strip().startswith("?"), # BαΊ―t Δ‘αΊ§u bαΊ±ng dαΊ₯u hỏi
30
- ]
31
 
32
- # Đếm sα»‘ dαΊ₯u hiệu
33
- indicators_found = sum([
34
- 1 for indicator in reverse_indicators
35
- if (isinstance(indicator, str) and indicator.lower() in text.lower()) or
36
- (isinstance(indicator, bool) and indicator)
37
- ])
 
38
 
39
- # NαΊΏu cΓ³ >= 2 dαΊ₯u hiệu thΓ¬ cΓ³ thể lΓ  text ngược
40
- return indicators_found >= 2
 
 
41
 
42
  def reverse_text_if_needed(question: str, ai_brain=None) -> Dict[str, Any]:
43
- """
44
- Main function: Kiểm tra vΓ  cung cαΊ₯p thΓ΄ng tin về cΓ’u hỏi cΓ³ thể bα»‹ viαΊΏt ngược
45
-
46
- Args:
47
- question: CΓ’u hỏi gα»‘c
48
- ai_brain: AI brain instance để hỏi lαΊ‘i (optional)
49
 
50
- Returns:
51
- Dict chα»©a thΓ΄ng tin phΓ’n tΓ­ch text
52
- """
53
- analysis = {
54
- "original_text": question,
55
- "reversed_text": reverse_text(question),
56
- "likely_reversed": is_likely_reversed(question),
57
- "should_reverse": False,
58
- "processed_text": question
59
- }
60
 
61
- # NαΊΏu cΓ³ dαΊ₯u hiệu bα»‹ viαΊΏt ngược
62
- if analysis["likely_reversed"]:
63
- print(f"πŸ”„ Detected likely reversed text: {question[:50]}...")
64
-
65
- # NαΊΏu cΓ³ AI brain, hỏi AI quyαΊΏt Δ‘α»‹nh
66
- if ai_brain:
67
- check_prompt = f"""
68
- Original: {question}
69
- Reversed: {analysis["reversed_text"]}
70
-
71
- Which version makes more sense as a question? Answer "original" or "reversed" only.
72
- """
73
- try:
74
- ai_response = ai_brain.think(check_prompt).strip().lower()
75
- analysis["should_reverse"] = "reversed" in ai_response
76
- analysis["ai_decision"] = ai_response
77
- except:
78
- # Fallback nαΊΏu AI khΓ΄ng hoαΊ‘t Δ‘α»™ng
79
- analysis["should_reverse"] = True
80
- analysis["ai_decision"] = "fallback_reverse"
81
- else:
82
- # KhΓ΄ng cΓ³ AI, AI sαΊ½ quyαΊΏt Δ‘α»‹nh sau
83
- analysis["should_reverse"] = None # Để AI quyαΊΏt Δ‘α»‹nh
84
-
85
- if analysis["should_reverse"]:
86
- analysis["processed_text"] = analysis["reversed_text"]
87
- print(f"πŸ”„ Reversed to: {analysis['reversed_text'][:50]}...")
88
-
89
- return analysis
90
-
91
- # Test function
92
- if __name__ == "__main__":
93
- # Test case tα»« đề bΓ i
94
- test_question = ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI"
95
-
96
- print("Original:", test_question)
97
- print("Is likely reversed:", is_likely_reversed(test_question))
98
-
99
- analysis = reverse_text_if_needed(test_question)
100
- print("Analysis:", analysis)
 
1
  """
2
+ Text Tool - Process reversed text questions
3
  """
4
 
5
  from typing import Dict, Any
6
 
 
 
 
 
 
 
7
  def is_likely_reversed(text: str) -> bool:
8
+ words = text.split()
9
+ if len(words) < 3:
10
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ reversed_indicators = [
13
+ text.startswith('.'),
14
+ text.endswith('fI'),
15
+ 'ecnetnes' in text,
16
+ 'rewsna' in text,
17
+ 'noitseuq' in text
18
+ ]
19
 
20
+ return sum(reversed_indicators) >= 2
21
+
22
def reverse_text(text: str) -> str:
    """Return ``text`` with its characters in reverse order."""
    return "".join(reversed(text))
24
 
25
  def reverse_text_if_needed(question: str, ai_brain=None) -> Dict[str, Any]:
26
+ if is_likely_reversed(question):
27
+ reversed_question = reverse_text(question)
 
 
 
 
28
 
29
+ return {
30
+ "should_reverse": True,
31
+ "original_text": question,
32
+ "processed_text": reversed_question,
33
+ "confidence": "high",
34
+ "reasoning": "Text appears to be written backwards"
35
+ }
 
 
 
36
 
37
+ return {
38
+ "should_reverse": False,
39
+ "original_text": question,
40
+ "processed_text": question,
41
+ "confidence": "high",
42
+ "reasoning": "Text appears normal"
43
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/tool_orchestrator.py CHANGED
@@ -6,7 +6,7 @@ import time
6
  from typing import Dict, Any, List, Optional, Callable
7
  from .state_manager import AgentState, ToolResult, get_agent_state, analyze_question_type, detect_urls_in_question
8
  from .text_tool import reverse_text_if_needed
9
- from .youtube_tool import get_youtube_content
10
  from .image_tool import ocr_image_with_nanonets, fallback_ocr_image
11
  from .audio_tool import transcribe_audio_groq, fallback_audio_info
12
  from .wiki_tool import search_wikipedia_from_question
@@ -198,8 +198,9 @@ class ToolOrchestrator:
198
  def _run_youtube_tool(self, question: str, task_id: str = "") -> Dict[str, Any]:
199
  """Run YouTube content extraction"""
200
  try:
201
- result = get_youtube_content(question)
202
- return result
 
203
  except Exception as e:
204
  return {"success": False, "error": str(e)}
205
 
 
6
  from typing import Dict, Any, List, Optional, Callable
7
  from .state_manager import AgentState, ToolResult, get_agent_state, analyze_question_type, detect_urls_in_question
8
  from .text_tool import reverse_text_if_needed
9
+ from .youtube_tool import YouTubeTool
10
  from .image_tool import ocr_image_with_nanonets, fallback_ocr_image
11
  from .audio_tool import transcribe_audio_groq, fallback_audio_info
12
  from .wiki_tool import search_wikipedia_from_question
 
198
  def _run_youtube_tool(self, question: str, task_id: str = "") -> Dict[str, Any]:
199
  """Run YouTube content extraction"""
200
  try:
201
+ youtube_tool = YouTubeTool()
202
+ result = youtube_tool.process_youtube(question)
203
+ return result.get("data", {}) if result.get("success") else {"success": False, "error": result.get("error", "Unknown error")}
204
  except Exception as e:
205
  return {"success": False, "error": str(e)}
206
 
utils/youtube_tool.py CHANGED
@@ -1,161 +1,189 @@
1
  """
2
- YouTube Tool - Auto detect YouTube URLs and extract metadata + transcript/thumbnail
 
3
  """
4
 
 
5
  import re
6
- import requests
7
- from typing import Dict, Any, Optional
 
 
8
 
9
- def extract_youtube_url(text: str) -> Optional[str]:
10
- """
11
- Tα»± Δ‘α»™ng regex tΓ¬m link YouTube trong cΓ’u hỏi task
12
- """
13
- # Regex lαΊ₯y link YouTube
14
- pattern = r'(https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)[\w\-]+)'
15
- match = re.search(pattern, text)
16
- if match:
17
- return match.group(1)
18
- return None
19
 
20
- def extract_youtube_id(url: str) -> Optional[str]:
21
- """
22
- LαΊ₯y video_id tα»« YouTube URL
23
- """
24
- # Hα»— trợ cαΊ£ dαΊ‘ng youtube.com/watch?v=... vΓ  youtu.be/...
25
- pattern = r'(?:v=|\/)([0-9A-Za-z_-]{11})'
26
- match = re.search(pattern, url)
27
- if match:
28
- return match.group(1)
29
- return None
30
 
31
- def get_youtube_thumbnail_url(video_url: str) -> Optional[str]:
32
- """
33
- LαΊ₯y link thumbnail tα»« YouTube URL
34
- """
35
- video_id = extract_youtube_id(video_url)
36
- if not video_id:
37
- return None
38
- # Link thumbnail chuαΊ©n cα»§a YouTube
39
- return f"https://img.youtube.com/vi/{video_id}/hqdefault.jpg"
40
-
41
- def get_txt_content_from_url(url: str) -> str:
42
- """
43
- LαΊ₯y nα»™i dung file .txt tα»« URL (dΓ nh cho transcript link)
44
- """
45
- try:
46
- response = requests.get(url, timeout=30)
47
- response.raise_for_status()
48
- return response.text
49
- except Exception as e:
50
- return f"Error downloading text file: {str(e)}"
51
-
52
- def get_youtube_content(question: str) -> Dict[str, Any]:
53
- """
54
- Main function: Tα»± Δ‘α»™ng detect YouTube URL trong cΓ’u hỏi vΓ  lαΊ₯y metadata + transcript/thumbnail
55
-
56
- Args:
57
- question: CΓ’u hỏi task cΓ³ thể chα»©a YouTube URL
58
-
59
- Returns:
60
- Dict chα»©a metadata, transcript (nαΊΏu cΓ³), thumbnail URL
61
- """
62
- # Auto detect YouTube URL
63
- youtube_url = extract_youtube_url(question)
64
-
65
- if not youtube_url:
66
- return {
67
- "has_youtube": False,
68
- "error": "No YouTube URL found in question"
69
- }
70
 
71
- print(f"Found YouTube URL: {youtube_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
- try:
74
- # Sα»­ dα»₯ng yt-dlp để lαΊ₯y metadata an toΓ n vα»›i cookies
75
- import yt_dlp
76
- import os
77
-
78
- # Path to cookies file
79
- cookies_path = "cookies.txt"
80
 
81
- ydl_opts = {
82
- 'writesubtitles': True,
83
- 'writeautomaticsub': True,
84
- 'subtitleslangs': ['en'],
85
- 'skip_download': True,
86
- 'quiet': True,
87
- 'no_warnings': True
88
- }
89
 
90
- # Add cookies if file exists
91
- if os.path.exists(cookies_path):
92
- ydl_opts['cookiefile'] = cookies_path
93
- print(f"πŸͺ Using cookies from {cookies_path}")
94
- else:
95
- print("⚠️ No cookies.txt found, trying without cookies")
96
 
97
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
98
- info = ydl.extract_info(youtube_url, download=False)
 
 
 
 
 
 
 
 
 
99
 
100
- title = info.get('title', 'Unknown Title')
101
- description = info.get('description', 'No description')
 
 
102
 
103
- # LαΊ₯y thumbnail
104
- thumbnail_url = get_youtube_thumbnail_url(youtube_url)
105
 
106
- # Kiểm tra transcript
107
- transcript_content = None
108
- if 'subtitles' in info and info['subtitles']:
109
- # CΓ³ subtitle/transcript
110
- for lang in ['en', 'en-US', 'en-GB']:
111
- if lang in info['subtitles']:
112
- subtitle_info = info['subtitles'][lang]
113
- if subtitle_info and len(subtitle_info) > 0:
114
- transcript_url = subtitle_info[0].get('url')
115
- if transcript_url:
116
- transcript_content = get_txt_content_from_url(transcript_url)
117
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- # Kiểm tra automatic_captions nαΊΏu khΓ΄ng cΓ³ subtitles
120
- if not transcript_content and 'automatic_captions' in info and info['automatic_captions']:
121
- for lang in ['en', 'en-US', 'en-GB']:
122
- if lang in info['automatic_captions']:
123
- caption_info = info['automatic_captions'][lang]
124
- if caption_info and len(caption_info) > 0:
125
- # Tìm format .vtt hoặc .txt
126
- for caption in caption_info:
127
- if caption.get('ext') in ['vtt', 'txt']:
128
- transcript_url = caption.get('url')
129
- if transcript_url:
130
- transcript_content = get_txt_content_from_url(transcript_url)
131
- break
132
- if transcript_content:
133
- break
134
 
135
  return {
136
- "has_youtube": True,
137
- "title": title,
138
- "description": description[:1000], # Giα»›i hαΊ‘n description
139
- "transcript": transcript_content,
140
- "thumbnail_url": thumbnail_url,
141
- "video_url": youtube_url
 
 
 
 
 
142
  }
143
 
144
- except Exception as e:
145
- # Fallback: Ít nhαΊ₯t trαΊ£ về thumbnail
146
- thumbnail_url = get_youtube_thumbnail_url(youtube_url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  return {
148
- "has_youtube": True,
149
- "title": "Could not fetch title",
150
- "description": "Could not fetch description",
151
- "transcript": None,
152
- "thumbnail_url": thumbnail_url,
153
- "video_url": youtube_url,
154
- "error": f"YouTube extraction error: {str(e)}"
155
- }
156
-
157
- # Test function
158
- if __name__ == "__main__":
159
- test_question = "What is this video about? https://www.youtube.com/watch?v=dQw4w9WgXcQ"
160
- result = get_youtube_content(test_question)
161
- print("Result:", result)
 
1
  """
2
+ YOUTUBE PROCESSING TOOL
3
+ Enhanced with cookies support for bot detection bypass
4
  """
5
 
6
+ import os
7
  import re
8
+ import json
9
+ import tempfile
10
+ from typing import Dict, Any, Optional, List
11
+ from urllib.parse import urlparse, parse_qs
12
 
13
+ try:
14
+ from pytube import YouTube
15
+ from youtube_transcript_api import YouTubeTranscriptApi
16
+ import yt_dlp
17
+ except ImportError as e:
18
+ print(f"⚠️ YouTube dependencies missing: {e}")
 
 
 
 
19
 
20
+ from .state_manager import get_agent_state
 
 
 
 
 
 
 
 
 
21
 
22
class YouTubeTool:
    """Extract metadata and transcripts for a YouTube video.

    Metadata is fetched with yt-dlp (using ``cookies.txt`` from the project
    root when that file exists) and falls back to pytube. Transcripts come
    from youtube-transcript-api. Successful results are cached in the shared
    agent state under ``cached_data["youtube_analysis"]``.
    """

    def __init__(self):
        """Locate the optional cookies file next to the package directory."""
        # Path to cookies file in project root
        self.cookies_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "cookies.txt")
        print("🎬 YouTube Tool with cookies support initialized")

    def process_youtube(self, youtube_input: str, **kwargs) -> Dict[str, Any]:
        """Extract video data for a URL, a bare video ID, or text containing a URL.

        Args:
            youtube_input: Video URL, 11-character video ID, or free text
                that contains a YouTube URL.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            ``{"success": True, "data": ..., "summary": ...}`` on success, or
            the dict produced by ``_error_result`` on failure.
        """
        try:
            # Extract video ID from URL or use as-is
            video_id = self._extract_video_id(youtube_input)
            if not video_id:
                return self._error_result("Invalid YouTube URL or video ID")

            print(f"🎬 Processing YouTube video: {video_id}")

            # Try multiple extraction methods
            video_data = self._extract_with_cookies(video_id) or self._extract_with_pytube(video_id)

            if not video_data:
                return self._error_result("Could not extract video data")

            # Update agent state
            state = get_agent_state()
            state.cached_data["youtube_analysis"] = video_data

            return {
                "success": True,
                "data": video_data,
                "summary": f"YouTube video processed: {video_data.get('title', 'Unknown')[:50]}..."
            }

        except Exception as e:
            error_msg = f"YouTube processing failed: {str(e)}"
            print(f"❌ {error_msg}")
            return self._error_result(error_msg)

    def _extract_video_id(self, url_or_id: str) -> Optional[str]:
        """Return the 11-character video ID found in ``url_or_id``.

        A bare ID is accepted when it consists solely of the characters
        YouTube IDs use (ASCII letters, digits, ``-`` and ``_``). Otherwise
        the text is searched for a watch, short-link, embed or shorts URL.

        Returns:
            The video ID, or ``None`` when the input is empty or no ID is found.
        """
        if not url_or_id:
            return None

        candidate = url_or_id.strip()
        # IDs may contain '-' and '_', which str.isalnum() would reject.
        if re.fullmatch(r'[A-Za-z0-9_-]{11}', candidate):
            return candidate

        # Extract from various YouTube URL formats
        patterns = [
            r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/|youtube\.com/shorts/)([a-zA-Z0-9_-]{11})',
            r'youtube\.com/.*[?&]v=([a-zA-Z0-9_-]{11})',
        ]

        for pattern in patterns:
            match = re.search(pattern, candidate)
            if match:
                return match.group(1)

        return None

    def _extract_with_cookies(self, video_id: str) -> Optional[Dict[str, Any]]:
        """Fetch metadata with yt-dlp, attaching cookies when the file exists.

        Returns:
            A dict of video fields, or ``None`` if yt-dlp extraction fails.
        """
        try:
            ydl_opts = {
                'quiet': True,
                'no_warnings': True,
                'extractaudio': False,
                'extract_flat': False,
            }

            # Add cookies if file exists
            if os.path.exists(self.cookies_path):
                ydl_opts['cookiefile'] = self.cookies_path
                print(f"πŸͺ Using cookies from: {self.cookies_path}")

            url = f"https://www.youtube.com/watch?v={video_id}"

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)

                # Extract transcript using youtube-transcript-api
                transcript = self._get_transcript(video_id)

                return {
                    "video_id": video_id,
                    "title": info.get('title', ''),
                    "description": info.get('description', ''),
                    "channel": info.get('uploader', ''),
                    "duration": info.get('duration', 0),
                    "view_count": info.get('view_count', 0),
                    "transcript": transcript,
                    "thumbnail_url": info.get('thumbnail', ''),
                    "upload_date": info.get('upload_date', ''),
                    "url": url,
                    "extraction_method": "yt-dlp_with_cookies"
                }

        except Exception as e:
            print(f"⚠️ yt-dlp extraction failed: {str(e)}")
            return None

    def _extract_with_pytube(self, video_id: str) -> Optional[Dict[str, Any]]:
        """Fallback metadata extraction using pytube.

        Returns:
            A dict of video fields, or ``None`` if pytube extraction fails.
        """
        try:
            url = f"https://www.youtube.com/watch?v={video_id}"
            yt = YouTube(url)

            transcript = self._get_transcript(video_id)

            return {
                "video_id": video_id,
                "title": yt.title or '',
                "description": yt.description or '',
                "channel": yt.author or '',
                "duration": yt.length or 0,
                "view_count": yt.views or 0,
                "transcript": transcript,
                "thumbnail_url": yt.thumbnail_url or '',
                "upload_date": str(yt.publish_date) if yt.publish_date else '',
                "url": url,
                "extraction_method": "pytube_fallback"
            }

        except Exception as e:
            print(f"⚠️ PyTube extraction failed: {str(e)}")
            return None

    def _get_transcript(self, video_id: str) -> str:
        """Return the transcript text for ``video_id``.

        Each preferred language is tried in turn, then the library default.
        When nothing can be fetched a placeholder string is returned instead
        of raising.
        """
        try:
            # Try to get transcript in multiple languages
            # NOTE(review): 'auto' does not look like a language code the
            # transcript API understands; that attempt presumably just fails
            # and is skipped. Confirm before removing it.
            languages = ['en', 'en-US', 'auto', 'vi']

            for lang in languages:
                try:
                    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
                    transcript_text = ' '.join([entry['text'] for entry in transcript_list])
                    if transcript_text.strip():
                        return transcript_text
                except Exception:
                    # Narrowed from a bare except so Ctrl-C / SystemExit
                    # still propagate; any API error means "try the next one".
                    continue

            # Last attempt with the library's default language selection
            try:
                transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
                return ' '.join([entry['text'] for entry in transcript_list])
            except Exception:
                return "No transcript available"

        except Exception as e:
            print(f"⚠️ Transcript extraction failed: {str(e)}")
            return "Transcript extraction failed"

    def is_youtube_url(self, text: str) -> bool:
        """Return True when ``text`` contains a recognisable YouTube URL."""
        youtube_patterns = [
            r'youtube\.com/watch\?v=',
            r'youtu\.be/',
            r'youtube\.com/embed/',
            r'youtube\.com/shorts/',
            r'youtube\.com/.*[?&]v='
        ]
        return any(re.search(pattern, text, re.IGNORECASE) for pattern in youtube_patterns)

    def _error_result(self, error_msg: str) -> Dict[str, Any]:
        """Build the standard failure dict for ``error_msg``."""
        return {
            "success": False,
            "error": error_msg,
            "data": None,
            "summary": f"YouTube processing failed: {error_msg}"
        }