|
|
import os |
|
|
import gradio as gr |
|
|
import requests |
|
|
import pandas as pd |
|
|
from smolagents import ToolCallingAgent, tool |
|
|
from duckduckgo_search import DDGS |
|
|
import math |
|
|
import re |
|
|
from datetime import datetime |
|
|
|
|
|
|
|
|
@tool
def web_search(query: str) -> str:
    """Performs a web search using DuckDuckGo.

    Args:
        query: The search query string.

    Returns:
        A formatted string with up to three results (title, content and URL
        per result, results separated by a blank line), "No results found."
        when the search yields nothing, or a "Search error: ..." message when
        the lookup raises.
    """
    try:
        with DDGS() as ddgs:
            # Materialise the results before testing them: some
            # duckduckgo_search releases appear to return a lazy generator
            # here, and a generator is truthy even when it yields nothing,
            # which would make the "No results found." branch unreachable.
            hits = list(ddgs.text(query, max_results=3) or [])

        if not hits:
            return "No results found."

        # .get() keeps one incomplete result from discarding the whole search
        # via the broad except below.
        return "\n\n".join(
            f"Title: {hit.get('title', '')}\n"
            f"Content: {hit.get('body', '')}\n"
            f"URL: {hit.get('href', '')}"
            for hit in hits
        )
    except Exception as e:
        # Tools report failures as text so the calling agent can react to
        # them instead of crashing mid-run.
        return f"Search error: {str(e)}"
|
|
|
|
|
@tool
def calculate(expression: str) -> str:
    """Evaluates mathematical expressions.

    Supports the public names of Python's math module (sqrt, sin, pi, ...)
    plus abs, round, min, max and sum.

    Args:
        expression: The math expression to evaluate.

    Returns:
        The result as a string or error message.
    """
    try:
        if not isinstance(expression, str) or not expression.strip():
            return "Calculation error: empty expression"

        # SECURITY: the expression comes from the model and is passed to
        # eval(). Disabling __builtins__ is NOT a sandbox: attribute chains
        # such as ().__class__.__bases__ can still reach arbitrary objects.
        # Rejecting double-underscore access closes that well-known route,
        # but this remains a mitigation only; inputs like 9**9**9**9 can
        # still exhaust CPU/memory. Consider replacing eval() with an
        # ast-based evaluator if the tool is ever exposed to untrusted users.
        if "__" in expression:
            return "Calculation error: double-underscore names are not allowed"

        namespace = {
            name: value
            for name, value in vars(math).items()
            if not name.startswith("__")
        }
        namespace.update({
            "__builtins__": None,
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "sum": sum,
        })

        # A single namespace (used as both globals and locals) lets nested
        # scopes inside the expression, e.g. lambdas or generator
        # expressions, resolve the math names; names passed only as locals
        # are invisible there.
        result = eval(expression.strip(), namespace)
        return str(result)
    except Exception as e:
        return f"Calculation error: {str(e)}"
|
|
|
|
|
|
|
|
class GAIAAgent:
    """Callable wrapper around a smolagents ToolCallingAgent.

    The wrapped agent is given the web_search and calculate tools. Calling
    an instance with a question string returns the agent's answer as text;
    failures while answering are reported as text rather than raised.
    """

    def __init__(self, model_id: str = "gpt-3.5-turbo"):
        """Initialize the agent with proper error handling.

        Args:
            model_id: Identifier of the chat model to drive the agent.

        Raises:
            Exception: Whatever the model or agent construction raises; the
                error is printed and then re-raised unchanged.
        """
        try:
            # The agent needs a model *object*, not a bare model-name string:
            # a string cannot be invoked, so every run() would fail. Imported
            # locally so the file's top-level import block is left untouched.
            # NOTE(review): presumably the OpenAI credentials are picked up
            # from the environment (OPENAI_API_KEY) - confirm for the
            # deployment.
            from smolagents import OpenAIServerModel

            self.agent = ToolCallingAgent(
                name="GAIA_Submission_Agent",
                description="Agent for GAIA benchmark tasks",
                tools=[web_search, calculate],
                model=OpenAIServerModel(model_id=model_id),
                planning_interval=3,
            )
            print("✅ Agent initialized successfully")
        except Exception as e:
            print(f"❌ Agent initialization failed: {str(e)}")
            raise

    def __call__(self, question: str) -> str:
        """Process a question with proper error handling.

        Args:
            question: The question text to hand to the agent.

        Returns:
            The agent's response converted to a string;
            "Invalid question format" for empty or non-string input;
            "No response generated" when the agent returns None; or an
            "Error processing question: ..." message when the run raises.
        """
        try:
            if not question or not isinstance(question, str):
                return "Invalid question format"

            response = self.agent.run(question)

            if response is None:
                return "No response generated"
            return str(response)
        except Exception as e:
            # Deliberately best-effort: one failing question must not abort
            # the caller's loop over the remaining questions.
            print(f"⚠️ Processing error: {str(e)}")
            return f"Error processing question: {str(e)}"
|
|
|
|
|
|
|
|
def submit_answers(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer them with the agent and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in. Only its `username` attribute is read.

    Returns:
        A `(status_text, table)` pair. `status_text` is the submission
        summary or an error/notice message. `table` is a pandas DataFrame
        with one row per answered question (task id, truncated question,
        truncated answer), or None when no question was answered.
    """
    if not profile:
        return "Please login to Hugging Face", None

    # Kept outside the try block so the rows gathered so far can still be
    # shown if a later step (typically the submission request) fails.
    results = []

    try:
        agent = GAIAAgent()
        response = requests.get(
            "https://agents-course-unit4-scoring.hf.space/questions",
            timeout=20,
        )
        # Surface HTTP failures as such instead of trying to parse an error
        # page as the question list.
        response.raise_for_status()
        questions = response.json()

        if not questions:
            return "No questions received", None
        if not isinstance(questions, list):
            # The loop below slices the payload, which only works on a list.
            return "Error: unexpected questions payload", None

        answers = []

        # Only the first 15 questions are attempted.
        for item in questions[:15]:
            task_id = item.get("task_id")
            question = item.get("question")

            # Skip entries that lack either field.
            if not task_id or not question:
                continue

            answer = agent(question)
            answers.append({
                "task_id": task_id,
                "submitted_answer": answer[:1000],
            })
            # Shorter truncations keep the on-screen table readable.
            results.append({
                "Task ID": task_id,
                "Question": question[:100],
                "Answer": answer[:200],
            })

        if not answers:
            # Nothing usable was produced; do not post an empty submission.
            return "No answers to submit", None

        submit_response = requests.post(
            "https://agents-course-unit4-scoring.hf.space/submit",
            json={
                "username": profile.username,
                # NOTE(review): SPACE_ID is read from the environment; when
                # it is unset the URL ends in "None" - confirm it is always
                # defined where this runs.
                "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
                "answers": answers,
            },
            timeout=60,
        )
        submit_response.raise_for_status()
        data = submit_response.json()

        return (
            f"Submitted {len(answers)} answers\n"
            f"Score: {data.get('score', 'N/A')}%\n"
            f"Correct: {data.get('correct_count', 0)}/{len(answers)}",
            pd.DataFrame(results),
        )

    except Exception as e:
        # Top-level UI boundary: report the failure as text, and keep any
        # already-computed rows visible rather than discarding them.
        return f"Error: {str(e)}", (pd.DataFrame(results) if results else None)
|
|
|
|
|
|
|
|
# Gradio front end: a title, a login button, a trigger button and two output
# widgets (status text and per-question details table).
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Submission Agent")
    gr.LoginButton()
    submit_btn = gr.Button("Run Evaluation", variant="primary")
    output = gr.Textbox(label="Results")
    table = gr.DataFrame(label="Details")

    # No input components are wired to the handler.
    # NOTE(review): submit_answers' only parameter is the OAuth profile,
    # which Gradio presumably injects from the login session based on the
    # handler's type annotation - confirm against the Gradio version in use.
    # The handler's two return values map onto the text box and the table.
    submit_btn.click(submit_answers, None, [output, table])


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()