|
|
import os |
|
|
import gradio as gr |
|
|
import requests |
|
|
import pandas as pd |
|
|
import math |
|
|
|
|
|
from smolagents import ToolCallingAgent, tool |
|
|
from duckduckgo_search import DDGS |
|
|
from openai import OpenAI |
|
|
|
|
|
|
|
|
# Module-wide OpenAI client. The API key is read from the OPENAI_API_KEY
# environment variable (os.environ.get yields None when it is unset).
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# NOTE(review): the docstring below is kept verbatim on purpose -- the `tool`
# decorator presumably uses it (summary + "Args:" section) as the tool
# description shown to the model; confirm against the smolagents docs before
# rewording it.
@tool
def web_search(query: str) -> str:
    """Search the web using DuckDuckGo.

    Args:
        query: The search query to look up.

    Returns:
        A summary of the top web results.
    """
    try:
        with DDGS() as ddgs:
            # Materialize while the session is still open: depending on the
            # duckduckgo_search release, `text` may hand back a lazy generator,
            # which is always truthy and would defeat the emptiness check.
            results = list(ddgs.text(query, max_results=3) or [])

        if not results:
            return "No results found."

        # `.get` keeps one malformed hit (missing key) from turning the whole
        # search into an error; the absent field is simply left blank.
        return "\n\n".join(
            f"Title: {r.get('title', '')}\n"
            f"Snippet: {r.get('body', '')}\n"
            f"URL: {r.get('href', '')}"
            for r in results
        )
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising so the
        # calling agent always receives a string.
        return f"Search error: {str(e)}"
|
|
|
|
|
# NOTE(review): the docstring below is kept verbatim on purpose -- the `tool`
# decorator presumably uses it (summary + "Args:" section) as the tool
# description shown to the model; confirm against the smolagents docs before
# rewording it.
@tool
def calculate(expression: str) -> str:
    """Evaluate a mathematical expression.

    Args:
        expression: The math expression to evaluate (e.g. '2 + 3 * 5').

    Returns:
        Result of the calculation.
    """
    # SECURITY(review): this still relies on eval() over text that originates
    # from a language model / web content. `{"__builtins__": None}` is NOT a
    # real sandbox; the dunder check below only closes the usual
    # attribute-walking escape (e.g. `().__class__...`). CPU/memory bombs such
    # as `9**9**9` are not prevented. Replace with an AST-whitelist evaluator
    # if this ever runs on untrusted input outside a throwaway container.
    try:
        if not isinstance(expression, str) or not expression.strip():
            return "Calculation error: empty expression"
        if "__" in expression:
            return "Calculation error: double-underscore names are not allowed"

        # A few harmless numeric builtins, so ordinary arithmetic such as
        # `abs(-3)` or `round(2.5)` works even though builtins are disabled.
        namespace = {
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "sum": sum,
        }
        # Public names of the math module (sqrt, pi, sin, ...) take precedence.
        namespace.update(
            {k: v for k, v in math.__dict__.items() if not k.startswith("__")}
        )

        result = eval(expression, {"__builtins__": None}, namespace)
        return str(result)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Calculation error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GAIAAgent: |
|
|
def __init__(self): |
|
|
self.agent = ToolCallingAgent( |
|
|
name="GAIA Agent", |
|
|
description="""You are an AI assistant that answers questions using tools: |
|
|
- Use 'web_search' for looking up facts and recent information. |
|
|
- Use 'calculate' for evaluating math expressions. |
|
|
Be accurate and concise.""", |
|
|
tools=[web_search, calculate], |
|
|
model=client.chat.completions |
|
|
) |
|
|
|
|
|
def __call__(self, question: str) -> str: |
|
|
try: |
|
|
response = self.agent.run(question) |
|
|
return str(response) |
|
|
except Exception as e: |
|
|
return f"Agent error: {str(e)}" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_agent_and_submit(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with ``GAIAAgent``, and submit.

    Args:
        profile: Profile of the logged-in Hugging Face user, or ``None`` when
            nobody is logged in. (Presumably injected by Gradio from the type
            annotation, since the click handler passes no inputs -- confirm
            against the Gradio OAuth docs. Do not change the annotation.)

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with "Task ID", "Question" and "Answer" columns,
        or ``None`` when the run stopped before any question was processed.
    """
    if not profile:
        return "⚠️ Please log in to Hugging Face.", None

    api_base = "https://agents-course-unit4-scoring.hf.space"

    # Build the agent separately so a construction failure (e.g. bad model
    # configuration) is not misreported as a question-fetching problem.
    try:
        agent = GAIAAgent()
    except Exception as e:
        return f"❌ Error initializing agent: {e}", None

    try:
        response = requests.get(f"{api_base}/questions", timeout=20)
        # Surface HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"❌ Error fetching questions: {e}", None

    # The loop below expects a list of dicts; anything else would crash it.
    if not isinstance(questions, list):
        return "❌ Error fetching questions: unexpected response format", None

    results = []
    answers = []

    for q in questions:
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question_text = q.get("question")
        if not task_id or not question_text:
            continue

        try:
            answer = agent(question_text)
        except Exception as e:
            answer = f"Agent error: {e}"

        # The submitted answer is capped at 1000 characters; the results
        # table keeps the full text.
        answers.append({"task_id": task_id, "submitted_answer": answer[:1000]})
        results.append(
            {
                "Task ID": task_id,
                "Question": question_text,
                "Answer": answer,
            }
        )

    # Nothing to submit: skip the POST rather than sending an empty payload.
    if not answers:
        return "❌ Submission error: no valid questions to answer", pd.DataFrame(results)

    try:
        payload = {
            "username": profile.username,
            # SPACE_ID comes from the environment; when it is unset the link
            # contains "None" (behaviour kept as in the original).
            "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
            "answers": answers,
        }
        submit_resp = requests.post(f"{api_base}/submit", json=payload, timeout=60)
        submit_resp.raise_for_status()
        result_data = submit_resp.json()
        summary = (
            f"✅ Submitted {len(answers)} answers\n"
            f"📊 Score: {result_data.get('score', 'N/A')}%\n"
            f"✔️ Correct: {result_data.get('correct_count', '?')}/{len(answers)}"
        )
    except Exception as e:
        summary = f"❌ Submission error: {e}"

    return summary, pd.DataFrame(results)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: login button, a single "run" button, a status box and a table
# with the per-question results.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Tool Agent")
    gr.Markdown("This agent answers GAIA benchmark questions using tool-calling with search and math.")
    gr.LoginButton()
    run_btn = gr.Button("🚀 Run Agent & Submit")
    status = gr.Textbox(label="Status", lines=4)
    results_df = gr.DataFrame(label="Results")

    # No explicit inputs: the handler's only parameter is the OAuth profile
    # (presumably supplied by Gradio from its type annotation -- confirm
    # against the Gradio OAuth docs).
    run_btn.click(fn=run_agent_and_submit, outputs=[status, results_df])


if __name__ == "__main__":
    demo.launch()
|
|
|