File size: 5,350 Bytes
10e9b7d
 
4c934c3
 
94d642e
 
 
4c934c3
d6f7c66
4c934c3
 
94d642e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c15dab
94d642e
 
 
 
 
 
 
 
 
 
 
4c934c3
94d642e
4c934c3
94d642e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c15dab
94d642e
 
 
 
 
 
 
 
 
 
4c15dab
94d642e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os
import gradio as gr
import requests
import pandas as pd
from smolagents import ToolCallingAgent, tool
import duckduckgo_search
import math

# --- Constants ---
# Base URL of the scoring service. run_and_submit_all() derives the
# "/questions" and "/submit" endpoints from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Tools ---
@tool
def duck_search(query: str) -> str:
    """Search the web with DuckDuckGo and return the top results as text.

    Args:
        query: The search terms to look up.

    Returns:
        Up to three results, one per line, formatted as ``title: body``;
        ``"No results found."`` when the search comes back empty; or a
        ``"Search error: ..."`` message when the lookup fails.
    """
    try:
        ddgs_cls = getattr(duckduckgo_search, "DDGS", None)
        if ddgs_cls is not None:
            # Current duckduckgo_search releases expose the DDGS client and
            # no longer ship the module-level ddg() helper. list() covers
            # both the generator and the list return styles of text().
            with ddgs_cls() as ddgs:
                results = list(ddgs.text(query, max_results=3))
        else:
            # Legacy releases only provide ddg(), which may return None.
            results = duckduckgo_search.ddg(query, max_results=3)

        if not results:
            return "No results found."
        # .get() keeps one malformed hit from discarding the whole answer.
        return "\n".join(
            f"{r.get('title', '')}: {r.get('body', '')}" for r in results
        )
    except Exception as e:
        # Tools report failures as text so the agent can read them and
        # decide how to proceed instead of aborting the whole run.
        return f"Search error: {e}"

@tool
def calculator(expression: str) -> str:
    """Evaluate a math expression using the names from Python's math module.

    Args:
        expression: A Python arithmetic expression, for example
            "sqrt(16) + 2 * pi".

    Returns:
        The result converted to a string, or a ``"Calculation error: ..."``
        message when the expression cannot be evaluated.
    """
    # Build a private namespace on every call. Handing eval() the live
    # math.__dict__ would let an expression such as "(pi := 3)" overwrite
    # attributes of the real math module for the rest of the process, and
    # would expose dunder entries like __loader__ / __spec__.
    namespace = {
        name: value
        for name, value in vars(math).items()
        if not name.startswith("_")
    }
    # A few harmless builtins that arithmetic expressions commonly need.
    namespace.update(abs=abs, round=round, min=min, max=max, sum=sum)
    namespace["__builtins__"] = {}

    try:
        # SECURITY(review): eval() with emptied builtins is NOT a sandbox;
        # attribute traversal on literals can still reach arbitrary objects.
        # The expression comes from the model (which reads web content), so
        # consider replacing this with an ast-based arithmetic evaluator.
        result = eval(expression, namespace)
        return str(result)
    except Exception as e:
        # Reported as text so the agent can correct its expression.
        return f"Calculation error: {e}"

# --- Agent Definition ---
class WebSearchAgent:
    """Callable wrapper around a smolagents ``ToolCallingAgent``.

    The wrapped agent is given the ``duck_search`` and ``calculator`` tools.
    Calling an instance with a question returns the agent's answer as a
    string; failures are reported as an ``"Error: ..."`` string rather than
    raised.
    """

    # Guidance sent along with every question. It is prepended to the task
    # instead of being passed as a system prompt: replacing the agent's
    # built-in system prompt with a single sentence would drop the
    # tool-calling instructions that prompt carries.
    INSTRUCTIONS = (
        "You're a helpful agent tasked with answering general questions using "
        "reasoning and external tools if needed. Prioritize factual accuracy, "
        "logic, and concise answers."
    )

    def __init__(self, model=None):
        """Build the underlying tool-calling agent.

        Args:
            model: Optional smolagents model instance that drives the agent.
                When omitted, the library's default hosted-inference model
                class is instantiated with its default settings.
        """
        if model is None:
            model = self._default_model()
        self.agent = ToolCallingAgent(
            tools=[duck_search, calculator],
            model=model,  # required by ToolCallingAgent
            max_steps=5,  # the step cap parameter is named max_steps
            name="GAIAWebToolAgent",
            description="An agent that answers questions using reasoning and tools like web search and calculator.",
        )
        print("✅ WebSearchAgent initialized.")

    @staticmethod
    def _default_model():
        """Instantiate the hosted-inference model class smolagents provides."""
        import smolagents

        # The class was renamed between releases; try the newer name first.
        for cls_name in ("InferenceClientModel", "HfApiModel"):
            model_cls = getattr(smolagents, cls_name, None)
            if model_cls is not None:
                return model_cls()
        raise RuntimeError(
            "No default inference model class found in smolagents; "
            "pass a model explicitly to WebSearchAgent(model=...)."
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Args:
            question: The question to answer.

        Returns:
            The agent's final answer converted to ``str``, or
            ``"Error: <message>"`` if the run raised.
        """
        print(f"🔍 Agent received: {question}")
        try:
            answer = self.agent.run(f"{self.INSTRUCTIONS}\n\n{question}")
        except Exception as e:
            # Best-effort boundary: one failing question must not abort the
            # caller's evaluation loop.
            print(f"❌ Error: {e}")
            return f"Error: {e}"
        # The agent may return non-string values (e.g. numbers).
        return str(answer)

# --- Main Evaluation Logic ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the evaluation questions, answer them, and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when the
            user has not logged in.

    Returns:
        A ``(status, table)`` pair: ``status`` is a human-readable message
        and ``table`` is a ``pandas.DataFrame`` with one row per attempted
        question, or ``None`` when the run stopped before any question was
        attempted.
    """
    if not profile:
        return "Please login to Hugging Face first.", None
    username = profile.username
    print(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        # Outside a Space the variable is unset and the code link below
        # will not point at a real repository.
        print("Warning: SPACE_ID is not set; the submitted code link will be invalid.")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = WebSearchAgent()
    except Exception as e:
        # Construction can fail in many ways (missing token, bad config);
        # all of them are reported to the UI the same way.
        return f"Agent init error: {e}", None

    try:
        print("📥 Fetching questions...")
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        return f"Error fetching questions: {e}", None
    # The loop below needs a non-empty list of dicts; a JSON object or
    # scalar would otherwise fail on item.get().
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None
    print(f"✅ Fetched {len(questions_data)} questions.")

    answers_payload = []
    results_log = []
    print("🚀 Running agent on questions...")
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # Log the failure in the table but do not submit it.
            submitted_answer = f"Agent error: {e}"
            print(submitted_answer)
        else:
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": submitted_answer,
            })
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted_answer,
        })

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "No answers to submit.", results_df

    print("📤 Submitting answers...")
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        return f"Submission error: {e}", results_df
    if not isinstance(result, dict):
        return "Submission error: unexpected response format.", results_df

    final_status = (
        f"✅ Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score', 'N/A')}% "
        f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
        f"Message: {result.get('message', 'No message.')}"
    )
    return final_status, results_df

# --- Gradio UI ---
# Page layout: title, instructions, login button, run button, and the two
# output widgets that run_and_submit_all() fills in.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Agent with Web Search & Calculator")
    gr.Markdown("""
    1. Log in to Hugging Face.
    2. Click **Run Evaluation** to fetch, run, and submit.
    3. Your agent uses web search (DuckDuckGo) and math tools.
    """)
    gr.LoginButton()
    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5)
    results_table = gr.DataFrame(label="Answer Log")

    # No explicit inputs: the handler's only parameter is the OAuth profile,
    # declared through its gr.OAuthProfile type annotation.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    print("🌍 Launching App...")
    demo.launch(debug=True, share=False)