# Hugging Face Spaces page header (scraped): build status "Build error".
import os
import subprocess
import sys
import traceback  # Import traceback for detailed error logging

import gradio as gr
import pandas as pd
import requests
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    OpenAIServerModel,
)
class PythonREPLTool:
    """Run a snippet of Python source in a separate interpreter process.

    The snippet is executed with ``<python> -c <code>``; its standard output
    is returned on success, and its standard error (prefixed with
    ``"Error:"``) is returned when the process exits with a non-zero status.

    SECURITY NOTE: the code is executed as-is with the privileges of this
    process. Only feed it input you are prepared to run.

    NOTE(review): this is a plain class exposing ``name``, ``description``
    and ``run``; whether the agent framework accepts it as a tool without
    subclassing the framework's tool base class should be confirmed.
    """

    name = "python_repl"
    description = "Runs Python code and returns the output or error."

    def __init__(self, timeout=10):
        """Remember the per-run time limit.

        Args:
            timeout: Maximum number of seconds a snippet may run before the
                child process is killed.
        """
        self.timeout = timeout

    def run(self, code: str) -> str:
        """Execute ``code`` in a child interpreter and report the outcome.

        Args:
            code: Python source passed to the interpreter via ``-c``.

        Returns:
            The stripped stdout when the exit status is 0, otherwise
            ``"Error:\\n"`` followed by the stripped stderr, or
            ``"Execution timed out."`` if the time limit was exceeded.
        """
        try:
            result = subprocess.run(
                # Prefer the interpreter running this app; fall back to the
                # PATH lookup when the executable path is unavailable.
                [sys.executable or "python3", "-c", code],
                # Without capturing, stdout/stderr are None and the
                # .strip() calls below would raise AttributeError.
                capture_output=True,
                text=True,
                timeout=self.timeout,
            )
        except subprocess.TimeoutExpired:
            return "Execution timed out."

        if result.returncode == 0:
            return result.stdout.strip()
        return f"Error:\n{result.stderr.strip()}"
# --- Constants ---
# Base URL of the scoring service; the "/questions" and "/submit" endpoint
# paths are appended to it when talking to the service.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Agent Definition ---
class GaiaAgent:
    """Question-answering agent built from an OpenAI-backed model and tools.

    Calling an instance with a question runs the underlying ``CodeAgent`` and
    returns its answer as a string. Failures never propagate to the caller:
    they are logged and a fixed error string is returned instead.
    """

    # Standing instructions sent with every question.
    # NOTE(review): these were previously passed as ``system_prompt=`` to
    # ``OpenAIServerModel``. That constructor does not declare such a
    # parameter, and extra keyword arguments appear to be forwarded to the
    # chat-completion request, which would reject the unknown argument.
    # Confirm against the pinned smolagents version. Prepending the text to
    # the task works regardless of the library version.
    INSTRUCTIONS = (
        "You are a meticulous AI agent. "
        "Always think in Python code using the available tools. "
        "Never answer without executing or checking with a tool. "
        "Use DuckDuckGoSearchTool for factual lookups. "
        "Use PythonREPLTool for calculations, string manipulation, and logical deductions. "
        "Respond with the final answer only. Do not include any extra explanation. "
        "Here are some examples of how to use the tools:"
        "# Example 1: Calculate the square root of 16\n"
        "# ```python\n"
        "# print(16**0.5)\n"
        "# ```\n"
        "# Example 2: Search for the capital of France\n"
        "# ```python\n"
        "# print(DuckDuckGoSearchTool(query='capital of France'))\n"
        "# ```\n"
        "# Example 3: Reverse a string\n"
        "# ```python\n"
        "# print('hello'[::-1])\n"
        "# ```\n"
    )

    def __init__(self, openai_key: str):
        """Build the model, the tools and the agent.

        Args:
            openai_key: API key handed to the OpenAI-compatible model client.
        """
        self.openai_key = openai_key

        # 1) Initialize the LLM-backed model
        self.model = OpenAIServerModel(
            model_id="gpt-4",  # or "gpt-3.5-turbo" if you prefer
            api_key=self.openai_key,
        )

        # 2) Define the tools
        self.search_tool = DuckDuckGoSearchTool()
        self.python_tool = PythonREPLTool(timeout=10)

        # 3) Create the CodeAgent.
        # No trailing comma after the call: a trailing comma would bind a
        # 1-tuple to ``self.agent`` and break ``self.agent.run``.
        self.agent = CodeAgent(
            model=self.model,
            tools=[self.search_tool, self.python_tool],
            # Upper bound on the number of reasoning/action steps per run.
            max_steps=20,
        )

    def __call__(self, question: str) -> str:
        """Answer ``question`` with the agent.

        Args:
            question: The task text to solve.

        Returns:
            The agent's answer converted to ``str``, or
            ``"ERROR: Agent failed to answer."`` if anything went wrong.
        """
        try:
            task = f"{self.INSTRUCTIONS}\n\nQuestion: {question}"
            # The agent may return a non-string object (e.g. a number);
            # normalize so callers always receive text.
            return str(self.agent.run(task))
        except Exception as e:
            error_message = f"Agent execution failed: {e}\n{traceback.format_exc()}"
            print(error_message)  # Log the error for debugging
            return "ERROR: Agent failed to answer."  # Return a string, not an exception
def run_and_submit_all(openai_key: str):
    """Fetch the benchmark questions, answer them, and submit the answers.

    Args:
        openai_key: API key used to construct the agent.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message;
        ``table`` is a ``pandas.DataFrame`` with one row per processed
        question (columns "Task ID", "Question", "Answer"), or ``None`` when
        the run stopped before any question was processed.
    """
    # --- Login & Setup ---
    # Login is not wired in here; every run is submitted under this
    # placeholder username.
    username = "anonymous"

    # 1) Instantiate the agent
    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        error_message = f"Error initializing agent: {e}\n{traceback.format_exc()}"
        print(error_message)
        return f"Error initializing agent: {e}", None

    # 2) Fetch the GAIA questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        error_message = f"Error fetching questions: {e}\n{traceback.format_exc()}"
        print(error_message)
        return f"Error fetching questions: {e}", None

    # The loop below indexes each item by key, so anything other than a
    # non-empty list of mappings cannot be processed.
    if not isinstance(questions, list) or not questions:
        return "Fetched questions list is empty or has an unexpected format.", None

    # 3) Run the agent on each question
    answers = []
    log = []
    for item in questions:
        tid = item.get("task_id") if isinstance(item, dict) else None
        q = item.get("question") if isinstance(item, dict) else None
        if not tid or q is None:
            # Skip the bad entry instead of aborting the whole run.
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            ans = agent(q)
        except Exception as e:
            error_message = f"Error processing question {tid}: {e}\n{traceback.format_exc()}"
            print(error_message)  # Print full traceback
            ans = f"ERROR: {e}"
        answers.append({"task_id": tid, "submitted_answer": ans})
        log.append({"Task ID": tid, "Question": q, "Answer": ans})

    if not answers:
        return "Agent did not produce any answers to submit.", pd.DataFrame(log)

    # 4) Submit
    submit_url = f"{DEFAULT_API_URL}/submit"
    payload = {
        "username": username,
        "agent_code": "https://huggingface.co/spaces/kshitijthakkar/GaiaAgent/tree/main",
        "answers": answers,
    }
    try:
        res = requests.post(submit_url, json=payload, timeout=60)
        res.raise_for_status()
        data = res.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {data['username']}\n"
            f"Score: {data['score']}% ({data['correct_count']}/{data['total_attempted']})\n"
            f"Message: {data.get('message', '')}"
        )
    except Exception as e:
        error_message = f"Submission failed: {e}\n{traceback.format_exc()}"
        print(error_message)
        status = f"Submission failed: {e}"

    return status, pd.DataFrame(log)
# --- Gradio UI ---
def run_test_questions(profile, openai_key, test_questions):
    """Run the agent on ad-hoc questions and tabulate the answers.

    Args:
        profile: Login information; a falsy value is treated as "not logged in".
        openai_key: API key used to construct the agent.
        test_questions: Either one comma-separated string of questions (as
            produced by a text box) or an iterable of question strings.
            Because commas separate questions in the string form, a single
            question cannot itself contain a comma.

    Returns:
        Always a single ``pandas.DataFrame`` with columns "Question" and
        "Answer". Problems (not logged in, no questions, agent could not be
        created) are reported as a one-row table whose "Answer" holds the
        message, so the result always fits a single table output.
    """

    def _message_frame(message):
        # One-row table carrying a status/error message.
        return pd.DataFrame([{"Question": "", "Answer": message}])

    if not profile:
        return _message_frame("Please log in to Hugging Face to run the test questions.")

    # A plain string must be split explicitly; iterating it directly would
    # yield individual characters rather than questions.
    if isinstance(test_questions, str):
        candidates = test_questions.split(",")
    else:
        candidates = test_questions or []
    questions = [text for text in (str(c).strip() for c in candidates) if text]
    if not questions:
        return _message_frame("No test questions provided.")

    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        error_message = f"Error initializing agent: {e}\n{traceback.format_exc()}"
        print(error_message)
        return _message_frame(f"Error initializing agent: {e}")

    log = []
    for q in questions:
        try:
            ans = agent(q)
        except Exception as e:
            error_message = f"Error processing test question: {e}\n{traceback.format_exc()}"
            print(error_message)
            ans = f"ERROR: {e}"
        log.append({"Question": q, "Answer": ans})
    return pd.DataFrame(log)
# Build the Gradio interface: a login button, an API-key field, one button
# that runs and submits the full benchmark, and one button that runs
# user-supplied test questions.
with gr.Blocks() as demo:  # Corrected to use gr.Blocks()
    gr.Markdown("# GAIA Benchmark Runner")
    gr.Markdown(
        "1. Clone this Space and customize your agent logic.\n"
        "2. Log in below (to get your HF username).\n"
        "3. Enter your OpenAI key (if needed).\n"
        "4. Click to run and submit to the leaderboard."
    )

    login = gr.LoginButton()
    key_in = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")

    # Full benchmark run: status text plus a per-question table.
    run_btn = gr.Button("Run & Submit")
    out_status = gr.Textbox(label="Status", lines=4)
    out_table = gr.DataFrame(label="Questions & Answers")

    # Ad-hoc test run: free-text questions in, answers table out.
    test_questions_input = gr.Textbox(
        label="Test Questions (comma-separated)",
        placeholder="What is the capital of France?, What is the square root of 25?",
    )
    run_test_btn = gr.Button("Run Test Questions")
    test_results_output = gr.DataFrame(label="Test Results")

    run_btn.click(fn=run_and_submit_all, inputs=[key_in], outputs=[out_status, out_table])

    # NOTE(review): the login button is passed as an ordinary input, so the
    # handler presumably receives the component's value rather than the
    # logged-in user's profile — confirm this is the intended login check.
    run_test_btn.click(
        fn=run_test_questions,
        inputs=[login, key_in, test_questions_input],
        outputs=[test_results_output],
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=False)