Spaces:
Runtime error
Runtime error
import os
import shutil

from benchmark.run_benchmark import run_benchmark
def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key, openrouter_api_key):
    """Run the benchmark for a single tool/model combination.

    Args:
        tool_name: Name of the prediction tool to benchmark.
        model_name: LLM model identifier; may be None/empty to use the tool's default.
        num_questions: Number of benchmark questions; defaults to 10 when falsy.
        openai_api_key: OpenAI API key (required by the RAG-based tools).
        anthropic_api_key: Anthropic API key, if any.
        openrouter_api_key: OpenRouter API key, if any.

    Returns:
        "completed" on success, otherwise a human-readable error string.
    """
    uses_rag = tool_name in ("prediction-request-reasoning", "prediction-request-rag")

    # Validate up front so an invalid call does not wipe previous results
    # (the original cleared the directory before this check).
    if uses_rag and not openai_api_key:
        return "Error: Tools that use RAG also require an OpenAI API Key"

    # Empty the results directory — portable replacement for `rm -rf results/*`.
    if os.path.isdir("results"):
        for entry in os.listdir("results"):
            path = os.path.join("results", entry)
            if os.path.isdir(path):
                shutil.rmtree(path, ignore_errors=True)
            else:
                os.remove(path)

    # Set the benchmark parameters
    kwargs = {}
    kwargs["num_questions"] = num_questions or 10
    kwargs["tools"] = [tool_name]
    if model_name:
        kwargs["model"] = [model_name]

    kwargs["api_keys"] = {}
    if openai_api_key:
        kwargs["api_keys"]["openai"] = openai_api_key
    if anthropic_api_key:
        kwargs["api_keys"]["anthropic"] = anthropic_api_key
    if openrouter_api_key:
        kwargs["api_keys"]["openrouter"] = openrouter_api_key

    # Infer the provider from the model name. Guard against a falsy
    # model_name — the original `"gpt" in model_name` raised TypeError
    # when model_name was None, despite being treated as optional above.
    if model_name and "gpt" in model_name:
        kwargs["llm_provider"] = "openai"
    elif model_name and "claude" in model_name:
        kwargs["llm_provider"] = "anthropic"
    else:
        kwargs["llm_provider"] = "openrouter"

    if uses_rag:
        kwargs["num_urls"] = 3
        kwargs["num_words"] = 300
        kwargs["provide_source_links"] = True

    print("Running benchmark")
    # Run the benchmark; surface any failure as a message rather than raising.
    try:
        run_benchmark(kwargs=kwargs)
        return "completed"
    except Exception as e:
        return f"Error running benchmark: {e}"