Spaces:
Runtime error
Runtime error
Commit
·
b871994
1
Parent(s):
c4e55cd
fix bug
Browse files
- app.py +2 -9
- tabs/run_benchmark.py +4 -0
app.py
CHANGED
|
@@ -87,13 +87,6 @@ with demo:
|
|
| 87 |
with gr.TabItem("π Contribute"):
|
| 88 |
gr.Markdown(how_to_run)
|
| 89 |
|
| 90 |
-
def update_dropdown(tool):
|
| 91 |
-
if "claude" in tool:
|
| 92 |
-
return ["claude-3-haiku-20240307", "claude-3-sonnet-20240229", "claude-3-opus-20240229"]
|
| 93 |
-
else:
|
| 94 |
-
return ["gpt-3.5-turbo-0125", "gpt-4-0125-preview"]
|
| 95 |
-
|
| 96 |
-
|
| 97 |
# fourth tab - run the benchmark
|
| 98 |
with gr.TabItem("π₯ Run the Benchmark"):
|
| 99 |
with gr.Row():
|
|
@@ -106,14 +99,14 @@ with demo:
|
|
| 106 |
# "prediction-online-sme",
|
| 107 |
'prediction-request-rag',
|
| 108 |
'prediction-request-reasoning',
|
| 109 |
-
"prediction-url-cot-claude",
|
| 110 |
# "prediction-request-rag-cohere",
|
| 111 |
# "prediction-with-research-conservative",
|
| 112 |
# "prediction-with-research-bold",
|
| 113 |
], label="Tool Name", info="Choose the tool to run")
|
| 114 |
model_name = gr.Dropdown([
|
| 115 |
"gpt-3.5-turbo-0125",
|
| 116 |
-
"gpt-4-0125-preview"
|
| 117 |
"claude-3-haiku-20240307",
|
| 118 |
"claude-3-sonnet-20240229",
|
| 119 |
"claude-3-opus-20240229",
|
|
|
|
| 87 |
with gr.TabItem("π Contribute"):
|
| 88 |
gr.Markdown(how_to_run)
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
# fourth tab - run the benchmark
|
| 91 |
with gr.TabItem("π₯ Run the Benchmark"):
|
| 92 |
with gr.Row():
|
|
|
|
| 99 |
# "prediction-online-sme",
|
| 100 |
'prediction-request-rag',
|
| 101 |
'prediction-request-reasoning',
|
| 102 |
+
# "prediction-url-cot-claude",
|
| 103 |
# "prediction-request-rag-cohere",
|
| 104 |
# "prediction-with-research-conservative",
|
| 105 |
# "prediction-with-research-bold",
|
| 106 |
], label="Tool Name", info="Choose the tool to run")
|
| 107 |
model_name = gr.Dropdown([
|
| 108 |
"gpt-3.5-turbo-0125",
|
| 109 |
+
"gpt-4-0125-preview",
|
| 110 |
"claude-3-haiku-20240307",
|
| 111 |
"claude-3-sonnet-20240229",
|
| 112 |
"claude-3-opus-20240229",
|
tabs/run_benchmark.py
CHANGED
|
@@ -30,6 +30,10 @@ def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, ant
|
|
| 30 |
else:
|
| 31 |
kwargs["llm_provider"] = "openrouter"
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
kwargs["num_urls"] = 3
|
| 34 |
kwargs["num_words"] = 300
|
| 35 |
kwargs["provide_source_links"] = True
|
|
|
|
| 30 |
else:
|
| 31 |
kwargs["llm_provider"] = "openrouter"
|
| 32 |
|
| 33 |
+
if tool_name == "prediction-request-reasoning" or tool_name == "prediction-request-rag":
|
| 34 |
+
if not openai_api_key:
|
| 35 |
+
return f"Error: Tools that use RAG also require an OpenAI API Key"
|
| 36 |
+
|
| 37 |
kwargs["num_urls"] = 3
|
| 38 |
kwargs["num_words"] = 300
|
| 39 |
kwargs["provide_source_links"] = True
|