Spaces:
Sleeping
Sleeping
update
Browse files
app.py
CHANGED
|
@@ -150,16 +150,18 @@ def calculate_order_by_first_substring(selected_models):
|
|
| 150 |
lambda x: x["parsed_judge_response"].eq(1).all()
|
| 151 |
)
|
| 152 |
|
| 153 |
-
|
| 154 |
|
| 155 |
text_only = all_data[all_data["Model Type"] == "Text Only"]
|
| 156 |
text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)]
|
| 157 |
|
|
|
|
| 158 |
text_only_filtered = (
|
| 159 |
text_only_filtered.groupby(["Model Name"])["parsed_judge_response"]
|
| 160 |
.mean()
|
| 161 |
.reset_index()
|
| 162 |
)
|
|
|
|
| 163 |
text_only_filtered["Accuracy"] = text_only_filtered["parsed_judge_response"] * 100
|
| 164 |
text_only_filtered.drop("parsed_judge_response", axis=1, inplace=True)
|
| 165 |
|
|
@@ -168,9 +170,6 @@ def calculate_order_by_first_substring(selected_models):
|
|
| 168 |
)
|
| 169 |
text_only_filtered.sort_values("Accuracy", ascending=False, inplace=True)
|
| 170 |
|
| 171 |
-
query_ids = text_only_filtered.query_id.unique()
|
| 172 |
-
fsm_ids = text_only_filtered.fsm_id.unique()
|
| 173 |
-
|
| 174 |
number_of_queries = len(query_ids)
|
| 175 |
number_of_fsms = len(fsm_ids)
|
| 176 |
|
|
@@ -228,9 +227,9 @@ with gr.Blocks() as demo:
|
|
| 228 |
interactive=True,
|
| 229 |
)
|
| 230 |
with gr.Row():
|
| 231 |
-
number_of_queries = gr.Textbox(label="Number of queries")
|
| 232 |
|
| 233 |
-
number_of_fsms = gr.Textbox(label="Number of FSMs")
|
| 234 |
|
| 235 |
constrained_leader_board_text = gr.Dataframe()
|
| 236 |
|
|
|
|
| 150 |
lambda x: x["parsed_judge_response"].eq(1).all()
|
| 151 |
)
|
| 152 |
|
| 153 |
+
fsm_ids = query_ids_df.fsm_id.unique()
|
| 154 |
|
| 155 |
text_only = all_data[all_data["Model Type"] == "Text Only"]
|
| 156 |
text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)]
|
| 157 |
|
| 158 |
+
query_ids = text_only_filtered.query_id.unique()
|
| 159 |
text_only_filtered = (
|
| 160 |
text_only_filtered.groupby(["Model Name"])["parsed_judge_response"]
|
| 161 |
.mean()
|
| 162 |
.reset_index()
|
| 163 |
)
|
| 164 |
+
|
| 165 |
text_only_filtered["Accuracy"] = text_only_filtered["parsed_judge_response"] * 100
|
| 166 |
text_only_filtered.drop("parsed_judge_response", axis=1, inplace=True)
|
| 167 |
|
|
|
|
| 170 |
)
|
| 171 |
text_only_filtered.sort_values("Accuracy", ascending=False, inplace=True)
|
| 172 |
|
|
|
|
|
|
|
|
|
|
| 173 |
number_of_queries = len(query_ids)
|
| 174 |
number_of_fsms = len(fsm_ids)
|
| 175 |
|
|
|
|
| 227 |
interactive=True,
|
| 228 |
)
|
| 229 |
with gr.Row():
|
| 230 |
+
number_of_queries = gr.Textbox(label="Number of included queries")
|
| 231 |
|
| 232 |
+
number_of_fsms = gr.Textbox(label="Number of included FSMs")
|
| 233 |
|
| 234 |
constrained_leader_board_text = gr.Dataframe()
|
| 235 |
|