Spaces:
Sleeping
Sleeping
Alex
committed on
Commit
·
9d40219
1
Parent(s):
d369cff
error
Browse files
app.py
CHANGED
|
@@ -87,12 +87,9 @@ def _flatten_entry(entry: Dict) -> Dict:
|
|
| 87 |
def _table_data() -> List[List]:
|
| 88 |
data = _load_leaderboard()
|
| 89 |
if not data:
|
| 90 |
-
# Return empty list if no data
|
| 91 |
return []
|
| 92 |
-
# Sort descending by pass@1 as requested
|
| 93 |
data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
|
| 94 |
|
| 95 |
-
# Convert to list of lists for Gradio table
|
| 96 |
table_rows = []
|
| 97 |
for entry in data:
|
| 98 |
row = [
|
|
@@ -101,6 +98,21 @@ def _table_data() -> List[List]:
|
|
| 101 |
entry["llm_pass_1"],
|
| 102 |
entry["llm_pass_5"],
|
| 103 |
entry["llm_pass_10"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
entry["metrics"]["readability"],
|
| 105 |
entry["metrics"]["relevance"],
|
| 106 |
entry["metrics"]["explanation_clarity"],
|
|
@@ -157,7 +169,7 @@ def submit_model(
|
|
| 157 |
},
|
| 158 |
)
|
| 159 |
except Exception as e:
|
| 160 |
-
return _table_data(), f"❌ Submission failed: {e}"
|
| 161 |
|
| 162 |
data = _load_leaderboard()
|
| 163 |
# Replace existing model entry if any
|
|
@@ -165,7 +177,7 @@ def submit_model(
|
|
| 165 |
data.append(entry.dict())
|
| 166 |
_save_leaderboard(data)
|
| 167 |
|
| 168 |
-
return _table_data(), "✅ Submission recorded!"
|
| 169 |
|
| 170 |
|
| 171 |
# --------------- Interface ---------------
|
|
@@ -202,11 +214,19 @@ with gr.Blocks(title="CodeReview Leaderboard") as demo:
|
|
| 202 |
|
| 203 |
# Initialize table data
|
| 204 |
initial_data = _table_data()
|
|
|
|
| 205 |
|
| 206 |
leaderboard_df = gr.Dataframe(
|
| 207 |
-
headers=["Model", "BLEU", "Pass@1", "Pass@5", "Pass@10"
|
| 208 |
value=initial_data,
|
| 209 |
-
label="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
interactive=False,
|
| 211 |
)
|
| 212 |
|
|
@@ -255,7 +275,7 @@ with gr.Blocks(title="CodeReview Leaderboard") as demo:
|
|
| 255 |
consistency_inp,
|
| 256 |
brevity_inp,
|
| 257 |
],
|
| 258 |
-
outputs=[leaderboard_df, status_markdown],
|
| 259 |
api_name="submit_model",
|
| 260 |
)
|
| 261 |
|
|
|
|
| 87 |
def _table_data() -> List[List]:
|
| 88 |
data = _load_leaderboard()
|
| 89 |
if not data:
|
|
|
|
| 90 |
return []
|
|
|
|
| 91 |
data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
|
| 92 |
|
|
|
|
| 93 |
table_rows = []
|
| 94 |
for entry in data:
|
| 95 |
row = [
|
|
|
|
| 98 |
entry["llm_pass_1"],
|
| 99 |
entry["llm_pass_5"],
|
| 100 |
entry["llm_pass_10"],
|
| 101 |
+
]
|
| 102 |
+
table_rows.append(row)
|
| 103 |
+
return table_rows
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _multimetric_table_data() -> List[List]:
|
| 107 |
+
data = _load_leaderboard()
|
| 108 |
+
if not data:
|
| 109 |
+
return []
|
| 110 |
+
data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
|
| 111 |
+
|
| 112 |
+
table_rows = []
|
| 113 |
+
for entry in data:
|
| 114 |
+
row = [
|
| 115 |
+
entry["model_name"],
|
| 116 |
entry["metrics"]["readability"],
|
| 117 |
entry["metrics"]["relevance"],
|
| 118 |
entry["metrics"]["explanation_clarity"],
|
|
|
|
| 169 |
},
|
| 170 |
)
|
| 171 |
except Exception as e:
|
| 172 |
+
return _table_data(), _multimetric_table_data(), f"❌ Submission failed: {e}"
|
| 173 |
|
| 174 |
data = _load_leaderboard()
|
| 175 |
# Replace existing model entry if any
|
|
|
|
| 177 |
data.append(entry.dict())
|
| 178 |
_save_leaderboard(data)
|
| 179 |
|
| 180 |
+
return _table_data(), _multimetric_table_data(), "✅ Submission recorded!"
|
| 181 |
|
| 182 |
|
| 183 |
# --------------- Interface ---------------
|
|
|
|
| 214 |
|
| 215 |
# Initialize table data
|
| 216 |
initial_data = _table_data()
|
| 217 |
+
initial_multimetric_data = _multimetric_table_data()
|
| 218 |
|
| 219 |
leaderboard_df = gr.Dataframe(
|
| 220 |
+
headers=["Model", "BLEU", "Pass@1", "Pass@5", "Pass@10"],
|
| 221 |
value=initial_data,
|
| 222 |
+
label="Main Metrics Leaderboard",
|
| 223 |
+
interactive=False,
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
multimetric_df = gr.Dataframe(
|
| 227 |
+
headers=["Model", "Readability", "Relevance", "Explanation Clarity", "Problem Identification", "Actionability", "Completeness", "Specificity", "Contextual Adequacy", "Consistency", "Brevity"],
|
| 228 |
+
value=initial_multimetric_data,
|
| 229 |
+
label="Multi-Metric Scores",
|
| 230 |
interactive=False,
|
| 231 |
)
|
| 232 |
|
|
|
|
| 275 |
consistency_inp,
|
| 276 |
brevity_inp,
|
| 277 |
],
|
| 278 |
+
outputs=[leaderboard_df, multimetric_df, status_markdown],
|
| 279 |
api_name="submit_model",
|
| 280 |
)
|
| 281 |
|