Spaces:
Sleeping
Sleeping
Alex
committed on
Commit
·
ffff7f4
1
Parent(s):
c762a51
error
Browse files
app.py
CHANGED
|
@@ -70,20 +70,50 @@ def _flatten_entry(entry: Dict) -> Dict:
|
|
| 70 |
"Pass@1": entry["llm_pass_1"],
|
| 71 |
"Pass@5": entry["llm_pass_5"],
|
| 72 |
"Pass@10": entry["llm_pass_10"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
}
|
| 74 |
-
for metric_name, score in entry["metrics"].items():
|
| 75 |
-
flat[metric_name.replace("_", " ").title()] = score
|
| 76 |
return flat
|
| 77 |
|
| 78 |
|
| 79 |
-
def _table_data() -> List[
|
| 80 |
data = _load_leaderboard()
|
| 81 |
if not data:
|
| 82 |
# Return empty list if no data
|
| 83 |
return []
|
| 84 |
# Sort descending by pass@1 as requested
|
| 85 |
data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
# --------------- Gradio callbacks ---------------
|
|
@@ -127,7 +157,7 @@ def submit_model(
|
|
| 127 |
},
|
| 128 |
)
|
| 129 |
except Exception as e:
|
| 130 |
-
return
|
| 131 |
|
| 132 |
data = _load_leaderboard()
|
| 133 |
# Replace existing model entry if any
|
|
@@ -135,7 +165,7 @@ def submit_model(
|
|
| 135 |
data.append(entry.dict())
|
| 136 |
_save_leaderboard(data)
|
| 137 |
|
| 138 |
-
return
|
| 139 |
|
| 140 |
|
| 141 |
# --------------- Interface ---------------
|
|
|
|
| 70 |
"Pass@1": entry["llm_pass_1"],
|
| 71 |
"Pass@5": entry["llm_pass_5"],
|
| 72 |
"Pass@10": entry["llm_pass_10"],
|
| 73 |
+
"Readability": entry["metrics"]["readability"],
|
| 74 |
+
"Relevance": entry["metrics"]["relevance"],
|
| 75 |
+
"Explanation Clarity": entry["metrics"]["explanation_clarity"],
|
| 76 |
+
"Problem Identification": entry["metrics"]["problem_identification"],
|
| 77 |
+
"Actionability": entry["metrics"]["actionability"],
|
| 78 |
+
"Completeness": entry["metrics"]["completeness"],
|
| 79 |
+
"Specificity": entry["metrics"]["specificity"],
|
| 80 |
+
"Contextual Adequacy": entry["metrics"]["contextual_adequacy"],
|
| 81 |
+
"Consistency": entry["metrics"]["consistency"],
|
| 82 |
+
"Brevity": entry["metrics"]["brevity"],
|
| 83 |
}
|
|
|
|
|
|
|
| 84 |
return flat
|
| 85 |
|
| 86 |
|
| 87 |
+
def _table_data() -> List[List]:
|
| 88 |
data = _load_leaderboard()
|
| 89 |
if not data:
|
| 90 |
# Return empty list if no data
|
| 91 |
return []
|
| 92 |
# Sort descending by pass@1 as requested
|
| 93 |
data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
|
| 94 |
+
|
| 95 |
+
# Convert to list of lists for Gradio table
|
| 96 |
+
table_rows = []
|
| 97 |
+
for entry in data:
|
| 98 |
+
row = [
|
| 99 |
+
entry["model_name"],
|
| 100 |
+
entry["bleu"],
|
| 101 |
+
entry["llm_pass_1"],
|
| 102 |
+
entry["llm_pass_5"],
|
| 103 |
+
entry["llm_pass_10"],
|
| 104 |
+
entry["metrics"]["readability"],
|
| 105 |
+
entry["metrics"]["relevance"],
|
| 106 |
+
entry["metrics"]["explanation_clarity"],
|
| 107 |
+
entry["metrics"]["problem_identification"],
|
| 108 |
+
entry["metrics"]["actionability"],
|
| 109 |
+
entry["metrics"]["completeness"],
|
| 110 |
+
entry["metrics"]["specificity"],
|
| 111 |
+
entry["metrics"]["contextual_adequacy"],
|
| 112 |
+
entry["metrics"]["consistency"],
|
| 113 |
+
entry["metrics"]["brevity"],
|
| 114 |
+
]
|
| 115 |
+
table_rows.append(row)
|
| 116 |
+
return table_rows
|
| 117 |
|
| 118 |
|
| 119 |
# --------------- Gradio callbacks ---------------
|
|
|
|
| 157 |
},
|
| 158 |
)
|
| 159 |
except Exception as e:
|
| 160 |
+
return _table_data(), f"❌ Submission failed: {e}"
|
| 161 |
|
| 162 |
data = _load_leaderboard()
|
| 163 |
# Replace existing model entry if any
|
|
|
|
| 165 |
data.append(entry.dict())
|
| 166 |
_save_leaderboard(data)
|
| 167 |
|
| 168 |
+
return _table_data(), "✅ Submission recorded!"
|
| 169 |
|
| 170 |
|
| 171 |
# --------------- Interface ---------------
|