Signed-off-by: Jonathan Bnayahu <bnayahu@il.ibm.com>
Files changed:
- app.py (+6, -5)
- src/display/utils.py (+1, -14)
app.py CHANGED

@@ -30,11 +30,12 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
+
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn)],
         search_columns=[AutoEvalColumn.model.name],
-        interactive=False
+        interactive=False
     )
 
 def download_csv():
@@ -42,15 +43,15 @@ def download_csv():
     LEADERBOARD_DF.to_csv(buffer, index=False)
     return buffer.getvalue()
 
-demo = gr.Blocks(css=custom_css)
-with demo:
+gui = gr.Blocks(css=custom_css)
+with gui:
     gr.HTML(TITLE_IMAGE)
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
+            leaderboard = init_leaderboard(LEADERBOARD_DF.style.highlight_max(color = 'lightgreen', axis=0).data)
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
@@ -82,4 +83,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+gui.queue(default_concurrency_limit=40).launch()
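Note on the new init_leaderboard call: pandas' DataFrame.style returns a Styler, and Styler.data is the plain underlying DataFrame, so the highlight styling does not reach the Leaderboard widget; the value passed in is effectively LEADERBOARD_DF itself. A minimal, standalone sketch of that behaviour (illustrative only; the small DataFrame is hypothetical, and DataFrame.style relies on pandas' optional jinja2 dependency):

import pandas as pd

# Hypothetical stand-in for LEADERBOARD_DF, only to exercise the call chain.
df = pd.DataFrame({"Average": [0.61, 0.58], "Safety": [0.72, 0.80]})

# highlight_max returns a Styler that carries CSS for an HTML render;
# .data is the original, unstyled DataFrame underneath it.
styled = df.style.highlight_max(color="lightgreen", axis=0)
assert styled.data.equals(df)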
src/display/utils.py CHANGED

@@ -25,23 +25,13 @@ auto_eval_column_dict = []
 # Init
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
-## For the queue columns in the submission tab
-@dataclass(frozen=True)
-class EvalQueueColumn: # Queue column
-    model = ColumnContent("model", "markdown", True)
-    revision = ColumnContent("revision", "str", True)
-    private = ColumnContent("private", "bool", True)
-    precision = ColumnContent("precision", "str", True)
-    weight_type = ColumnContent("weight_type", "str", "Original")
-    status = ColumnContent("status", "str", True)
-
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
@@ -52,8 +42,5 @@ class ModelDetails:
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 
-EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
-EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
-
 BENCHMARK_COLS = [t.value.col_name for t in Tasks]
 
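For reference, the make_dataclass pattern this file relies on: each auto_eval_column_dict entry is an (attribute name, type, default) triple, and make_dataclass turns the list into a frozen AutoEvalColumn class whose defaults are the ColumnContent instances, which COLS then reads back. The sketch below is standalone and illustrative only; the ColumnContent dataclass and the project-local fields() helper are assumptions modelled on the stock leaderboard template, since neither is shown in this diff:

from dataclasses import dataclass, make_dataclass

# Assumed definitions (not part of this diff). ColumnContent describes one
# leaderboard column; fields() returns the ColumnContent defaults stored on the
# generated class (unlike dataclasses.fields(), which yields Field objects).
@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

def fields(raw_class):
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__"]

# Same pattern as utils.py: (attribute name, type, default ColumnContent) triples.
auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average", ColumnContent, ColumnContent("Average", "number", True)],
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
print(COLS)  # ['Model', 'Average']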
|