Spaces:
Sleeping
Sleeping
commit
Browse files
app.py
CHANGED
|
@@ -16,7 +16,7 @@ def refresh_data():
|
|
| 16 |
global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
|
| 17 |
|
| 18 |
global_output_armenian = unified_exam_result_table(global_unified_exam_df)
|
| 19 |
-
|
| 20 |
|
| 21 |
return global_output_armenian, unified_exam_chart(global_output_armenian, 'Average')
|
| 22 |
|
|
@@ -26,7 +26,7 @@ def main():
|
|
| 26 |
global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
|
| 27 |
|
| 28 |
global_output_armenian = unified_exam_result_table(global_unified_exam_df)
|
| 29 |
-
|
| 30 |
|
| 31 |
with gr.Blocks() as app:
|
| 32 |
with gr.Tabs():
|
|
@@ -40,17 +40,17 @@ def main():
|
|
| 40 |
table_output_armenian = gr.DataFrame(value=global_output_armenian)
|
| 41 |
plot_column_dropdown_unified_exam = gr.Dropdown(choices=['Average', 'Armenian language and literature', 'Armenian history', 'Mathematics'], value='Average', label='Select Column to Plot')
|
| 42 |
plot_output_armenian = gr.Plot(lambda column: unified_exam_chart(global_output_armenian, column), inputs=plot_column_dropdown_unified_exam)
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
with gr.TabItem("About"):
|
| 55 |
gr.Markdown("# About the Benchmark")
|
| 56 |
gr.Markdown(
|
|
@@ -112,9 +112,9 @@ def main():
|
|
| 112 |
refresh_button.click(
|
| 113 |
fn=refresh_data,
|
| 114 |
outputs=[table_output_armenian,
|
| 115 |
-
|
| 116 |
plot_output_armenian,
|
| 117 |
-
|
| 118 |
],
|
| 119 |
)
|
| 120 |
app.launch(share=True, debug=True)
|
|
|
|
| 16 |
global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
|
| 17 |
|
| 18 |
global_output_armenian = unified_exam_result_table(global_unified_exam_df)
|
| 19 |
+
global_output_mmlu = mmlu_result_table(global_mmlu_df)
|
| 20 |
|
| 21 |
return global_output_armenian, unified_exam_chart(global_output_armenian, 'Average')
|
| 22 |
|
|
|
|
| 26 |
global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
|
| 27 |
|
| 28 |
global_output_armenian = unified_exam_result_table(global_unified_exam_df)
|
| 29 |
+
global_output_mmlu = mmlu_result_table(global_mmlu_df)
|
| 30 |
|
| 31 |
with gr.Blocks() as app:
|
| 32 |
with gr.Tabs():
|
|
|
|
| 40 |
table_output_armenian = gr.DataFrame(value=global_output_armenian)
|
| 41 |
plot_column_dropdown_unified_exam = gr.Dropdown(choices=['Average', 'Armenian language and literature', 'Armenian history', 'Mathematics'], value='Average', label='Select Column to Plot')
|
| 42 |
plot_output_armenian = gr.Plot(lambda column: unified_exam_chart(global_output_armenian, column), inputs=plot_column_dropdown_unified_exam)
|
| 43 |
+
with gr.TabItem("MMLU-Pro-Hy"):
|
| 44 |
+
gr.Markdown("# MMLU-Pro Translated to Armenian (MMLU-Pro-Hy)")
|
| 45 |
+
gr.Markdown(
|
| 46 |
+
"""
|
| 47 |
+
This benchmark contains results of various Language Models on the MMLU-Pro benchmark, translated into Armenian. MMLU-Pro is a massive multi-task test in MCQA format. The scores represent accuracy.
|
| 48 |
+
"""
|
| 49 |
+
)
|
| 50 |
+
table_output_mmlu = gr.DataFrame(value=global_output_mmlu)
|
| 51 |
+
subject_cols = ['Average','Biology', 'Business', 'Chemistry', 'Computer Science', 'Economics', 'Engineering', 'Health', 'History', 'Law', 'Math', 'Philosophy', 'Physics', 'Psychology','Other']
|
| 52 |
+
plot_column_dropdown_mmlu = gr.Dropdown(choices=subject_cols, value='Average', label='Select Column to Plot')
|
| 53 |
+
plot_output_mmlu = gr.Plot(lambda column: mmlu_chart(global_output_mmlu, column), inputs=plot_column_dropdown_mmlu)
|
| 54 |
with gr.TabItem("About"):
|
| 55 |
gr.Markdown("# About the Benchmark")
|
| 56 |
gr.Markdown(
|
|
|
|
| 112 |
refresh_button.click(
|
| 113 |
fn=refresh_data,
|
| 114 |
outputs=[table_output_armenian,
|
| 115 |
+
table_output_mmlu,
|
| 116 |
plot_output_armenian,
|
| 117 |
+
plot_output_mmlu
|
| 118 |
],
|
| 119 |
)
|
| 120 |
app.launch(share=True, debug=True)
|