# NOTE(review): removed non-code residue here (Hugging Face Spaces page header,
# commit hashes, and a line-number gutter from a web scrape) — it was not Python.
import argparse
import gradio as gr
import pandas as pd
import json
from constants import *
from datetime import datetime
from utils_display import model_info
from constants import column_names
import pytz
from data_utils import post_processing
# --- Module-level state ------------------------------------------------------
LAST_UPDATED = None  # timestamp string; computed inside build_demo()
INTRO_MD = ""

# Static markdown fragments rendered in the UI.
with open("_about_us.md", "r", encoding="utf-8") as f:
    ABOUT_MD = f.read()
with open("_header.md", "r", encoding="utf-8") as f:
    HEADER_MD = f.read()

# Populated by data_load() before build_demo() is called.
raw_data = None
original_df = None
raw_puzzle_data = None
puzzle_df = None

available_models = list(model_info.keys())
def _gstr(text):
    """Return an invisible Gradio Text component holding *text*."""
    return gr.Text(text, visible=False)
def _tab_leaderboard():
    """Render the main leaderboard table from the module-global `original_df`."""
    global original_df
    df = original_df.copy()
    # Prepend a 1-based rank column.
    df.insert(0, "#", range(1, 1 + len(df)))
    if "Open Source" in df.columns:
        # Show a check/cross mark instead of a raw boolean.
        # NOTE(review): the original literal was mojibake'd ("โ..."); reconstructed
        # as the usual check/cross pair — confirm against the deployed app.
        df["Open Source"] = df["Open Source"].apply(lambda x: "✅" if x else "❌")
    leaderboard_table = gr.components.Dataframe(
        value=df,
        datatype=["number", "markdown", "bool", "number", "number", "number", "number"],
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
        column_widths=[50, 200, 100, 120, 120, 120, 130],
        wrap=True,
        height=800,
    )
    return leaderboard_table
def _tab_leaderboard_puzzle():
    """Render the per-puzzle accuracy table from the module-global `puzzle_df`."""
    global puzzle_df
    df = puzzle_df.copy()
    # Prepend a 1-based rank column.
    df.insert(0, "#", range(1, 1 + len(df)))
    leaderboard_puzzle_table = gr.components.Dataframe(
        value=df,
        datatype=["number", "markdown", "number", "number", "number", "number",
                  "number", "number", "number", "number", "number", "number"],
        elem_id="leaderboard-puzzle-table",
        interactive=False,
        visible=True,
        # NOTE(review): 13 widths vs 12 datatypes — Gradio tolerates this, but the
        # lists should probably have matching lengths; verify the column count.
        column_widths=[50, 200, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150],
        wrap=True,
        height=800,
    )
    return leaderboard_puzzle_table
def _tab_submit():
    """Render the model-submission instructions tab (currently disabled in build_demo)."""
    markdown_text = """
Please create an issue on our [Github](https://github.com/ljcleo/hardcore-logic) repository to talk about your model. Then, we can test it for you and report the results here on the Leaderboard.
If you would like to do local testing, please read our code [here](https://github.com/ljcleo/hardcore-logic/tree/master/src/evaluator)
and apply for the access for the [HardcoreLogic](https://hf.co/dataset/?/?) that contains the truth solutions.
"""
    # NOTE(review): heading emoji was mojibake'd ("๐"); reconstructed as 🚀 to
    # match the disabled tab label in build_demo — confirm against the deployed app.
    gr.Markdown("## 🚀 Evaluate your models\n\n" + markdown_text, elem_classes="markdown-text")
def build_demo():
    """Build and return the Gradio Blocks app (tabs, header, citation accordion).

    Requires data_load() to have populated `original_df` / `puzzle_df` first.
    """
    global original_df, available_models
    with gr.Blocks(theme=gr.themes.Soft(), css=css, js=js_light) as demo:
        gr.HTML(BANNER, elem_id="banner")

        # Stamp the header with the current US/Pacific (PDT/PST) time.
        last_updated = datetime.now(pytz.timezone('US/Pacific')).strftime("%Y-%m-%d %H:%M:%S")
        header_md_text = HEADER_MD.replace("{LAST_UPDATED}", str(last_updated))
        gr.Markdown(header_md_text, elem_classes="markdown-text")

        # NOTE(review): tab-label emoji below were mojibake'd in the source (one
        # even split its string literal across lines); reconstructed with the
        # most plausible glyphs — confirm against the deployed app.
        with gr.Tabs(elem_classes="tab-buttons") as tabs:
            # 🏆 Leaderboard
            with gr.TabItem("🏆 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
                _tab_leaderboard()
            # 🎯 Accuracy for each puzzle
            with gr.TabItem("🎯 Accuracy for each puzzle", elem_id="od-benchmark-tab-table", id=1):
                _tab_leaderboard_puzzle()

            # Disabled tabs, kept for reference:
            # with gr.TabItem("🚀 Evaluate your models", elem_id="od-benchmark-tab-table", id=3):
            #     _tab_submit()
            # with gr.TabItem("🔮 About Us", elem_id="od-benchmark-tab-table", id=4):
            #     gr.Markdown(ABOUT_MD, elem_classes="markdown-text")

        # 📑 Citation section
        with gr.Accordion("📑 Citation", open=False):
            gr.Textbox(
                value=CITATION_TEXT,
                lines=7,
                label="Copy this BibTeX to cite us",
                elem_id="citation-button",
                show_copy_button=True,
            )
    return demo
def _coerce_numeric(records):
    """In-place best-effort conversion of numeric-looking values to float.

    Non-convertible values (model names, URLs, None, ...) are left untouched.
    """
    for record in records:
        for key, value in record.items():
            try:
                record[key] = float(value)
            except (TypeError, ValueError):
                pass


def data_load(result_file, puzzle_file):
    """Load result JSON files into the module globals used by the UI.

    Populates `raw_data`/`original_df` from *result_file* and
    `raw_puzzle_data`/`puzzle_df` from *puzzle_file*, running both frames
    through `post_processing` (column renaming/ordering, clickable URLs,
    ranking by RANKING_COLUMN).
    """
    global raw_data, original_df, raw_puzzle_data, puzzle_df
    print(f"Loading {result_file}")

    column_names_main = column_names.copy()
    column_puzzle_main = column_names_puzzle.copy()
    main_ordered_columns = ORDERED_COLUMN_NAMES
    puzzle_main_ordered_columns = ORDERED_COLUMN_NAMES_PUZZLE
    click_url = True

    with open(result_file, "r", encoding="utf-8") as f:
        raw_data = json.load(f)
    _coerce_numeric(raw_data)

    with open(puzzle_file, "r", encoding="utf-8") as f:
        raw_puzzle_data = json.load(f)
    _coerce_numeric(raw_puzzle_data)

    original_df = post_processing(
        pd.DataFrame(raw_data), column_names_main,
        ordered_columns=main_ordered_columns,
        click_url=click_url, rank_column=RANKING_COLUMN,
    )
    puzzle_df = post_processing(
        pd.DataFrame(raw_puzzle_data), column_puzzle_main,
        ordered_columns=puzzle_main_ordered_columns,
        click_url=click_url, rank_column=RANKING_COLUMN,
    )
    print(f"original_df.columns: {original_df.columns}")
    print(f"puzzle_df.columns: {puzzle_df.columns}")
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--result_file", help="Path to results table",
                        default="hardcorelogic.summary.json")
    parser.add_argument("--puzzle_file", help="Path to results(puzzle) table",
                        default="hardcorelogic.puzzle.json")
    args = parser.parse_args()

    data_load(args.result_file, args.puzzle_file)
    print(original_df)

    demo = build_demo()
    demo.launch(share=args.share, height=3000, width="100%")