Spaces:

JunsWan
/

HardcoreLogic

Sleeping

App Files Files Community

JunsWan committed 28 days ago

Commit

6e2102e

verified ·

1 Parent(s): 6205075

Update app.py

Browse files

Files changed (1) hide show

app.py +157 -193

app.py CHANGED Viewed

@@ -1,204 +1,168 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
-from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
-from src.about import (
-    CITATION_BUTTON_LABEL,
-    CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
-    INTRODUCTION_TEXT,
-    LLM_BENCHMARKS_TEXT,
-    TITLE,
-)
-from src.display.css_html_js import custom_css
-from src.display.utils import (
-    BENCHMARK_COLS,
-    COLS,
-    EVAL_COLS,
-    EVAL_TYPES,
-    AutoEvalColumn,
-    ModelType,
-    fields,
-    WeightType,
-    Precision
-)
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
-def restart_space():
-    API.restart_space(repo_id=REPO_ID)
-### Space initialisation
-try:
-    print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-try:
-    print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
-except Exception:
-    restart_space()
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-(
-    finished_eval_queue_df,
-    running_eval_queue_df,
-    pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-def init_leaderboard(dataframe):
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
-    return Leaderboard(
-        value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-            ColumnFilter(
-                AutoEvalColumn.params.name,
-                type="slider",
-                min=0.01,
-                max=150,
-                label="Select the number of parameters (B)",
-            ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
-        ],
-        bool_checkboxgroup_label="Hide models",
         interactive=False,
     )
-demo = gr.Blocks(css=custom_css)
-with demo:
-    gr.HTML(TITLE)
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )
-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
                 elem_id="citation-button",
-                show_copy_button=True,
             )
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()

+import argparse
 import gradio as gr
 import pandas as pd
+import json
+from constants import *
+from datetime import datetime
+from utils_display import model_info
+from constants import column_names
+import pytz
+from data_utils import post_processing
+# Module-level placeholders; nothing at module level in this file assigns
+# them a different value.
+LAST_UPDATED = None
+INTRO_MD = ""
+# Static markdown fragments shipped next to this script. They are read
+# explicitly as UTF-8 so the result does not depend on the locale's default
+# encoding.
+with open("_about_us.md", "r", encoding="utf-8") as f:
+    ABOUT_MD = f.read()
+with open("_header.md", "r", encoding="utf-8") as f:
+    HEADER_MD = f.read()
+# Leaderboard data; these stay None until data_load() fills them in.
+raw_data = None
+original_df = None
+raw_puzzle_data = None
+puzzle_df = None
+# Keys of the model_info mapping imported from utils_display.
+available_models = list(model_info.keys())
+def _gstr(text):
+    """Return a hidden ``gr.Text`` component whose value is ``text``."""
+    hidden_text = gr.Text(text, visible=False)
+    return hidden_text
+def _tab_leaderboard():
+    """Build the main leaderboard table from the module-level ``original_df``.
+
+    A copy of ``original_df`` gets a 1-based "#" column prepended and, when an
+    "Open Source" column is present, its values replaced by "✅" (truthy) or
+    "❌" (falsy).
+
+    Returns:
+        The ``gr.components.Dataframe`` created for the table.
+    """
+    # Work on a copy so the shared frame itself is never modified.
+    table_df = original_df.copy()
+    row_numbers = range(1, 1 + len(table_df))
+    table_df.insert(0, "#", row_numbers)
+    if "Open Source" in table_df.columns:
+        # NOTE(review): the datatype list below declares one "bool" column; if
+        # that column is "Open Source" it holds emoji strings by this point.
+        # Confirm the rendering is as intended.
+        table_df["Open Source"] = table_df["Open Source"].apply(lambda flag: "✅" if flag else "❌")
+    return gr.components.Dataframe(
+        value=table_df,
+        datatype=["number", "markdown", "bool", "number", "number", "number", "number"],
+        elem_id="leaderboard-table",
+        interactive=False,
+        visible=True,
+        column_widths=[50, 200, 100, 120, 120, 120, 130],
+        wrap=True,
+        height=800
     )
+def _tab_leaderboard_puzzle():
+    """Build the per-puzzle accuracy table from the module-level ``puzzle_df``.
+
+    A copy of ``puzzle_df`` gets a 1-based "#" column prepended. The first
+    column is declared as a number, the second as markdown, and every
+    remaining column as a number.
+
+    Returns:
+        The ``gr.components.Dataframe`` created for the table.
+    """
+    # Work on a copy so the shared frame itself is never modified.
+    df = puzzle_df.copy()
+    df.insert(0, "#", range(1, 1 + len(df)))
+    # The previous hard-coded lists disagreed with each other (12 datatypes
+    # versus 13 widths), so at least one could not match the frame. Both are
+    # now sized from the actual number of columns.
+    # NOTE(review): assumes the second column is the markdown model link, as
+    # the original datatype list did - confirm against post_processing().
+    n_metric_cols = max(len(df.columns) - 2, 0)
+    leaderboard_puzzle_table = gr.components.Dataframe(
+        value=df,
+        datatype=["number", "markdown"] + ["number"] * n_metric_cols,
+        elem_id="leaderboard-puzzle-table",
         interactive=False,
+        visible=True,
+        column_widths=[50, 200] + [150] * n_metric_cols,
+        wrap=True,
+        height=800
     )
+    return leaderboard_puzzle_table
+def _tab_submit():
+    """Render the markdown section that explains how to get a model evaluated."""
+    heading = "## 🚀 Evaluate your models\n\n"
+    instructions = """
+    Please create an issue on our [Github](https://github.com/ljcleo/hardcore-logic) repository to talk about your model. Then, we can test it for you and report the results here on the Leaderboard.
+    If you would like to do local testing, please read our code [here](https://github.com/ljcleo/hardcore-logic/tree/master/src/evaluator)
+    and apply for the access for the [HardcoreLogic](https://hf.co/dataset/?/?) that contains the truth solutions.
+    """
+    gr.Markdown(heading + instructions, elem_classes="markdown-text")
+def build_demo():
+    """Assemble the Gradio ``Blocks`` UI for the leaderboard.
+
+    The layout is: banner, header markdown (with its ``{LAST_UPDATED}``
+    placeholder filled in), a tab group holding the two leaderboard tables,
+    and a collapsed citation accordion.
+
+    ``css``, ``js_light``, ``BANNER`` and ``CITATION_TEXT`` are not defined in
+    this file; presumably they come from ``from constants import *``.
+
+    Returns:
+        The constructed ``gr.Blocks`` instance (not yet launched).
+    """
+    with gr.Blocks(theme=gr.themes.Soft(), css=css, js=js_light) as demo:
+        gr.HTML(BANNER, elem_id="banner")
+        # The "last updated" stamp is simply the current time in the
+        # US/Pacific zone at the moment the UI is built.
+        pacific = pytz.timezone('US/Pacific')
+        last_updated = datetime.now(pacific).strftime("%Y-%m-%d %H:%M:%S")
+        gr.Markdown(HEADER_MD.replace("{LAST_UPDATED}", last_updated), elem_classes="markdown-text")
+        with gr.Tabs(elem_classes="tab-buttons"):
+            # 🏅 Leaderboard
+            with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
+                _tab_leaderboard()
+            # 🎯 Accuracy for each puzzle
+            with gr.TabItem("🎯 Accuracy for each puzzle",elem_id="od-benchmark-tab-table", id=1):
+                _tab_leaderboard_puzzle()
+            # Disabled tab, kept for reference:
+            # # 🚀 Evaluate your models
+            # with gr.TabItem("🚀 Evaluate your models", elem_id="od-benchmark-tab-table", id=3):
+            #     _tab_submit()
+            # Disabled tab, kept for reference:
+            # # 📮 About Us
+            # with gr.TabItem("📮 About Us", elem_id="od-benchmark-tab-table", id=4):
+            #     gr.Markdown(ABOUT_MD, elem_classes="markdown-text")
+        # 📚 Citation section
+        with gr.Accordion("📚 Citation", open=False):
+            gr.Textbox(
+                value=CITATION_TEXT,
+                lines=7,
+                label="Copy this BibTeX to cite us",
                 elem_id="citation-button",
+                show_copy_button=True
             )
+    return demo
+def data_load(result_file, puzzle_file):
+    """Load both result tables into the module-level data frames.
+
+    Each JSON file is read, every value that ``float()`` accepts is converted
+    to a float, and the records are turned into a DataFrame that is passed
+    through ``post_processing``.
+
+    Args:
+        result_file: Path to the JSON file behind the main leaderboard.
+        puzzle_file: Path to the JSON file behind the per-puzzle table.
+
+    Side effects:
+        Sets the globals ``raw_data``, ``original_df``, ``raw_puzzle_data``
+        and ``puzzle_df``, and prints the resulting column names.
+    """
+    global raw_data, original_df, raw_puzzle_data, puzzle_df
+    print(f"Loading {result_file}")
+    raw_data = _load_numeric_records(result_file)
+    raw_puzzle_data = _load_numeric_records(puzzle_file)
+    # Copies are passed so post_processing cannot alter the shared constants.
+    original_df = post_processing(
+        pd.DataFrame(raw_data),
+        column_names.copy(),
+        ordered_columns=ORDERED_COLUMN_NAMES,
+        click_url=True,
+        rank_column=RANKING_COLUMN,
+    )
+    puzzle_df = post_processing(
+        pd.DataFrame(raw_puzzle_data),
+        column_names_puzzle.copy(),
+        ordered_columns=ORDERED_COLUMN_NAMES_PUZZLE,
+        click_url=True,
+        rank_column=RANKING_COLUMN,
+    )
+    print(f"original_df.columns: {original_df.columns}")
+    print(f"puzzle_df.columns: {puzzle_df.columns}")
+def _load_numeric_records(path):
+    """Read a JSON list of dicts from ``path``, coercing values to float where possible.
+
+    Values that ``float()`` rejects are left exactly as loaded. Only the
+    exceptions ``float()`` raises for such values are caught, so unrelated
+    errors (and KeyboardInterrupt) are no longer silently swallowed.
+    """
+    with open(path, "r", encoding="utf-8") as f:
+        records = json.load(f)
+    for record in records:
+        # Only existing keys are reassigned, so the dict size never changes
+        # while it is being iterated.
+        for key, value in record.items():
+            try:
+                record[key] = float(value)
+            except (TypeError, ValueError, OverflowError):
+                # Not a number (e.g. a name, None, a nested structure): keep it.
+                pass
+    return records
+if __name__ == "__main__":
+    # Command-line entry point: parse options, load the data, build the UI
+    # and launch it.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--share", action="store_true")
+    parser.add_argument(
+        "--result_file",
+        default="HardcoreLogic-Eval/results_dirs/hardcorelogic.summary.json",
+        help="Path to results table",
+    )
+    parser.add_argument(
+        "--puzzle_file",
+        default="HardcoreLogic-Eval/results_dirs/hardcorelogic.puzzle.json",
+        help="Path to results(puzzle) table",
+    )
+    args = parser.parse_args()
+    # data_load() fills the module-level frames that build_demo() reads.
+    data_load(args.result_file, args.puzzle_file)
+    print(original_df)
+    demo = build_demo()
+    demo.launch(share=args.share, height=3000, width="100%")