href

Running

alrope committed on Nov 26, 2024

Commit

ee71995

1 Parent(s): 91cb993

made changes

Files changed (2) hide show

app.py CHANGED Viewed

@@ -103,7 +103,7 @@ with gr.Blocks(css=custom_css) as app:
                 search_1 = gr.Textbox(label="Model Search (delimit with , )",
                                     #   placeholder="Model Search (delimit with , )",
                                       show_label=True)
-                category_selector_1 = gr.Dropdown(categories, label="Sorted By", value="Average", multiselect=False, show_label=True)
             with gr.Row():
                 # reference data
                 rewardbench_table_hidden = gr.Dataframe(

                 search_1 = gr.Textbox(label="Model Search (delimit with , )",
                                     #   placeholder="Model Search (delimit with , )",
                                       show_label=True)
+                category_selector_1 = gr.Dropdown(categories, label="Sorted By", value="Average", multiselect=False, show_label=True, elem_id="category_selector")
             with gr.Row():
                 # reference data
                 rewardbench_table_hidden = gr.Dataframe(

src/md.py CHANGED Viewed

@@ -24,6 +24,9 @@ For reproductability, we use greedy decoding for all model generation as default
 - **Contamination-resistant**: HREF's evaluation set is hidden and uses public models for both the baseline model and judge model, which makes it completely free of contamination.
 - **Task Oriented**: Instead of naturally collected instructions from the user, HREF contains instructions that are written specifically targeting 8 distinct categories that are used in instruction tuning, which allows it to provide more insights about how to improve language models.
 """
 # Get Pacific time zone (handles PST/PDT automatically)
@@ -33,7 +36,4 @@ current_time = datetime.now(pacific_tz).strftime("%H:%M %Z, %d %b %Y")
 TOP_TEXT = f"""# HREF: Human Reference Guided Evaluation for Instruction Following
 [Code]() | [Validation Set]() | [Human Agreement Set]() | [Results]() | [Paper]() | Total models: {{}} | * Unverified models | ⚠️ Dataset Contamination | Last restart (PST): {current_time}
-## Contact Us
-TODO
-"""

 - **Contamination-resistant**: HREF's evaluation set is hidden and uses public models for both the baseline model and judge model, which makes it completely free of contamination.
 - **Task Oriented**: Instead of naturally collected instructions from the user, HREF contains instructions that are written specifically targeting 8 distinct categories that are used in instruction tuning, which allows it to provide more insights about how to improve language models.
+## Contact Us
+TODO
+"""
 """
 # Get Pacific time zone (handles PST/PDT automatically)
 TOP_TEXT = f"""# HREF: Human Reference Guided Evaluation for Instruction Following
 [Code]() | [Validation Set]() | [Human Agreement Set]() | [Results]() | [Paper]() | Total models: {{}} | * Unverified models | ⚠️ Dataset Contamination | Last restart (PST): {current_time}