made changes
Browse files
app.py
CHANGED
|
@@ -103,7 +103,7 @@ with gr.Blocks(css=custom_css) as app:
|
|
| 103 |
search_1 = gr.Textbox(label="Model Search (delimit with , )",
|
| 104 |
# placeholder="Model Search (delimit with , )",
|
| 105 |
show_label=True)
|
| 106 |
-
category_selector_1 = gr.Dropdown(categories, label="Sorted By", value="Average", multiselect=False, show_label=True)
|
| 107 |
with gr.Row():
|
| 108 |
# reference data
|
| 109 |
rewardbench_table_hidden = gr.Dataframe(
|
|
|
|
| 103 |
search_1 = gr.Textbox(label="Model Search (delimit with , )",
|
| 104 |
# placeholder="Model Search (delimit with , )",
|
| 105 |
show_label=True)
|
| 106 |
+
category_selector_1 = gr.Dropdown(categories, label="Sorted By", value="Average", multiselect=False, show_label=True, elem_id="category_selector")
|
| 107 |
with gr.Row():
|
| 108 |
# reference data
|
| 109 |
rewardbench_table_hidden = gr.Dataframe(
|
src/md.py
CHANGED
|
@@ -24,6 +24,9 @@ For reproducibility, we use greedy decoding for all model generation by default
|
|
| 24 |
- **Contamination-resistant**: HREF's evaluation set is hidden and uses public models for both the baseline model and judge model, which makes it completely free of contamination.
|
| 25 |
- **Task Oriented**: Instead of naturally collected instructions from the user, HREF contains instructions that are written specifically targeting 8 distinct categories that are used in instruction tuning, which allows it to provide more insights about how to improve language models.
|
| 26 |
|
|
|
|
|
|
|
|
|
|
| 27 |
"""
|
| 28 |
|
| 29 |
# Get Pacific time zone (handles PST/PDT automatically)
|
|
@@ -33,7 +36,4 @@ current_time = datetime.now(pacific_tz).strftime("%H:%M %Z, %d %b %Y")
|
|
| 33 |
TOP_TEXT = f"""# HREF: Human Reference Guided Evaluation for Instruction Following
|
| 34 |
[Code]() | [Validation Set]() | [Human Agreement Set]() | [Results]() | [Paper]() | Total models: {{}} | * Unverified models | ⚠️ Dataset Contamination | Last restart (PST): {current_time}
|
| 35 |
|
| 36 |
-
## Contact Us
|
| 37 |
-
TODO
|
| 38 |
-
"""
|
| 39 |
|
|
|
|
| 24 |
- **Contamination-resistant**: HREF's evaluation set is hidden and uses public models for both the baseline model and judge model, which makes it completely free of contamination.
|
| 25 |
- **Task Oriented**: Instead of naturally collected instructions from the user, HREF contains instructions that are written specifically targeting 8 distinct categories that are used in instruction tuning, which allows it to provide more insights about how to improve language models.
|
| 26 |
|
| 27 |
+
## Contact Us
|
| 28 |
+
TODO
|
| 29 |
+
"""
|
| 30 |
"""
|
| 31 |
|
| 32 |
# Get Pacific time zone (handles PST/PDT automatically)
|
|
|
|
| 36 |
TOP_TEXT = f"""# HREF: Human Reference Guided Evaluation for Instruction Following
|
| 37 |
[Code]() | [Validation Set]() | [Human Agreement Set]() | [Results]() | [Paper]() | Total models: {{}} | * Unverified models | ⚠️ Dataset Contamination | Last restart (PST): {current_time}
|
| 38 |
|
|
|
|
|
|
|
|
|
|
| 39 |
|