PROBE

Running

App Files Files Community

gyigit committed on Sep 30, 2024

Commit

e9f3238

2 Parent(s): 8babe71 1cc2077

Merge branch 'main' of https://huggingface.co/spaces/mgyigit/probe3

Browse files

Files changed (8) hide show

.gitattributes +35 -0
Makefile +13 -0
README.md +45 -0
app.py +129 -0
index.html +19 -0
pyproject.toml +13 -0
requirements.txt +19 -0
style.css +28 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

Makefile ADDED Viewed

	@@ -0,0 +1,13 @@

+# Code-style helpers. Neither target produces a file of the same name, so both
+# are declared phony to make sure they always run.
+.PHONY: style quality
+# style: rewrite sources in place (black formatting, isort import order, ruff autofixes).
+style:
+	python -m black --line-length 119 .
+	python -m isort .
+	ruff check --fix .
+# quality: same tools in check-only mode; nothing is modified.
+quality:
+	python -m black --check --line-length 119 .
+	python -m isort --check-only .
+	ruff check .

README.md ADDED Viewed

	@@ -0,0 +1,45 @@

+---
+title: PROBE
+emoji: 🥇
+colorFrom: green
+colorTo: indigo
+sdk: gradio
+app_file: app.py
+pinned: false
+license: gpl
+python_version: 3.8.1
+---
+# Start the configuration
+Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
+Results files should have the following format and be stored as json files:
+```json
+{
+    "config": {
+        "model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
+        "model_name": "path of the model on the hub: org/model",
+        "model_sha": "revision on the hub",
+    },
+    "results": {
+        "task_name": {
+            "metric_name": score,
+        },
+        "task_name2": {
+            "metric_name": score,
+        }
+    }
+}
+```
+Request files are created automatically by this tool.
+If you encounter a problem on the Space, don't hesitate to restart it to remove the created eval-queue, eval-queue-bk, eval-results and eval-results-bk folders.
+# Code logic for more complex edits
+You'll find
+- the main table's column names and properties in `src/display/utils.py`
+- the logic to read all results and request files, then convert them into dataframe lines, in `src/leaderboard/read_evals.py` and `src/populate.py`
+- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`

app.py ADDED Viewed

	@@ -0,0 +1,129 @@

+__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
+import gradio as gr
+import pandas as pd
+import re
+import pandas as pd
+import os
+import json
+from src.about import *
+global data_component, filter_component
+def get_baseline_df():
+    df = pd.read_csv(CSV_RESULT_PATH)
+    present_columns = ["Method"] + checkbox_group.value
+    df = df[present_columns]
+    return df
+def add_new_eval(
+    human_file,
+    skempi_file,
+    model_name_textbox: str,
+    revision_name_textbox: str,
+    benchmark_type: str,
+):
+    representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
+    print(representation_name)
+    # Save human and skempi files under ./src/data/representation_vectors using pandas
+    if human_file is not None:
+        human_df = pd.read_csv(human_file)
+        human_df.to_csv(f"./src/data/representation_vectors/{representation_name}_human.csv", index=False)
+    return None
+# Build the Gradio UI: an introduction, three tabs (benchmark table, about,
+# submission form), a refresh button and a citation box, then launch the app.
+# Component creation order matters: get_baseline_df() reads checkbox_group.
+block = gr.Blocks()
+with block:
+    gr.Markdown(
+        LEADERBOARD_INTRODUCTION
+    )
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        # PROBE benchmark results table
+        # NOTE(review): all three tabs below share the same elem_id.
+        with gr.TabItem("🏅 PROBE Benchmark", elem_id="probe-benchmark-tab-table", id=1):
+            # selection for column part:
+            checkbox_group = gr.CheckboxGroup(
+                choices=TASK_INFO,
+                label="Benchmark Type",
+                interactive=True,
+            ) # user can select the evaluation dimension
+            # Must run after checkbox_group exists: get_baseline_df() reads checkbox_group.value.
+            # NOTE(review): no initial value is passed to the CheckboxGroup, so the column
+            # selection used here is whatever the component defaults to - confirm.
+            baseline_value = get_baseline_df()
+            baseline_header = ["Method"] + checkbox_group.value
+            baseline_datatype = ['markdown'] + ['number'] * len(checkbox_group.value)
+            data_component = gr.components.Dataframe(
+                value=baseline_value,
+                headers=baseline_header,
+                type="pandas",
+                datatype=baseline_datatype,
+                interactive=False,
+                visible=True,
+                )
+        # About tab
+        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
+            with gr.Row():
+                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+        # Submission tab
+        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
+            with gr.Row():
+                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+            with gr.Row():
+                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(
+                        label="Model name",
+                        )
+                    revision_name_textbox = gr.Textbox(
+                        label="Revision Model Name",
+                    )
+                    # Selection of the benchmark type(s) (similarity, family, function, affinity) used to evaluate the representations (checkbox)
+                    benchmark_type = gr.CheckboxGroup(
+                        choices=TASK_INFO,
+                        label="Benchmark Type",
+                        interactive=True,
+                    )
+            # This column is a sibling of the row above, not nested inside it.
+            with gr.Column():
+                # NOTE(review): type='binary' presumably hands the handler raw bytes
+                # rather than a file path - confirm against add_new_eval.
+                human_file = gr.components.File(label="Click to Upload the representation file (csv) for Human dataset", file_count="single", type='binary')
+                skempi_file = gr.components.File(label="Click to Upload the representation file (csv) for SKEMPI dataset", file_count="single", type='binary')
+                submit_button = gr.Button("Submit Eval")
+                submission_result = gr.Markdown()
+                # No outputs are wired here, so submission_result is never updated by the click.
+                submit_button.click(
+                    add_new_eval,
+                    inputs = [
+                        human_file,
+                        skempi_file,
+                        model_name_textbox,
+                        revision_name_textbox,
+                        benchmark_type
+                    ],
+                )
+    # Callback for the Refresh button: rebuild the table data via get_baseline_df().
+    def refresh_data():
+        value = get_baseline_df()
+        return value
+    with gr.Row():
+        data_run = gr.Button("Refresh")
+        data_run.click(
+            refresh_data, outputs=[data_component]
+        )
+    with gr.Accordion("Citation", open=False):
+        citation_button = gr.Textbox(
+            value=CITATION_BUTTON_TEXT,
+            label=CITATION_BUTTON_LABEL,
+            elem_id="citation-button",
+            show_copy_button=True,
+        )
+# Runs at import time; there is no `if __name__ == "__main__"` guard.
+block.launch()

index.html ADDED Viewed

	@@ -0,0 +1,19 @@

+<!doctype html>
+<html>
+	<head>
+		<meta charset="utf-8" />
+		<meta name="viewport" content="width=device-width" />
+		<title>My static Space</title>
+		<link rel="stylesheet" href="style.css" />
+	</head>
+	<body>
+		<div class="card">
+			<h1>Welcome to your static Space!</h1>
+			<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
+			<p>
+				Also don't forget to check the
+				<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
+			</p>
+		</div>
+	</body>
+</html>

pyproject.toml ADDED Viewed

	@@ -0,0 +1,13 @@

+[tool.ruff]
+# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
+select = ["E", "F"]
+ignore = ["E501"] # line too long (black is taking care of this)
+line-length = 119
+fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
+[tool.isort]
+profile = "black"
+line_length = 119
+[tool.black]
+line-length = 119

requirements.txt ADDED Viewed

	@@ -0,0 +1,19 @@

+APScheduler
+black
+datasets
+gradio
+gradio[oauth]
+gradio_leaderboard==0.0.9
+gradio_client
+huggingface-hub>=0.18.0
+python-dateutil
+tqdm
+transformers
+tokenizers>=0.15.0
+sentencepiece
+matplotlib
+numpy
+pandas==1.1.4
+pyyaml==5.1
+scikit-learn==0.22
+scikit-multilearn==0.2.0

style.css ADDED Viewed

	@@ -0,0 +1,28 @@

+body {
+	padding: 2rem;
+	font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
+}
+h1 {
+	font-size: 16px;
+	margin-top: 0;
+}
+p {
+	color: rgb(107, 114, 128);
+	font-size: 15px;
+	margin-bottom: 10px;
+	margin-top: 5px;
+}
+.card {
+	max-width: 620px;
+	margin: 0 auto;
+	padding: 16px;
+	border: 1px solid lightgray;
+	border-radius: 16px;
+}
+.card p:last-child {
+	margin-bottom: 0;
+}