PROBE

Running

App Files Files Community

mgyigit committed on May 9

Commit

edb9d91

verified ·

1 Parent(s): b696eae

Update app.py

Browse files

Files changed (1) hide show

app.py +220 -100

app.py CHANGED Viewed

@@ -1,5 +1,3 @@
-__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
 import gradio as gr
 import pandas as pd
 import re
@@ -22,6 +20,9 @@ from src.saving_utils import *
 from src.vis_utils import *
 from src.bin.PROBE import run_probe
 def add_new_eval(
     human_file,
@@ -35,6 +36,7 @@ def add_new_eval(
     family_prediction_dataset,
     save,
 ):
     if any(task in benchmark_types for task in ['similarity', 'family', 'function']) and human_file is None:
         gr.Warning("Human representations are required for similarity, family, or function benchmarks!")
         return -1
@@ -43,27 +45,36 @@ def add_new_eval(
         gr.Warning("SKEMPI representations are required for affinity benchmark!")
         return -1
-    processing_info = gr.Info("Your submission is being processed...")
     representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
     try:
-        results = run_probe(benchmark_types, representation_name, human_file, skempi_file, similarity_tasks, function_prediction_aspect, function_prediction_dataset, family_prediction_dataset)
-    except:
-        completion_info = gr.Warning("Your submission has not been processed. Please check your representation files!")
         return -1
     if save:
         save_results(representation_name, benchmark_types, results)
-        completion_info = gr.Info("Your submission has been processed and results are saved!")
     else:
-        completion_info = gr.Info("Your submission has been processed!")
     return 0
 def refresh_data():
     api.restart_space(repo_id=repo_id)
     benchmark_types = ["similarity", "function", "family", "affinity", "leaderboard"]
@@ -75,63 +86,130 @@ def refresh_data():
     benchmark_types.remove("leaderboard")
     download_from_hub(benchmark_types)
-            # Define a function to update metrics based on benchmark type selection
 def update_metrics(selected_benchmarks):
     """Collect the metric names mapped to the selected benchmark types.

     Each selected benchmark is looked up in the module-level
     ``benchmark_metric_mapping``; benchmarks missing from the mapping
     contribute nothing.

     Args:
         selected_benchmarks: Iterable of benchmark-type keys. ``None`` is
             treated as an empty selection.

     Returns:
         list: De-duplicated metric names in first-seen order.
     """
     # A dict is used as an ordered set: converting a plain set to a list
     # yields an arbitrary order that can differ between interpreter runs.
     ordered_metrics = {}
     for benchmark in selected_benchmarks or []:
         ordered_metrics.update(dict.fromkeys(benchmark_metric_mapping.get(benchmark, [])))
     return list(ordered_metrics)
-            # Define a function to update the leaderboard
 def update_leaderboard(selected_methods, selected_metrics):
     """Return the result of ``get_baseline_df`` for the given methods and metrics."""
     return get_baseline_df(selected_methods, selected_metrics)
 block = gr.Blocks()
 with block:
     gr.Markdown(LEADERBOARD_INTRODUCTION)
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
-            leaderboard = get_baseline_df(None, None) #get baseline leaderboard without filtering
             method_names = leaderboard['Method'].unique().tolist()
             metric_names = leaderboard.columns.tolist()
-            metrics_with_method = metric_names.copy()
-            metric_names.remove('Method')  # Remove method_name from the metric options
             benchmark_metric_mapping = {
-                "similarity": [metric for metric in metric_names if metric.startswith('sim_')],
-                "function": [metric for metric in metric_names if metric.startswith('func')],
-                "family": [metric for metric in metric_names if metric.startswith('fam_')],
-                "affinity": [metric for metric in metric_names if metric.startswith('aff_')],
             }
-            # Leaderboard section with method and metric selectors
             leaderboard_method_selector = gr.CheckboxGroup(
-                choices=method_names, label="Select Methods for the Leaderboard", value=method_names, interactive=True
             )
-            benchmark_type_selector = gr.CheckboxGroup(
-                choices=list(benchmark_metric_mapping.keys()),
-                label="Select Benchmark Types",
-                value=None,  # Initially select all benchmark types
-                interactive=True
             )
             leaderboard_metric_selector = gr.CheckboxGroup(
-                choices=metric_names, label="Select Metrics for the Leaderboard", value=None, interactive=True
             )
-            # Display the filtered leaderboard
             baseline_value = get_baseline_df(method_names, metric_names)
-            baseline_value = baseline_value.applymap(lambda x: round(x, 4) if isinstance(x, (int, float)) else x)  # Round all numeric values to 4 decimal places
             baseline_header = ["Method"] + metric_names
             baseline_datatype = ['markdown'] + ['number'] * len(metric_names)
             with gr.Row(show_progress=True, variant='panel'):
-                data_component = gr.components.Dataframe(
                     value=baseline_value,
                     headers=baseline_header,
                     type="pandas",
@@ -140,78 +218,121 @@ with block:
                     visible=True,
                 )
-            # Update leaderboard when method/metric selection changes
             leaderboard_method_selector.change(
-                get_baseline_df,
-                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
-                outputs=data_component
             )
-            # Update metrics when benchmark type changes
-            benchmark_type_selector.change(
-                lambda selected_benchmarks: update_metrics(selected_benchmarks),
-                inputs=[benchmark_type_selector],
-                outputs=leaderboard_metric_selector
             )
             leaderboard_metric_selector.change(
-                get_baseline_df,
-                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
-                outputs=data_component
             )
-            with gr.Row():
-                gr.Markdown(
-                    """
-                    ## **Below, you can visualize the results displayed in the Leaderboard.**
-                    ### Once you choose a benchmark type, the related options for metrics, datasets, and other parameters will become visible. Select the methods and metrics of interest from the options to generate visualizations.
-                    """
-                )
-            # Dropdown for benchmark type
-            benchmark_type_selector = gr.Dropdown(choices=list(benchmark_specific_metrics.keys()), label="Select Benchmark Type", value=None)
-            with gr.Row():
-                # Dynamic selectors
-                x_metric_selector = gr.Dropdown(choices=[], label="Select X-axis Metric", visible=False)
-                y_metric_selector = gr.Dropdown(choices=[], label="Select Y-axis Metric", visible=False)
-                aspect_type_selector = gr.Dropdown(choices=[], label="Select Aspect Type", visible=False)
-                dataset_selector = gr.Dropdown(choices=[], label="Select Dataset", visible=False)
-                single_metric_selector = gr.Dropdown(choices=[], label="Select Metric", visible=False)
-            method_selector = gr.CheckboxGroup(choices=method_names, label="Select methods to visualize", interactive=True, value=method_names)
-            # Button to draw the plot for the selected benchmark
             plot_button = gr.Button("Plot")
             with gr.Row(show_progress=True, variant='panel'):
                 plot_output = gr.Image(label="Plot")
-            # Update selectors when benchmark type changes
-            benchmark_type_selector.change(
                 update_metric_choices,
-                inputs=[benchmark_type_selector],
-                outputs=[x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector]
             )
             plot_button.click(
-                benchmark_plot,
-                inputs=[benchmark_type_selector, method_selector, x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector],
-                outputs=plot_output
             )
-        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
             with gr.Row():
                 gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 gr.Image(
-                    value="./src/data/PROBE_workflow_figure.jpg",  # Replace with your image file path or URL
-                    label="PROBE Workflow Figure",  # Optional label
-                    elem_classes="about-image",  # Optional CSS class for styling
                 )
-        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
             with gr.Row():
                 gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
@@ -220,12 +341,8 @@ with block:
             with gr.Row():
                 with gr.Column():
-                    model_name_textbox = gr.Textbox(
-                        label="Method name",
-                    )
-                    revision_name_textbox = gr.Textbox(
-                        label="Revision Method Name",
-                    )
                     benchmark_types = gr.CheckboxGroup(
                         choices=TASK_INFO,
@@ -237,35 +354,34 @@ with block:
                         label="Similarity Tasks",
                         interactive=True,
                     )
                     function_prediction_aspect = gr.Radio(
                         choices=function_prediction_aspect_options,
                         label="Function Prediction Aspects",
                         interactive=True,
                     )
                     family_prediction_dataset = gr.CheckboxGroup(
                         choices=family_prediction_dataset_options,
                         label="Family Prediction Datasets",
                         interactive=True,
                     )
                     function_dataset = gr.Textbox(
                         label="Function Prediction Datasets",
                         visible=False,
-                        value="All_Data_Sets"
                     )
                     save_checkbox = gr.Checkbox(
                         label="Save results for leaderboard and visualization",
-                        value=True
                     )
-            #with gr.Column():
             with gr.Row():
-                human_file = gr.components.File(label="The representation file (csv) for Human dataset", file_count="single", type='filepath')
-                skempi_file = gr.components.File(label="The representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')
             submit_button = gr.Button("Submit Eval")
             submission_result = gr.Markdown()
             submit_button.click(
@@ -284,6 +400,9 @@ with block:
                 ],
             )
     with gr.Row():
         data_run = gr.Button("Refresh")
         data_run.click(refresh_data, outputs=[data_component])
@@ -296,4 +415,5 @@ with block:
             show_copy_button=True,
         )
-block.launch()

 import gradio as gr
 import pandas as pd
 import re
 from src.vis_utils import *
 from src.bin.PROBE import run_probe
+# ------------------------------------------------------------------
+# Helper functions moved / added here so that UI callbacks can see them
+# ------------------------------------------------------------------
 def add_new_eval(
     human_file,
     family_prediction_dataset,
     save,
 ):
+    """Validate inputs, run evaluation and (optionally) save results."""
     if any(task in benchmark_types for task in ['similarity', 'family', 'function']) and human_file is None:
         gr.Warning("Human representations are required for similarity, family, or function benchmarks!")
         return -1
         gr.Warning("SKEMPI representations are required for affinity benchmark!")
         return -1
+    gr.Info("Your submission is being processed…")
     representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
     try:
+        results = run_probe(
+            benchmark_types,
+            representation_name,
+            human_file,
+            skempi_file,
+            similarity_tasks,
+            function_prediction_aspect,
+            function_prediction_dataset,
+            family_prediction_dataset,
+        )
+    except Exception:
+        gr.Warning("Your submission has not been processed. Please check your representation files!")
         return -1
     if save:
         save_results(representation_name, benchmark_types, results)
+        gr.Info("Your submission has been processed and results are saved!")
     else:
+        gr.Info("Your submission has been processed!")
     return 0
 def refresh_data():
+    """Re‑start the space and pull fresh leaderboard CSVs from the HF Hub."""
     api.restart_space(repo_id=repo_id)
     benchmark_types = ["similarity", "function", "family", "affinity", "leaderboard"]
     benchmark_types.remove("leaderboard")
     download_from_hub(benchmark_types)
+# ------- Leaderboard helpers -------------------------------------------------
 def update_metrics(selected_benchmarks):
     """Collect the metric names mapped to the selected benchmark types.

     Each selected benchmark is looked up in the module-level
     ``benchmark_metric_mapping``; benchmarks missing from the mapping
     contribute nothing.

     Args:
         selected_benchmarks: Iterable of benchmark-type keys. ``None`` is
             treated as an empty selection.

     Returns:
         list: De-duplicated metric names in first-seen order.
     """
     # A dict is used as an ordered set: converting a plain set to a list
     # yields an arbitrary order that can differ between interpreter runs.
     ordered_metrics = {}
     for benchmark in selected_benchmarks or []:
         ordered_metrics.update(dict.fromkeys(benchmark_metric_mapping.get(benchmark, [])))
     return list(ordered_metrics)
 def update_leaderboard(selected_methods, selected_metrics):
     """Return the result of ``get_baseline_df`` for the given methods and metrics."""
     return get_baseline_df(selected_methods, selected_metrics)
+# ------- Visualisation helpers ----------------------------------------------
def get_plot_explanation(benchmark_type, x_metric, y_metric, aspect, dataset, single_metric):
    """Return a short Markdown explanation for the plot of a benchmark type.

    Args:
        benchmark_type: "similarity", "function", "family" or "affinity";
            any other value yields an empty string.
        x_metric: Metric name interpolated into the similarity text (x-axis).
        y_metric: Metric name interpolated into the similarity text (y-axis).
        aspect: Aspect name for the function text, upper-cased for display.
            A missing or empty aspect is rendered as "N/A" instead of raising.
        dataset: Dataset name interpolated into the family text.
        single_metric: Metric name interpolated into the function and
            affinity texts.

    Returns:
        str: The explanation text, or "" for an unrecognised benchmark type.
    """
    if benchmark_type == "similarity":
        return (
            f"The scatter plot compares models on **{x_metric}** (x‑axis) and "
            f"**{y_metric}** (y‑axis). Points further to the upper‑right indicate better "
            "performance on both metrics."
        )
    if benchmark_type == "function":
        # The aspect may be None (e.g. nothing chosen in the selector);
        # calling .upper() on it directly would raise AttributeError.
        aspect_label = str(aspect).upper() if aspect else "N/A"
        return (
            "The heat‑map shows performance of each model (columns) across GO terms "
            f"for the **{aspect_label}** aspect using the **{single_metric}** metric. "
            "Darker squares correspond to stronger performance; hierarchical clustering "
            "groups similar models and tasks together."
        )
    if benchmark_type == "family":
        return (
            "The horizontal box‑plots summarise cross‑validation performance on the "
            f"**{dataset}** dataset. Higher median MCC values indicate better family‑"
            "classification accuracy."
        )
    if benchmark_type == "affinity":
        return (
            f"Each box‑plot shows the distribution of **{single_metric}** scores for every "
            "model when predicting binding affinity changes. Higher values are better."
        )
    return ""
def generate_plot_and_explanation(
    benchmark_type,
    methods_selected,
    x_metric,
    y_metric,
    aspect,
    dataset,
    single_metric,
):
    """Callback wrapper producing the plot and its textual explanation.

    Calls ``benchmark_plot`` with all seven arguments, then builds the
    matching text with ``get_plot_explanation``.

    Args:
        benchmark_type: Selected benchmark type.
        methods_selected: Methods chosen for plotting; forwarded to
            ``benchmark_plot`` only.
        x_metric: X-axis metric selection.
        y_metric: Y-axis metric selection.
        aspect: Aspect selection.
        dataset: Dataset selection.
        single_metric: Single-metric selection.

    Returns:
        tuple: ``(plot, explanation_update)`` where ``plot`` is whatever
        ``benchmark_plot`` returned and ``explanation_update`` is a Gradio
        update carrying the explanation text and its visibility.
    """
    plot_path = benchmark_plot(
        benchmark_type,
        methods_selected,
        x_metric,
        y_metric,
        aspect,
        dataset,
        single_metric,
    )
    explanation = get_plot_explanation(benchmark_type, x_metric, y_metric, aspect, dataset, single_metric)
    # The explanation Markdown component is created with visible=False.
    # Returning a bare string would only set its value and leave it hidden,
    # so visibility is switched on whenever there is text to show.
    return plot_path, gr.update(value=explanation, visible=bool(explanation))
+# ------------------------------------------------------------------
+# UI definition
+# ------------------------------------------------------------------
 block = gr.Blocks()
 with block:
     gr.Markdown(LEADERBOARD_INTRODUCTION)
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        # ------------------------------------------------------------------
+        # 1️⃣  Leaderboard tab
+        # ------------------------------------------------------------------
         with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
+            leaderboard = get_baseline_df(None, None)  # baseline leaderboard without filtering
             method_names = leaderboard['Method'].unique().tolist()
             metric_names = leaderboard.columns.tolist()
+            metric_names.remove('Method')  # remove non‑metric column
             benchmark_metric_mapping = {
+                "similarity": [m for m in metric_names if m.startswith('sim_')],
+                "function": [m for m in metric_names if m.startswith('func')],
+                "family": [m for m in metric_names if m.startswith('fam_')],
+                "affinity": [m for m in metric_names if m.startswith('aff_')],
             }
+            # selectors -----------------------------------------------------
             leaderboard_method_selector = gr.CheckboxGroup(
+                choices=method_names,
+                label="Select Methods for the Leaderboard",
+                value=method_names,
+                interactive=True,
             )
+            benchmark_type_selector_lb = gr.CheckboxGroup(
+                choices=list(benchmark_metric_mapping.keys()),
+                label="Select Benchmark Types",
+                value=None,
+                interactive=True,
             )
             leaderboard_metric_selector = gr.CheckboxGroup(
+                choices=metric_names,
+                label="Select Metrics for the Leaderboard",
+                value=None,
+                interactive=True,
             )
+            # leaderboard table --------------------------------------------
             baseline_value = get_baseline_df(method_names, metric_names)
+            baseline_value = baseline_value.applymap(lambda x: round(x, 4) if isinstance(x, (int, float)) else x)
             baseline_header = ["Method"] + metric_names
             baseline_datatype = ['markdown'] + ['number'] * len(metric_names)
             with gr.Row(show_progress=True, variant='panel'):
+                data_component = gr.Dataframe(
                     value=baseline_value,
                     headers=baseline_header,
                     type="pandas",
                     visible=True,
                 )
+            # callbacks -----------------------------------------------------
             leaderboard_method_selector.change(
+                get_baseline_df,
+                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
+                outputs=data_component,
             )
+            benchmark_type_selector_lb.change(
+                lambda selected: update_metrics(selected),
+                inputs=[benchmark_type_selector_lb],
+                outputs=leaderboard_metric_selector,
             )
             leaderboard_metric_selector.change(
+                get_baseline_df,
+                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
+                outputs=data_component,
             )
+        # ------------------------------------------------------------------
+        # 2️⃣ Visualisation tab
+        # ------------------------------------------------------------------
+        with gr.TabItem("📊 Visualization", elem_id="probe-benchmark-tab-visualization", id=2):
+            # Intro / instructions
+            gr.Markdown(
+                """
+                ## **Interactive Visualizations**
+                Select a benchmark type first; context‑specific options will appear automatically.
+                Once your parameters are set, click **Plot** to generate the figure.
+                **How to read the plots**
+                * **Similarity (scatter)** – Each point is a model. Points nearer the top‑right perform well on both chosen similarity metrics.
+                * **Function prediction (heat‑map)** – Darker squares denote better scores. Rows/columns are clustered to reveal shared structure.
+                * **Family / Affinity (boxplots)** – Boxes summarise distribution across folds/targets. Higher medians indicate stronger performance.
+                """,
+                elem_classes="markdown-text",
+            )
+            # ------------------------------------------------------------------
+            # selectors specific to visualisation
+            # ------------------------------------------------------------------
+            vis_benchmark_type_selector = gr.Dropdown(
+                choices=list(benchmark_specific_metrics.keys()),
+                label="Select Benchmark Type",
+                value=None,
+            )
+            with gr.Row():
+                vis_x_metric_selector = gr.Dropdown(choices=[], label="Select X‑axis Metric", visible=False)
+                vis_y_metric_selector = gr.Dropdown(choices=[], label="Select Y‑axis Metric", visible=False)
+                vis_aspect_type_selector = gr.Dropdown(choices=[], label="Select Aspect Type", visible=False)
+                vis_dataset_selector = gr.Dropdown(choices=[], label="Select Dataset", visible=False)
+                vis_single_metric_selector = gr.Dropdown(choices=[], label="Select Metric", visible=False)
+            vis_method_selector = gr.CheckboxGroup(
+                choices=method_names,
+                label="Select methods to visualize",
+                interactive=True,
+                value=method_names,
+            )
             plot_button = gr.Button("Plot")
             with gr.Row(show_progress=True, variant='panel'):
                 plot_output = gr.Image(label="Plot")
+            # textual explanation below the image
+            plot_explanation = gr.Markdown(visible=False)
+            # ------------------------------------------------------------------
+            # callbacks for visualisation tab
+            # ------------------------------------------------------------------
+            vis_benchmark_type_selector.change(
                 update_metric_choices,
+                inputs=[vis_benchmark_type_selector],
+                outputs=[
+                    vis_x_metric_selector,
+                    vis_y_metric_selector,
+                    vis_aspect_type_selector,
+                    vis_dataset_selector,
+                    vis_single_metric_selector,
+                ],
             )
             plot_button.click(
+                generate_plot_and_explanation,
+                inputs=[
+                    vis_benchmark_type_selector,
+                    vis_method_selector,
+                    vis_x_metric_selector,
+                    vis_y_metric_selector,
+                    vis_aspect_type_selector,
+                    vis_dataset_selector,
+                    vis_single_metric_selector,
+                ],
+                outputs=[plot_output, plot_explanation],
             )
+        # ------------------------------------------------------------------
+        # 3️⃣  About tab
+        # ------------------------------------------------------------------
+        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=3):
             with gr.Row():
                 gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 gr.Image(
+                    value="./src/data/PROBE_workflow_figure.jpg",
+                    label="PROBE Workflow Figure",
+                    elem_classes="about-image",
                 )
+        # ------------------------------------------------------------------
+        # 4️⃣  Submit tab
+        # ------------------------------------------------------------------
+        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=4):
             with gr.Row():
                 gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Method name")
+                    revision_name_textbox = gr.Textbox(label="Revision Method Name")
                     benchmark_types = gr.CheckboxGroup(
                         choices=TASK_INFO,
                         label="Similarity Tasks",
                         interactive=True,
                     )
                     function_prediction_aspect = gr.Radio(
                         choices=function_prediction_aspect_options,
                         label="Function Prediction Aspects",
                         interactive=True,
                     )
                     family_prediction_dataset = gr.CheckboxGroup(
                         choices=family_prediction_dataset_options,
                         label="Family Prediction Datasets",
                         interactive=True,
                     )
                     function_dataset = gr.Textbox(
                         label="Function Prediction Datasets",
                         visible=False,
+                        value="All_Data_Sets",
                     )
                     save_checkbox = gr.Checkbox(
                         label="Save results for leaderboard and visualization",
+                        value=True,
                     )
             with gr.Row():
+                human_file = gr.File(label="Representation file (CSV) for Human dataset", file_count="single", type='filepath')
+                skempi_file = gr.File(label="Representation file (CSV) for SKEMPI dataset", file_count="single", type='filepath')
             submit_button = gr.Button("Submit Eval")
             submission_result = gr.Markdown()
             submit_button.click(
                 ],
             )
+    # ----------------------------------------------------------------------
+    # global refresh button & citation accordion
+    # ----------------------------------------------------------------------
     with gr.Row():
         data_run = gr.Button("Refresh")
         data_run.click(refresh_data, outputs=[data_component])
             show_copy_button=True,
         )
+# -----------------------------------------------------------------------------
+block.launch()