PROBE

Running

App Files Files Community

mgyigit committed on Mar 29

Commit

b696eae

verified ·

1 Parent(s): 39e623c

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -198

app.py CHANGED Viewed

@@ -10,11 +10,13 @@ import matplotlib.pyplot as plt
 import seaborn as sns
 import plotnine as p9
 import sys
-import zipfile
-import tempfile
 sys.path.append('./src')
 sys.path.append('.')
 from src.about import *
 from src.saving_utils import *
 from src.vis_utils import *
@@ -33,10 +35,10 @@ def add_new_eval(
     family_prediction_dataset,
     save,
 ):
-    # Validate required files based on selected benchmarks
     if any(task in benchmark_types for task in ['similarity', 'family', 'function']) and human_file is None:
         gr.Warning("Human representations are required for similarity, family, or function benchmarks!")
         return -1
     if 'affinity' in benchmark_types and skempi_file is None:
         gr.Warning("SKEMPI representations are required for affinity benchmark!")
         return -1
@@ -46,161 +48,59 @@ def add_new_eval(
     representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
     try:
-        results = run_probe(
-            benchmark_types,
-            representation_name,
-            human_file,
-            skempi_file,
-            similarity_tasks,
-            function_prediction_aspect,
-            function_prediction_dataset,
-            family_prediction_dataset,
-        )
-    except Exception as e:
-        gr.Warning("Your submission has not been processed. Please check your representation files!")
         return -1
-    # Even if save is False, we store the submission (e.g., temporarily) so that the leaderboard includes it.
     if save:
         save_results(representation_name, benchmark_types, results)
     else:
-        save_results(representation_name, benchmark_types, results, temporary=True)
     return 0
 def refresh_data():
     # Drop every cached results CSV under /tmp, including the leaderboard one.
     benchmark_types = ["similarity", "function", "family", "affinity", "leaderboard"]
     for benchmark_type in benchmark_types:
         path = f"/tmp/{benchmark_type}_results.csv"
         if os.path.exists(path):
             os.remove(path)
     # "leaderboard" is taken out of the list before download_from_hub is called,
     # so only the four benchmark types are passed on (the helper is defined
     # elsewhere; presumably it re-fetches their CSVs - confirm).
     benchmark_types.remove("leaderboard")
     download_from_hub(benchmark_types)
-def download_leaderboard_csv():
-    """Generates a CSV file for the updated leaderboard."""
-    df = get_baseline_df(None, None)  # (None, None): no method or metric selection is passed
-    tmp_csv = os.path.join(tempfile.gettempdir(), "leaderboard_download.csv")  # fixed file name: each call overwrites the previous export
-    df.to_csv(tmp_csv, index=False)
-    return tmp_csv  # path of the CSV that was just written
-def generate_plots_based_on_submission(benchmark_types, similarity_tasks, function_prediction_aspect, function_prediction_dataset, family_prediction_dataset):
-    """
-    For each benchmark type selected during submission, generate a plot based on the corresponding extra parameters.
-    """
-    tmp_dir = tempfile.mkdtemp()  # NOTE(review): this directory is not removed anywhere in this function
-    plot_files = []
-    # Every method present in the leaderboard returned by get_baseline_df(None, None) is plotted.
-    leaderboard = get_baseline_df(None, None)
-    method_names = leaderboard['Method'].unique().tolist()
-    for btype in benchmark_types:
-        # Derive the x/y arguments for benchmark_plot from the selections that belong to this benchmark type.
-        if btype == "similarity":
-            # First and second selected similarity tasks become x and y; a missing entry becomes None.
-            x_metric = similarity_tasks[0] if similarity_tasks and len(similarity_tasks) > 0 else None
-            y_metric = similarity_tasks[1] if similarity_tasks and len(similarity_tasks) > 1 else None
-        elif btype == "function":
-            x_metric = function_prediction_aspect if function_prediction_aspect else None  # falsy selections are normalised to None
-            y_metric = function_prediction_dataset if function_prediction_dataset else None
-        elif btype == "family":
-            # family_prediction_dataset is indexed like a sequence: first two entries become x and y.
-            x_metric = family_prediction_dataset[0] if family_prediction_dataset and len(family_prediction_dataset) > 0 else None
-            y_metric = family_prediction_dataset[1] if family_prediction_dataset and len(family_prediction_dataset) > 1 else None
-        elif btype == "affinity":
-            # No affinity-specific selection is passed to this function, so both metrics are None.
-            x_metric, y_metric = None, None
-        else:
-            x_metric, y_metric = None, None  # unrecognised benchmark types are still plotted, with no metrics
-        # benchmark_plot is called positionally (defined elsewhere in the project).
-        # The last three arguments are always None here.
-        plot_img = benchmark_plot(btype, method_names, x_metric, y_metric, None, None, None)
-        plot_file = os.path.join(tmp_dir, f"{btype}.png")
-        if isinstance(plot_img, plt.Figure):
-            plot_img.savefig(plot_file)
-            plt.close(plot_img)  # close the figure once it has been written to disk
-        else:
-            # Anything that is not a Figure is treated as a file path - NOTE(review): a None result is not handled.
-            plot_file = plot_img
-        plot_files.append(plot_file)
-    # Bundle the collected plot files into one archive, stored under their base names.
-    zip_path = os.path.join(tmp_dir, "submission_plots.zip")
-    with zipfile.ZipFile(zip_path, "w") as zipf:
-        for file in plot_files:
-            zipf.write(file, arcname=os.path.basename(file))
-    return zip_path  # path of the ZIP inside tmp_dir
-def submission_callback(
-    human_file,
-    skempi_file,
-    model_name_textbox,
-    revision_name_textbox,
-    benchmark_types,
-    similarity_tasks,
-    function_prediction_aspect,
-    function_prediction_dataset,
-    family_prediction_dataset,
-    save_checkbox,
-    return_option,  # Radio selection: "Leaderboard CSV" or "Plot Results"
-):
-    """
-    Runs the evaluation and then returns either a downloadable CSV of the leaderboard
-    (which includes the new submission) or a ZIP file of plots generated based on the submission's selections.
-    """
-    eval_status = add_new_eval(
-        human_file,
-        skempi_file,
-        model_name_textbox,
-        revision_name_textbox,
-        benchmark_types,
-        similarity_tasks,
-        function_prediction_aspect,
-        function_prediction_dataset,
-        family_prediction_dataset,
-        save_checkbox,
-    )
-    if eval_status == -1:  # add_new_eval returns -1 on its failure paths and 0 on success
-        return "Submission failed. Please check your files and selections.", None  # (status message, no file)
-    if return_option == "Leaderboard CSV":
-        csv_path = download_leaderboard_csv()
-        return "Your leaderboard CSV (including your submission) is ready for download.", csv_path
-    elif return_option == "Plot Results":
-        zip_path = generate_plots_based_on_submission(
-            benchmark_types,
-            similarity_tasks,
-            function_prediction_aspect,
-            function_prediction_dataset,
-            family_prediction_dataset,
-        )
-        return "Your plots are ready for download.", zip_path
-    else:
-        return "Submission processed, but no output option was selected.", None  # any other return_option value
-# --------------------------
-# Build the Gradio interface
-# --------------------------
 block = gr.Blocks()
 with block:
     gr.Markdown(LEADERBOARD_INTRODUCTION)
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
-            # Leaderboard tab (unchanged from before)
-            leaderboard = get_baseline_df(None, None)
             method_names = leaderboard['Method'].unique().tolist()
             metric_names = leaderboard.columns.tolist()
             metrics_with_method = metric_names.copy()
-            metric_names.remove('Method')
             benchmark_metric_mapping = {
                 "similarity": [metric for metric in metric_names if metric.startswith('sim_')],
@@ -208,28 +108,25 @@ with block:
                 "family": [metric for metric in metric_names if metric.startswith('fam_')],
                 "affinity": [metric for metric in metric_names if metric.startswith('aff_')],
             }
             leaderboard_method_selector = gr.CheckboxGroup(
-                choices=method_names,
-                label="Select Methods for the Leaderboard",
-                value=method_names,
-                interactive=True
             )
             benchmark_type_selector = gr.CheckboxGroup(
-                choices=list(benchmark_metric_mapping.keys()),
-                label="Select Benchmark Types",
-                value=None,
                 interactive=True
             )
             leaderboard_metric_selector = gr.CheckboxGroup(
-                choices=metric_names,
-                label="Select Metrics for the Leaderboard",
-                value=None,
-                interactive=True
             )
             baseline_value = get_baseline_df(method_names, metric_names)
-            baseline_value = baseline_value.applymap(lambda x: round(x, 4) if isinstance(x, (int, float)) else x)
             baseline_header = ["Method"] + metric_names
             baseline_datatype = ['markdown'] + ['number'] * len(metric_names)
@@ -243,80 +140,93 @@ with block:
                     visible=True,
                 )
             leaderboard_method_selector.change(
-                get_baseline_df,
-                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                 outputs=data_component
             )
             benchmark_type_selector.change(
                 lambda selected_benchmarks: update_metrics(selected_benchmarks),
                 inputs=[benchmark_type_selector],
                 outputs=leaderboard_metric_selector
             )
             leaderboard_metric_selector.change(
-                get_baseline_df,
-                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                 outputs=data_component
             )
             with gr.Row():
                 gr.Markdown(
                     """
-                    ## **Visualize the Leaderboard Results**
-                    Select options to update the visualization.
                     """
                 )
-            # (Plotting section remains available as before; not the focus of the submission callback)
-            benchmark_type_selector_plot = gr.Dropdown(
-                choices=list(benchmark_specific_metrics.keys()),
-                label="Select Benchmark Type for Plotting",
-                value=None
-            )
             with gr.Row():
                 x_metric_selector = gr.Dropdown(choices=[], label="Select X-axis Metric", visible=False)
                 y_metric_selector = gr.Dropdown(choices=[], label="Select Y-axis Metric", visible=False)
                 aspect_type_selector = gr.Dropdown(choices=[], label="Select Aspect Type", visible=False)
                 dataset_selector = gr.Dropdown(choices=[], label="Select Dataset", visible=False)
                 single_metric_selector = gr.Dropdown(choices=[], label="Select Metric", visible=False)
-            method_selector = gr.CheckboxGroup(
-                choices=method_names,
-                label="Select Methods to Visualize",
-                interactive=True,
-                value=method_names
-            )
             plot_button = gr.Button("Plot")
             with gr.Row(show_progress=True, variant='panel'):
                 plot_output = gr.Image(label="Plot")
-            benchmark_type_selector_plot.change(
                 update_metric_choices,
-                inputs=[benchmark_type_selector_plot],
                 outputs=[x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector]
             )
             plot_button.click(
                 benchmark_plot,
-                inputs=[benchmark_type_selector_plot, method_selector, x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector],
                 outputs=plot_output
             )
         with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
             with gr.Row():
                 gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 gr.Image(
-                    value="./src/data/PROBE_workflow_figure.jpg",
-                    label="PROBE Workflow Figure",
-                    elem_classes="about-image",
                 )
         with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
             with gr.Row():
                 gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")
             with gr.Row():
                 with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Method name")
-                    revision_name_textbox = gr.Textbox(label="Revision Method Name")
                     benchmark_types = gr.CheckboxGroup(
                         choices=TASK_INFO,
                         label="Benchmark Types",
@@ -324,51 +234,42 @@ with block:
                     )
                     similarity_tasks = gr.CheckboxGroup(
                         choices=similarity_tasks_options,
-                        label="Similarity Tasks (if selected)",
                         interactive=True,
                     )
                     function_prediction_aspect = gr.Radio(
                         choices=function_prediction_aspect_options,
-                        label="Function Prediction Aspects (if selected)",
                         interactive=True,
                     )
                     family_prediction_dataset = gr.CheckboxGroup(
                         choices=family_prediction_dataset_options,
-                        label="Family Prediction Datasets (if selected)",
                         interactive=True,
                     )
                     function_dataset = gr.Textbox(
                         label="Function Prediction Datasets",
                         visible=False,
                         value="All_Data_Sets"
                     )
                     save_checkbox = gr.Checkbox(
                         label="Save results for leaderboard and visualization",
                         value=True
                     )
             with gr.Row():
-                human_file = gr.components.File(
-                    label="The representation file (csv) for Human dataset",
-                    file_count="single",
-                    type='filepath'
-                )
-                skempi_file = gr.components.File(
-                    label="The representation file (csv) for SKEMPI dataset",
-                    file_count="single",
-                    type='filepath'
-                )
-            # New radio button for output selection.
-            return_option = gr.Radio(
-                choices=["Leaderboard CSV", "Plot Results"],
-                label="Return Output",
-                value="Leaderboard CSV",
-                interactive=True,
-            )
             submit_button = gr.Button("Submit Eval")
-            submission_result_msg = gr.Markdown()
-            submission_result_file = gr.File()
             submit_button.click(
-                submission_callback,
                 inputs=[
                     human_file,
                     skempi_file,
@@ -380,9 +281,7 @@ with block:
                     function_dataset,
                     family_prediction_dataset,
                     save_checkbox,
-                    return_option,
                 ],
-                outputs=[submission_result_msg, submission_result_file]
             )
     with gr.Row():

 import seaborn as sns
 import plotnine as p9
 import sys
 sys.path.append('./src')
 sys.path.append('.')
+from huggingface_hub import HfApi
+repo_id = "HUBioDataLab/PROBE"
+api = HfApi()
 from src.about import *
 from src.saving_utils import *
 from src.vis_utils import *
     family_prediction_dataset,
     save,
 ):
     if any(task in benchmark_types for task in ['similarity', 'family', 'function']) and human_file is None:
         gr.Warning("Human representations are required for similarity, family, or function benchmarks!")
         return -1
     if 'affinity' in benchmark_types and skempi_file is None:
         gr.Warning("SKEMPI representations are required for affinity benchmark!")
         return -1
     representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
     try:
+        results = run_probe(benchmark_types, representation_name, human_file, skempi_file, similarity_tasks, function_prediction_aspect, function_prediction_dataset, family_prediction_dataset)
+    except:
+        completion_info = gr.Warning("Your submission has not been processed. Please check your representation files!")
         return -1
     if save:
         save_results(representation_name, benchmark_types, results)
+        completion_info = gr.Info("Your submission has been processed and results are saved!")
     else:
+        completion_info = gr.Info("Your submission has been processed!")
     return 0
 def refresh_data():
     # Ask the Hub API to restart the Space, then clear the cached CSVs and call download_from_hub.
+    api.restart_space(repo_id=repo_id)  # NOTE(review): the restart is requested before the cleanup below; whether the rest of this function still runs is not visible here - confirm the order
     benchmark_types = ["similarity", "function", "family", "affinity", "leaderboard"]
     for benchmark_type in benchmark_types:
         path = f"/tmp/{benchmark_type}_results.csv"
         if os.path.exists(path):
             os.remove(path)
     # "leaderboard" is taken out of the list before download_from_hub is called,
     # so only the four benchmark types are passed on (the helper is defined
     # elsewhere; presumably it re-fetches their CSVs - confirm).
     benchmark_types.remove("leaderboard")
     download_from_hub(benchmark_types)
+            # Define a function to update metrics based on benchmark type selection
+def update_metrics(selected_benchmarks):  # returns the union of the metrics mapped to the selected benchmark types
+    updated_metrics = set()  # a set de-duplicates metrics that appear under more than one benchmark type
+    for benchmark in selected_benchmarks:
+        updated_metrics.update(benchmark_metric_mapping.get(benchmark, []))  # benchmark_metric_mapping is a module-level name assigned while the UI is built; unknown keys add nothing
+    return list(updated_metrics)  # NOTE(review): element order comes from set iteration and is not fixed
+            # Define a function to update the leaderboard
+def update_leaderboard(selected_methods, selected_metrics):  # thin wrapper around get_baseline_df
+    updated_df = get_baseline_df(selected_methods, selected_metrics)  # both selections are forwarded unchanged
+    return updated_df  # NOTE(review): the .change handlers visible in this diff pass get_baseline_df directly, not this wrapper
 block = gr.Blocks()
 with block:
     gr.Markdown(LEADERBOARD_INTRODUCTION)
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
+            leaderboard = get_baseline_df(None, None) #get baseline leaderboard without filtering
             method_names = leaderboard['Method'].unique().tolist()
             metric_names = leaderboard.columns.tolist()
             metrics_with_method = metric_names.copy()
+            metric_names.remove('Method')  # Remove method_name from the metric options
             benchmark_metric_mapping = {
                 "similarity": [metric for metric in metric_names if metric.startswith('sim_')],
                 "family": [metric for metric in metric_names if metric.startswith('fam_')],
                 "affinity": [metric for metric in metric_names if metric.startswith('aff_')],
             }
+            # Leaderboard section with method and metric selectors
             leaderboard_method_selector = gr.CheckboxGroup(
+                choices=method_names, label="Select Methods for the Leaderboard", value=method_names, interactive=True
             )
             benchmark_type_selector = gr.CheckboxGroup(
+                choices=list(benchmark_metric_mapping.keys()),
+                label="Select Benchmark Types",
+                value=None,  # Initially select all benchmark types
                 interactive=True
             )
             leaderboard_metric_selector = gr.CheckboxGroup(
+                choices=metric_names, label="Select Metrics for the Leaderboard", value=None, interactive=True
             )
+            # Display the filtered leaderboard
             baseline_value = get_baseline_df(method_names, metric_names)
+            baseline_value = baseline_value.applymap(lambda x: round(x, 4) if isinstance(x, (int, float)) else x)  # Round all numeric values to 4 decimal places
             baseline_header = ["Method"] + metric_names
             baseline_datatype = ['markdown'] + ['number'] * len(metric_names)
                     visible=True,
                 )
+            # Update leaderboard when method/metric selection changes
             leaderboard_method_selector.change(
+                get_baseline_df,
+                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                 outputs=data_component
             )
+            # Update metrics when benchmark type changes
             benchmark_type_selector.change(
                 lambda selected_benchmarks: update_metrics(selected_benchmarks),
                 inputs=[benchmark_type_selector],
                 outputs=leaderboard_metric_selector
             )
             leaderboard_metric_selector.change(
+                get_baseline_df,
+                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                 outputs=data_component
             )
             with gr.Row():
                 gr.Markdown(
                     """
+                    ## **Below, you can visualize the results displayed in the Leaderboard.**
+                    ### Once you choose a benchmark type, the related options for metrics, datasets, and other parameters will become visible. Select the methods and metrics of interest from the options to generate visualizations.
                     """
                 )
+            # Dropdown for benchmark type
+            benchmark_type_selector = gr.Dropdown(choices=list(benchmark_specific_metrics.keys()), label="Select Benchmark Type", value=None)
             with gr.Row():
+                # Dynamic selectors
                 x_metric_selector = gr.Dropdown(choices=[], label="Select X-axis Metric", visible=False)
                 y_metric_selector = gr.Dropdown(choices=[], label="Select Y-axis Metric", visible=False)
                 aspect_type_selector = gr.Dropdown(choices=[], label="Select Aspect Type", visible=False)
                 dataset_selector = gr.Dropdown(choices=[], label="Select Dataset", visible=False)
                 single_metric_selector = gr.Dropdown(choices=[], label="Select Metric", visible=False)
+            method_selector = gr.CheckboxGroup(choices=method_names, label="Select methods to visualize", interactive=True, value=method_names)
+            # Button to draw the plot for the selected benchmark
             plot_button = gr.Button("Plot")
             with gr.Row(show_progress=True, variant='panel'):
                 plot_output = gr.Image(label="Plot")
+            # Update selectors when benchmark type changes
+            benchmark_type_selector.change(
                 update_metric_choices,
+                inputs=[benchmark_type_selector],
                 outputs=[x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector]
             )
             plot_button.click(
                 benchmark_plot,
+                inputs=[benchmark_type_selector, method_selector, x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector],
                 outputs=plot_output
             )
         with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
             with gr.Row():
                 gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 gr.Image(
+                    value="./src/data/PROBE_workflow_figure.jpg",  # Replace with your image file path or URL
+                    label="PROBE Workflow Figure",  # Optional label
+                    elem_classes="about-image",  # Optional CSS class for styling
                 )
         with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
             with gr.Row():
                 gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")
             with gr.Row():
                 with gr.Column():
+                    model_name_textbox = gr.Textbox(
+                        label="Method name",
+                    )
+                    revision_name_textbox = gr.Textbox(
+                        label="Revision Method Name",
+                    )
                     benchmark_types = gr.CheckboxGroup(
                         choices=TASK_INFO,
                         label="Benchmark Types",
                     )
                     similarity_tasks = gr.CheckboxGroup(
                         choices=similarity_tasks_options,
+                        label="Similarity Tasks",
                         interactive=True,
                     )
                     function_prediction_aspect = gr.Radio(
                         choices=function_prediction_aspect_options,
+                        label="Function Prediction Aspects",
                         interactive=True,
                     )
                     family_prediction_dataset = gr.CheckboxGroup(
                         choices=family_prediction_dataset_options,
+                        label="Family Prediction Datasets",
                         interactive=True,
                     )
                     function_dataset = gr.Textbox(
                         label="Function Prediction Datasets",
                         visible=False,
                         value="All_Data_Sets"
                     )
                     save_checkbox = gr.Checkbox(
                         label="Save results for leaderboard and visualization",
                         value=True
                     )
+            #with gr.Column():
             with gr.Row():
+                human_file = gr.components.File(label="The representation file (csv) for Human dataset", file_count="single", type='filepath')
+                skempi_file = gr.components.File(label="The representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')
             submit_button = gr.Button("Submit Eval")
+            submission_result = gr.Markdown()
             submit_button.click(
+                add_new_eval,
                 inputs=[
                     human_file,
                     skempi_file,
                     function_dataset,
                     family_prediction_dataset,
                     save_checkbox,
                 ],
             )
     with gr.Row():