bigcodebench-evaluator

Sleeping

App Files Files Community

terryyz committed on Jul 29, 2024

Commit

f2539bf

verified ·

1 Parent(s): 7b6d04f

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -29

app.py CHANGED Viewed

@@ -5,6 +5,10 @@ import os
 import threading
 import time
 import uuid
 class Logger:
     def __init__(self, filename):
@@ -34,8 +38,10 @@ def generate_command(
     command = [default_command]
     if jsonl_file is not None:
-        samples = os.path.basename(jsonl_file.name)
-        command.extend(["--samples", samples])
     command.extend(["--split", split, "--subset", subset])
@@ -61,6 +67,21 @@ def generate_command(
     return " ".join(command)
 def run_bigcodebench(command):
     global is_running
     is_running = True
@@ -82,23 +103,27 @@ def run_bigcodebench(command):
     is_running = False
     yield "Evaluation completed.\n"
-def stream_logs(command):
     global is_running
     if is_running:
         yield "A command is already running. Please wait for it to finish.\n"
         return
     log_content = []
     for log_line in run_bigcodebench(command):
         log_content.append(log_line)
         yield "".join(log_content)
-def read_logs(log_file):
-    if os.path.exists(log_file):
-        with open(log_file, "r") as f:
-            return f.read()
-    return ""
 with gr.Blocks() as demo:
@@ -113,20 +138,23 @@ with gr.Blocks() as demo:
         save_pass_rate = gr.Checkbox(label="Save Pass Rate")
         parallel = gr.Number(label="Parallel (optional)", precision=0)
         min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
-        max_as_limit = gr.Number(label="Max AS Limit", value=200*1024, precision=0)
     with gr.Row():
-        max_data_limit = gr.Number(label="Max Data Limit", value=10*1024, precision=0)
-        max_stack_limit = gr.Number(label="Max Stack Limit", value=5, precision=0)
         check_gt_only = gr.Checkbox(label="Check GT Only")
         no_gt = gr.Checkbox(label="No GT")
     command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
-    submit_btn = gr.Button("Run Evaluation")
-    log_output = gr.Textbox(label="Execution Logs", lines=10)
-    # Hidden component to store the unique log file path
-    session_log_file = gr.State("")
     def update_command(*args):
         return generate_command(*args)
@@ -139,18 +167,25 @@ with gr.Blocks() as demo:
     for component in input_components:
         component.change(update_command, inputs=input_components, outputs=command_output)
-    def on_submit(command):
-        global is_running
-        if is_running:
-            yield "A command is already running. Please wait for it to finish."
-            return
-        log_accumulator = []
-        for log_line in run_bigcodebench(command):
-            log_accumulator.append(log_line)
-            yield "\n".join(log_accumulator)
-    submit_btn.click(stream_logs, inputs=[command_output], outputs=[log_output])
     # def update_logs(session_log_file):
     #     return read_logs(session_log_file)

 import threading
 import time
 import uuid
+import glob
+import shutil
+import urllib
+from pathlib import Path
 class Logger:
     def __init__(self, filename):
     command = [default_command]
     if jsonl_file is not None:
+        # Copy the uploaded file to the current directory
+        local_filename = os.path.basename(jsonl_file.name)
+        shutil.copy(jsonl_file.name, local_filename)
+        command.extend(["--samples", local_filename])
     command.extend(["--split", split, "--subset", subset])
     return " ".join(command)
def cleanup_previous_files(jsonl_file=None):
    """Delete leftover ``*.json``, ``*.log`` and ``*.jsonl`` files from the CWD.

    Args:
        jsonl_file: Optional file name (as returned by ``glob``, i.e. relative
            to the current directory) that must be kept, typically the
            samples file for the run that is about to start.

    Cleanup is best-effort: a file that cannot be removed is reported on
    stdout and the remaining files are still processed.
    """
    candidates = glob.glob("*.json") + glob.glob("*.log") + glob.glob("*.jsonl")
    for file in candidates:
        # Keep the file the caller asked us to preserve.
        if jsonl_file is not None and file == jsonl_file:
            continue
        # Only the removal itself can fail; catch filesystem errors
        # specifically instead of masking unrelated bugs.
        try:
            os.remove(file)
        except OSError as e:
            print(f"Error during cleanup of {file}: {e}")
def find_result_file():
    """Return the most recently modified ``*.json`` file in the CWD.

    Returns:
        The file name with the greatest modification time, or ``None`` when
        the current directory contains no ``*.json`` files.
    """
    return max(glob.glob("*.json"), key=os.path.getmtime, default=None)
 def run_bigcodebench(command):
     global is_running
     is_running = True
     is_running = False
     yield "Evaluation completed.\n"
+    result_file = find_result_file()
+    if result_file:
+        yield f"Result file found: {result_file}\n"
+    else:
+        yield "No result file found.\n"
def stream_logs(command, jsonl_file=None):
    """Run an evaluation command and stream its accumulated log output.

    Args:
        command: Command string handed to ``run_bigcodebench``.
        jsonl_file: Optional uploaded-file object exposing a ``.name`` path.
            Its base name is excluded from the pre-run cleanup.

    Yields:
        Status messages, then the concatenation of all log lines received so
        far each time ``run_bigcodebench`` produces a new line.
    """
    global is_running
    # Must be bound even when no file was uploaded; otherwise the cleanup
    # call below raises UnboundLocalError on the default code path.
    local_filename = None
    if jsonl_file is not None:
        local_filename = os.path.basename(jsonl_file.name)
    if is_running:
        yield "A command is already running. Please wait for it to finish.\n"
        return
    cleanup_previous_files(local_filename)
    yield "Cleaned up previous files.\n"
    log_content = []
    for log_line in run_bigcodebench(command):
        log_content.append(log_line)
        # Emit the full log so far so the consumer can simply replace its text.
        yield "".join(log_content)
 with gr.Blocks() as demo:
         save_pass_rate = gr.Checkbox(label="Save Pass Rate")
         parallel = gr.Number(label="Parallel (optional)", precision=0)
         min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
+        max_as_limit = gr.Number(label="Max AS Limit", value=30*1024, precision=0)
     with gr.Row():
+        max_data_limit = gr.Number(label="Max Data Limit", value=30*1024, precision=0)
+        max_stack_limit = gr.Number(label="Max Stack Limit", value=20, precision=0)
         check_gt_only = gr.Checkbox(label="Check GT Only")
         no_gt = gr.Checkbox(label="No GT")
     command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
+    with gr.Row():
+        submit_btn = gr.Button("Run Evaluation")
+        download_btn = gr.DownloadButton(label="Download Result", visible=False)
+    log_output = gr.Textbox(label="Execution Logs", lines=20)
+    # Hidden component to store the result file path
+    # result_file_path = gr.State("")
     def update_command(*args):
         return generate_command(*args)
     for component in input_components:
         component.change(update_command, inputs=input_components, outputs=command_output)
+    def start_evaluation(command, jsonl_file):
+        for log in stream_logs(command, jsonl_file):
+            yield log, gr.update(), gr.update()
+        result_file = find_result_file()
+        if result_file:
+            print(f"Result file: {result_file}")
+            urllib.request.urlretrieve(result_file, result_file)
+            return (gr.update(label="Evaluation completed. Result file found."),
+                    gr.Button(visible=False),
+                    gr.DownloadButton(label="Download Result", value=result_file))
+        else:
+            return (gr.update(label="Evaluation completed. No result file found."),
+                    gr.Button("Run Evaluation"),
+                    gr.DownloadButton(visible=False))
+    submit_btn.click(start_evaluation,
+                 inputs=[command_output, jsonl_file],
+                 outputs=[log_output, submit_btn, download_btn])
     # def update_logs(session_log_file):
     #     return read_logs(session_log_file)