Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ from warnings import warn
|
|
| 13 |
import gc
|
| 14 |
|
| 15 |
import numpy as np
|
| 16 |
-
|
| 17 |
from bigcodebench.data import get_bigcodebench, get_bigcodebench_hash, load_solutions
|
| 18 |
from bigcodebench.data.utils import CACHE_DIR
|
| 19 |
from bigcodebench.eval import PASS, compatible_eval_result, estimate_pass_at_k, untrusted_check
|
|
@@ -22,7 +22,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
|
|
| 22 |
|
| 23 |
REPO_ID = "bigcode/bigcodebench-evaluator"
|
| 24 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
| 25 |
-
|
| 26 |
Result = Tuple[str, List[bool]]
|
| 27 |
|
| 28 |
|
|
@@ -230,30 +230,30 @@ def evaluate(
|
|
| 230 |
return results, pass_at_k
|
| 231 |
|
| 232 |
|
| 233 |
-
def run_gradio():
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
|
| 258 |
|
| 259 |
def preload_gt():
|
|
@@ -272,12 +272,11 @@ def restart_space():
|
|
| 272 |
logging.error(f"Failed to restart space: {e}")
|
| 273 |
|
| 274 |
|
| 275 |
-
if __name__ == "__main__":
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
# evaluate("complete", "hard", "meta-llama--Llama-3.2-3B-Instruct--bigcodebench-instruct--vllm-0-1.jsonl")
|
|
|
|
| 13 |
import gc
|
| 14 |
|
| 15 |
import numpy as np
|
| 16 |
+
from huggingface_hub import HfApi
|
| 17 |
from bigcodebench.data import get_bigcodebench, get_bigcodebench_hash, load_solutions
|
| 18 |
from bigcodebench.data.utils import CACHE_DIR
|
| 19 |
from bigcodebench.eval import PASS, compatible_eval_result, estimate_pass_at_k, untrusted_check
|
|
|
|
| 22 |
|
| 23 |
REPO_ID = "bigcode/bigcodebench-evaluator"
|
| 24 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
| 25 |
+
API = HfApi(token=HF_TOKEN)
|
| 26 |
Result = Tuple[str, List[bool]]
|
| 27 |
|
| 28 |
|
|
|
|
| 230 |
return results, pass_at_k
|
| 231 |
|
| 232 |
|
| 233 |
+
# def run_gradio():
|
| 234 |
+
interface = gr.Interface(
|
| 235 |
+
fn=evaluate,
|
| 236 |
+
inputs=[
|
| 237 |
+
gr.Dropdown(["complete", "instruct"], label="BigCodeBench Split"),
|
| 238 |
+
gr.Dropdown(["full", "hard"], label="BigCodeBench Subset"),
|
| 239 |
+
gr.File(label="Samples Path (.jsonl)"),
|
| 240 |
+
gr.Textbox(label="Pass k Values (comma-separated)", value="1,5,10"),
|
| 241 |
+
gr.Slider(-1, multiprocessing.cpu_count(), step=1, label="Parallel Workers", value=-1),
|
| 242 |
+
gr.Slider(0.1, 10, step=0.1, label="Min Time Limit", value=1),
|
| 243 |
+
gr.Slider(1, 100 * 1024, step=1024, label="Max AS Limit", value=30 * 1024),
|
| 244 |
+
gr.Slider(1, 100 * 1024, step=1024, label="Max Data Limit", value=30 * 1024),
|
| 245 |
+
gr.Slider(1, 100, step=1, label="Max Stack Limit", value=10),
|
| 246 |
+
gr.Checkbox(label="Check GT Only"),
|
| 247 |
+
gr.Checkbox(label="No GT"),
|
| 248 |
+
],
|
| 249 |
+
outputs=[
|
| 250 |
+
gr.JSON(label="Results"),
|
| 251 |
+
gr.JSON(label="Eval Results"),
|
| 252 |
+
],
|
| 253 |
+
# concurrency_limit=None
|
| 254 |
+
)
|
| 255 |
+
interface.queue(default_concurrency_limit=None)
|
| 256 |
+
# interface.launch(show_error=True)
|
| 257 |
|
| 258 |
|
| 259 |
def preload_gt():
|
|
|
|
| 272 |
logging.error(f"Failed to restart space: {e}")
|
| 273 |
|
| 274 |
|
| 275 |
+
# if __name__ == "__main__":
|
| 276 |
+
preload_gt()
|
| 277 |
+
# run_gradio()
|
| 278 |
+
|
| 279 |
+
scheduler = BackgroundScheduler()
|
| 280 |
+
scheduler.add_job(restart_space, "interval", hours=1) # Restart every 1h
|
| 281 |
+
logging.info("Scheduler initialized to restart space every 1 hour.")
|
| 282 |
+
scheduler.start()
|
|
|