Clémentine committed · Commit 6bc96ff · Parent(s): 8b88d2c

debug inference endpoint launch and requirements

Files changed:
- app.py +1 -1
- requirements.txt +5 -1
- src/backend/run_eval_suite_lighteval.py +22 -7
app.py CHANGED
@@ -19,8 +19,8 @@ This is just a visual for the auto evaluator. Note that the lines of the log vis
 with gr.Blocks(js=dark_mode_gradio_js) as demo:
     with gr.Tab("Application"):
         gr.Markdown(intro_md)
-        dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
         output = gr.HTML(log_file_to_html_string, every=10)
+        dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
 
 if __name__ == '__main__':
     demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0", show_error=True, server_port=7860)
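Note: the app.py change only reorders the two polled components so the visible log viewer is created before the hidden Markdown whose callable drives run_auto_eval. For context, here is a minimal sketch of the same Gradio polling pattern; it assumes Gradio 4.x (where a component whose value is a callable is re-run every "every" seconds once the queue is enabled), and render_status plus the intervals are placeholders rather than the Space's real functions.

# Illustrative sketch only, not the Space's code.
import datetime
import gradio as gr

def render_status() -> str:
    # Stand-in for log_file_to_html_string / run_auto_eval in the real app.
    return f"<p>last refresh: {datetime.datetime.now():%H:%M:%S}</p>"

with gr.Blocks() as demo:
    with gr.Tab("Application"):
        # Visible component: its callable value is re-evaluated every 10 seconds.
        gr.HTML(render_status, every=10)
        # Hidden component: exists only so its callable runs on a timer,
        # mirroring dummy = gr.Markdown(run_auto_eval, ...) in app.py.
        gr.Markdown(render_status, every=60, visible=False)

if __name__ == "__main__":
    # every= polling only runs while the queue is enabled.
    demo.queue().launch()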
requirements.txt CHANGED
@@ -18,7 +18,11 @@ git+https://github.com/huggingface/lighteval.git#egg=lighteval
 accelerate==0.24.1
 sentencepiece
 
+# Evaluation suites
+lighteval
+lm_eval
+
 # Log Visualizer
-
+BeautifulSoup4==4.12.2
 lxml==4.9.3
 rich==13.3.4
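Note: the requirements change adds the evaluation suite packages (lighteval and lm_eval) and BeautifulSoup4 for the log visualizer. A quick sanity check that the new dependencies resolve, offered as an illustration rather than part of the commit (BeautifulSoup4 imports as bs4):

# Not part of the commit: verify the newly added packages import cleanly
# in the Space's environment.
import importlib

for name in ("lighteval", "lm_eval", "bs4"):
    module = importlib.import_module(name)
    print(f"{name}: {getattr(module, '__version__', 'imported ok')}")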
src/backend/run_eval_suite_lighteval.py CHANGED
@@ -1,5 +1,5 @@
 import json
-import
+import argparse
 import logging
 from datetime import datetime
 
@@ -16,15 +16,18 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
     if limit:
         logger.info("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
 
-
-
+    args_dict = {
+        # Endpoint parameters
+        "endpoint_model_name":eval_request.model,
         "accelerator": accelerator,
         "vendor": vendor,
         "region": region,
         "instance_size": instance_size,
         "instance_type": instance_type,
-        "
-        "
+        "reuse_existing": False,
+        "model_dtype": eval_request.precision,
+        "revision": eval_request.revision,
+        # Save parameters
         "push_results_to_hub": True,
         "save_details": True,
         "push_details_to_hub": True,
@@ -32,10 +35,22 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
         "cache_dir": CACHE_PATH,
         "results_org": RESULTS_REPO,
         "output_dir": local_dir,
+        "job_id": str(datetime.now()),
+        # Experiment parameters
         "override_batch_size": batch_size,
         "custom_tasks": "custom_tasks.py",
-        "tasks": task_names
+        "tasks": task_names,
+        "max_samples": limit,
+        "use_chat_template": False,
+        "system_prompt": None,
+        # Parameters which would be set to things by the kwargs if actually using argparse
+        "inference_server_address": None,
+        "model_args": None,
+        "num_fewshot_seeds": None,
+        "delta_weights": False,
+        "adapter_weights": False
     }
+    args = argparse.Namespace(**args_dict)
 
     try:
         results = main(args)
@@ -47,7 +62,7 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
         dumped = json.dumps(results, indent=2)
         logger.info(dumped)
     except Exception: # if eval failed, we force a cleanup
-        env_config = EnvConfig(token=TOKEN, cache_dir=args
+        env_config = EnvConfig(token=TOKEN, cache_dir=args.cache_dir)
 
         model_config = create_model_config(args=args, accelerator=accelerator)
         model, _ = load_model(config=model_config, env_config=env_config)
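Note: the core fix in src/backend/run_eval_suite_lighteval.py is to assemble every argument the evaluation entrypoint expects into a plain dict and wrap it in argparse.Namespace, so a CLI-style main(args) can be called programmatically. Fields the CLI would normally default (inference_server_address, model_args, num_fewshot_seeds, and so on) have to be filled in by hand, which is what most of the added lines do. A minimal, self-contained sketch of that pattern, with a hypothetical run_suite standing in for lighteval's actual entrypoint:

# Sketch of the dict -> argparse.Namespace pattern used in this commit.
# run_suite and its expected attributes are invented stand-ins, not lighteval's API.
import argparse

def run_suite(args: argparse.Namespace) -> None:
    # A CLI-style entrypoint reads attributes exactly as if argparse had
    # parsed them from command-line flags.
    print(f"running {args.tasks} with batch size {args.override_batch_size}")

args_dict = {
    "tasks": "example|task|0|0",
    "override_batch_size": 1,
    # Anything the downstream code might read must be present, even if the
    # real CLI would have defaulted it; missing keys surface as AttributeError.
    "max_samples": None,
}

args = argparse.Namespace(**args_dict)
run_suite(args)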