Update evaluation_queue.py

evaluation_queue.py  +108 -25
@@ -68,23 +68,57 @@ class EvaluationQueue:
             # Query model info from the HuggingFace API
             model_info_obj = self.hf_api.model_info(model_id)
 
+            # Initialize total size
+            total_size_gb = 0
+
+            # Try different approaches to get model size based on API response structure
             if hasattr(model_info_obj, 'safetensors') and model_info_obj.safetensors:
+                # New API format with safetensors dict
+                for file_info in model_info_obj.safetensors.values():
+                    if hasattr(file_info, 'size'):
+                        total_size_gb += file_info.size / (1024 * 1024 * 1024)
+                    elif isinstance(file_info, dict) and 'size' in file_info:
+                        total_size_gb += file_info['size'] / (1024 * 1024 * 1024)
+
+            # Fallback to siblings method
+            if total_size_gb == 0 and hasattr(model_info_obj, 'siblings'):
+                for sibling in model_info_obj.siblings:
+                    if hasattr(sibling, 'size'):
+                        if sibling.rfilename.endswith(('.bin', '.safetensors', '.pt')):
+                            total_size_gb += sibling.size / (1024 * 1024 * 1024)
+                    elif isinstance(sibling, dict) and 'size' in sibling:
+                        if sibling.get('rfilename', '').endswith(('.bin', '.safetensors', '.pt')):
+                            total_size_gb += sibling['size'] / (1024 * 1024 * 1024)
+
+            # If we still couldn't determine size, try a reasonable guess based on model name
+            if total_size_gb == 0:
+                # Try to guess from model name (e.g., if it has "7b" in the name)
+                model_name = model_id.lower()
+                size_indicators = {
+                    "1b": 1, "2b": 2, "3b": 3, "5b": 5, "7b": 7, "8b": 8,
+                    "10b": 10, "13b": 13, "20b": 20, "30b": 30, "65b": 65, "70b": 70
+                }
+
+                for indicator, size in size_indicators.items():
+                    if indicator in model_name.replace("-", "").replace("_", ""):
+                        total_size_gb = size * 2  # Rough estimate: param count × 2 for size in GB
+                        break
+
+            # If we still couldn't determine size, use a default
+            if total_size_gb == 0:
+                # Try direct API method
+                try:
+                    print(f"Checking model size with direct method for {model_id}")
+                    # Print out the entire structure for debugging
+                    print(f"Model info: {model_info_obj.__dict__}")
+
+                    # Default to a conservative estimate
+                    total_size_gb = 5  # Assume a 5GB model as default
+                except Exception as e:
+                    print(f"Direct size check failed: {e}")
+                    return True, "Unable to determine model size accurately, but allowing submission with caution"
 
+            # Account for memory overhead
             estimated_ram_needed = total_size_gb * 1.3  # 30% overhead
 
             # Check against limit
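Note on the fallback chain above: it sums weight-file sizes when it can, otherwise guesses from the parameter count in the model name at roughly 2 bytes per parameter, so a "13b" model is taken as about 26 GB, or roughly 33.8 GB RAM after the 1.3x overhead. A minimal standalone sketch of the sibling-based path, assuming `huggingface_hub` is installed; passing `files_metadata=True` asks the Hub to include per-file sizes, which a plain `model_info()` call may leave unset. The helper name and weight-extension tuple are illustrative, not part of the commit:

from huggingface_hub import HfApi

WEIGHT_EXTS = (".bin", ".safetensors", ".pt")

def estimate_weight_size_gb(model_id: str) -> float:
    """Sum the sizes of weight files in a Hub repo, in GB."""
    info = HfApi().model_info(model_id, files_metadata=True)
    total_bytes = sum(
        sibling.size or 0  # size can still be None for some files
        for sibling in (info.siblings or [])
        if sibling.rfilename.endswith(WEIGHT_EXTS)
    )
    return total_bytes / (1024 ** 3)

if __name__ == "__main__":
    print(f"gpt2 weights: {estimate_weight_size_gb('gpt2'):.2f} GB")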
@@ -92,11 +126,15 @@ class EvaluationQueue:
                 return False, f"Model is too large (approximately {total_size_gb:.1f}GB, needs {estimated_ram_needed:.1f}GB RAM). Maximum allowed is {self.memory_limit_gb}GB."
 
             return True, f"Model size check passed ({total_size_gb:.1f}GB, estimated {estimated_ram_needed:.1f}GB RAM usage)"
+
         except Exception as e:
             print(f"Model size check error: {e}")
+            # Log more details for debugging
+            import traceback
+            traceback.print_exc()
+
+            # Allow submission with warning
+            return True, f"Warning: Could not verify model size ({str(e)}). Please ensure your model is under {self.memory_limit_gb}GB."
 
     def _process_queue(self):
         """Process the evaluation queue in a separate thread."""
@@ -809,6 +847,27 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
         gr.Blocks: Gradio Blocks component with model submission UI
     """
     with gr.Blocks() as submission_ui:
+        # Store user authentication state
+        user_state = gr.State(None)
+
+        # Check authentication on load
+        def check_auth_on_load(request: gr.Request):
+            if request:
+                # Special handling for HF Spaces OAuth
+                if 'SPACE_ID' in os.environ:
+                    username = request.headers.get("HF-User")
+                    if username:
+                        user = db_manager.get_user_by_username(username)
+                        if user:
+                            print(f"User authenticated via HF Spaces OAuth: {username}")
+                            return user
+                else:
+                    # Standard token-based auth
+                    user = auth_manager.check_login(request)
+                    if user:
+                        return user
+            return None
+
         with gr.Tab("Submit Model"):
             gr.Markdown(f"""
             ### Model Size Restrictions
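For context on `check_auth_on_load`: Gradio injects the incoming HTTP request into any event handler whose parameter is annotated `gr.Request`, without that parameter appearing in `inputs`. A minimal sketch of the mechanism; the `HF-User` header is the one this commit reads for Spaces OAuth, and the component names here are placeholders:

import gradio as gr

def whoami(request: gr.Request):
    # Gradio fills `request` automatically because of the annotation;
    # it is never declared in `inputs`.
    username = request.headers.get("HF-User") if request else None
    return f"Hello, {username}" if username else "Not logged in"

with gr.Blocks() as demo:
    greeting = gr.Markdown("")
    demo.load(fn=whoami, inputs=[], outputs=[greeting])

demo.launch()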
@@ -826,8 +885,7 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
 
             check_size_button = gr.Button("Check Model Size")
             size_check_result = gr.Markdown("")
-
-            model_name_input = gr.Textbox(
+            model_name_input = gr.Textbox(
                 placeholder="Display name for your model",
                 label="Model Name"
             )
@@ -862,6 +920,7 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
 
             submit_model_button = gr.Button("Submit for Evaluation")
             submission_status = gr.Markdown("")
+            auth_message = gr.Markdown("")
 
         with gr.Tab("Evaluation Queue"):
             refresh_queue_button = gr.Button("Refresh Queue")
@@ -897,11 +956,14 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
                 else:
                     return f"❌ {message}"
             except Exception as e:
+                print(f"Model size check error: {e}")
+                import traceback
+                traceback.print_exc()
                 return f"Error checking model size: {str(e)}"
 
         def refresh_benchmarks_handler():
             benchmarks = db_manager.get_benchmarks()
+
             # Format for dropdown - properly formatted to display names
             choices = []
             for b in benchmarks:
@@ -913,10 +975,8 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
 
             return gr.update(choices=choices)
 
-        def submit_model_handler(model_id, model_name, model_description, model_parameters, model_tag, benchmark_id,
+        def submit_model_handler(model_id, model_name, model_description, model_parameters, model_tag, benchmark_id, user):
             # Check if user is logged in
-            user = auth_manager.check_login(request)
-
             if not user:
                 return "Please log in to submit a model."
 
@@ -958,6 +1018,9 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
                 else:
                     return message
             except Exception as e:
+                print(f"Error submitting model: {str(e)}")
+                import traceback
+                traceback.print_exc()
                 return f"Error submitting model: {str(e)}"
 
         def refresh_queue_handler():
@@ -991,6 +1054,13 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
 
             return stats, eval_data, "No evaluation currently running", "Progress: 0%"
 
+        # Update authentication status
+        def update_auth_message(user):
+            if user:
+                return f"Logged in as {user['username']}"
+            else:
+                return "Please log in to submit a model."
+
         # Connect event handlers
         check_size_button.click(
             fn=check_model_size_handler,
@@ -1012,7 +1082,8 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
                 model_description_input,
                 model_parameters_input,
                 model_tag_input,
-                benchmark_dropdown
+                benchmark_dropdown,
+                user_state
             ],
             outputs=[submission_status]
         )
@@ -1024,6 +1095,18 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
         )
 
         # Initialize on load
+        submission_ui.load(
+            fn=check_auth_on_load,
+            inputs=[],
+            outputs=[user_state]
+        )
+
+        submission_ui.load(
+            fn=lambda user: update_auth_message(user),
+            inputs=[user_state],
+            outputs=[auth_message]
+        )
+
         submission_ui.load(
             fn=refresh_benchmarks_handler,
             inputs=[],
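One caveat on the wiring above: the two `.load` events that populate `user_state` and then render `auth_message` are registered independently, and Gradio does not promise that separate load events run in order, so the message could in principle be rendered before the state is set. If ordering matters, chaining with `.then()` is the safer pattern. A sketch under that assumption, with placeholder handlers standing in for `check_auth_on_load` and `update_auth_message`:

import gradio as gr

def load_user():
    # placeholder for check_auth_on_load
    return {"username": "alice"}

def describe(user):
    return f"Logged in as {user['username']}" if user else "Please log in to submit a model."

with gr.Blocks() as demo:
    user_state = gr.State(None)   # per-session value shared across events
    auth_message = gr.Markdown("")

    # .then() runs describe() only after load_user() has written user_state
    demo.load(fn=load_user, inputs=[], outputs=[user_state]).then(
        fn=describe, inputs=[user_state], outputs=[auth_message]
    )

demo.launch()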