lvkaokao committed
Commit · cf7af95 · 1 parent: a16a56e

update model size.

Files changed:
- src/submission/check_validity.py (+53, -0)
- src/submission/submit.py (+4, -17)
src/submission/check_validity.py CHANGED

@@ -90,6 +90,59 @@ def get_model_size(model_info: ModelInfo, precision: str):
     # model_size = size_factor * model_size
     return model_size
 
+KNOWN_SIZE_FACTOR = {
+    "gptq": {"4bit": 8, "8bit": 4},
+    "awq": {"4bit": 8},
+    "bitsandbytes": {"4bit": 2}
+}
+
+BYTES = {
+    "I32": 4,
+    "F16": 2,
+    "BF16": 2,
+    "F32": 4,
+    "U8": 1}
+
+def get_quantized_model_parameters_memory(model_info: ModelInfo, quant_method="", bits="4bit"):
+    try:
+        safetensors = get_safetensors_metadata(model_info.id)
+        num_parameters = 0
+        mem = 0
+        for key in safetensors.parameter_count:
+            mem += safetensors.parameter_count[key] * BYTES[key]
+
+            if key in ["I32", "U8"]:
+                num_parameters += safetensors.parameter_count[key] * KNOWN_SIZE_FACTOR[quant_method][bits]
+        params_b = round(num_parameters / 1e9, 2)
+        size_gb = round(mem / 1e9, 2)
+        return params_b, size_gb
+    except Exception as e:
+        print(str(e))
+
+    filenames = [sib.rfilename for sib in model_info.siblings]
+    if "pytorch_model.bin" in filenames:
+        url = hf_hub_url(model_info.id, filename="pytorch_model.bin")
+        meta = get_hf_file_metadata(url)
+        params_b = round(meta.size * 2 / 1e9, 2)
+        size_gb = round(meta.size / 1e9, 2)
+        return params_b, size_gb
+
+    if "pytorch_model.bin.index.json" in filenames:
+        index_path = hf_hub_download(model_info.id, filename="pytorch_model.bin.index.json")
+        """
+        {
+            "metadata": {
+                "total_size": 28272820224
+            },....
+        """
+        size = json.load(open(index_path))
+        bytes_per_param = 2
+        if ("metadata" in size) and ("total_size" in size["metadata"]):
+            return round(size["metadata"]["total_size"] / bytes_per_param / 1e9, 2), \
+                   round(size["metadata"]["total_size"] / 1e9, 2)
+
+    return None, None
+
 def get_model_arch(model_info: ModelInfo):
     return model_info.config.get("architectures", "Unknown")
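For reference, the packing arithmetic the new helper relies on: GPTQ and AWQ 4-bit checkpoints store eight weights in each packed I32 element (4 bytes / 0.5 bytes per weight), and bitsandbytes 4-bit packs two weights per U8 byte, which is where the factors in KNOWN_SIZE_FACTOR come from. Below is a minimal self-contained sketch of that arithmetic on a synthetic parameter_count; the tensor counts are made up for illustration, not taken from a real checkpoint.

    KNOWN_SIZE_FACTOR = {"gptq": {"4bit": 8, "8bit": 4}, "awq": {"4bit": 8}, "bitsandbytes": {"4bit": 2}}
    BYTES = {"I32": 4, "F16": 2, "BF16": 2, "F32": 4, "U8": 1}

    # Made-up breakdown for a ~6B-param 4-bit GPTQ checkpoint:
    # packed int32 weights plus fp16 scales, zeros and embeddings.
    parameter_count = {"I32": 800_000_000, "F16": 500_000_000}

    # Memory counts every stored element at its dtype width.
    mem = sum(n * BYTES[dtype] for dtype, n in parameter_count.items())
    # Only packed integer tensors contribute to the parameter total.
    params = sum(n * KNOWN_SIZE_FACTOR["gptq"]["4bit"]
                 for dtype, n in parameter_count.items()
                 if dtype in ("I32", "U8"))
    print(round(params / 1e9, 2), "B params,", round(mem / 1e9, 2), "GB")
    # -> 6.4 B params, 4.2 GB

Note that fp16 scales and embeddings are excluded from the parameter count but still counted toward memory, matching the helper's safetensors branch above.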
src/submission/submit.py CHANGED

@@ -11,6 +11,7 @@ from src.submission.check_validity import (
     already_submitted_models,
     check_model_card,
     get_model_size,
+    get_quantized_model_parameters_memory,
     is_model_on_hub,
     is_gguf_on_hub,
     user_submission_permission,
@@ -95,10 +96,6 @@ def add_new_eval(
     except Exception:
         return styled_error("Could not get your model information. Please fill it up properly.")
 
-
-    # ToDo: need to chek
-    model_size = get_model_size(model_info=model_info, precision=precision)
-
     # Were the model card and license filled?
     try:
         if model_info.cardData is None:
@@ -146,15 +143,9 @@ def add_new_eval(
     if quant_type is None or quant_type == "":
         return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
 
-
-
-
-    if precision == "4bit":
-        model_size = model_params * 0.5
-
-    if precision == "8bit":
-        model_size = model_params
-
+    model_params, model_size = get_quantized_model_parameters_memory(model_info,
+                                                                     quant_method=quant_type.lower(),
+                                                                     bits=precision)
 
     if quant_type == "llama.cpp":
         hardware = "cpu"
@@ -163,9 +154,6 @@ def add_new_eval(
     else:
         hardware = "gpu"
 
-    # model = "/dataset/llama3_8b_instruct-chat-autoround-w4g128-gpu"
-    # all on gpu
-    # hardware = "gpu"
    if hardware == "gpu" and compute_dtype == "bfloat16":
         compute_dtype = "float16"
 
@@ -201,7 +189,6 @@ def add_new_eval(
     "created_at": created_at
     }
     print(eval_entry)
-    print(supplementary_info)
 
     # ToDo: need open
     # Check for duplicate submission
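The submit-side change replaces the old precision-based estimate (model_params * 0.5 for 4-bit, model_params for 8-bit) with the metadata-driven helper. A hedged usage sketch outside the submission flow, assuming a 4-bit GPTQ safetensors model; the repo id below is only an example, and the keyword values mirror what add_new_eval passes (quant_type.lower() yields keys like "gptq"/"awq", precision is the bit-width string such as "4bit"):

    from huggingface_hub import HfApi
    from src.submission.check_validity import get_quantized_model_parameters_memory

    # Example repo id; any GPTQ 4-bit safetensors model on the Hub works.
    model_info = HfApi().model_info("TheBloke/Llama-2-7B-GPTQ")
    model_params, model_size = get_quantized_model_parameters_memory(
        model_info, quant_method="gptq", bits="4bit"
    )
    print(f"{model_params} B params, {model_size} GB")

If the quant method is not in KNOWN_SIZE_FACTOR, the lookup raises inside the try block and the helper falls back to the pytorch_model.bin paths, returning (None, None) when no recognizable weight file is found.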