Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,22 +13,25 @@ from torchao.quantization import (
|
|
| 13 |
Int8DynamicActivationInt8WeightConfig,
|
| 14 |
Float8WeightOnlyConfig,
|
| 15 |
Float8DynamicActivationFloat8WeightConfig,
|
|
|
|
| 16 |
)
|
| 17 |
|
| 18 |
MAP_QUANT_TYPE_TO_NAME = {
|
| 19 |
-
"
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
-
"
|
| 23 |
-
"
|
|
|
|
| 24 |
"autoquant": "autoquant",
|
| 25 |
}
|
| 26 |
MAP_QUANT_TYPE_TO_CONFIG = {
|
| 27 |
-
"
|
| 28 |
-
"
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
-
"
|
|
|
|
| 32 |
}
|
| 33 |
|
| 34 |
|
|
@@ -56,8 +59,7 @@ def check_model_exists(
|
|
| 56 |
repo_name = f"{username}/{quantized_model_name}"
|
| 57 |
else:
|
| 58 |
if (
|
| 59 |
-
quantization_type
|
| 60 |
-
or quantization_type == "int8_weight_only"
|
| 61 |
) and (group_size is not None):
|
| 62 |
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}-gs{group_size}"
|
| 63 |
else:
|
|
@@ -173,13 +175,13 @@ def quantize_model(
|
|
| 173 |
print(f"Quantizing model: {quantization_type}")
|
| 174 |
progress(0, desc="Preparing Quantization")
|
| 175 |
if (
|
| 176 |
-
quantization_type == "
|
| 177 |
):
|
| 178 |
quant_config = MAP_QUANT_TYPE_TO_CONFIG[quantization_type](
|
| 179 |
group_size=group_size
|
| 180 |
)
|
| 181 |
quantization_config = TorchAoConfig(quant_config)
|
| 182 |
-
elif quantization_type == "
|
| 183 |
from torchao.dtypes import Int4CPULayout
|
| 184 |
|
| 185 |
quant_config = MAP_QUANT_TYPE_TO_CONFIG[quantization_type](
|
|
@@ -233,8 +235,7 @@ def save_model(
|
|
| 233 |
repo_name = f"{username}/{quantized_model_name}"
|
| 234 |
else:
|
| 235 |
if (
|
| 236 |
-
quantization_type
|
| 237 |
-
or quantization_type == "int8_weight_only"
|
| 238 |
) and (group_size is not None):
|
| 239 |
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}-gs{group_size}"
|
| 240 |
else:
|
|
@@ -318,7 +319,7 @@ def quantize_and_save(
|
|
| 318 |
return """
|
| 319 |
<div class="error-box">
|
| 320 |
<h3>❌ Group Size Error</h3>
|
| 321 |
-
<p>Group Size is a
|
| 322 |
</div>
|
| 323 |
"""
|
| 324 |
|
|
@@ -492,11 +493,12 @@ with gr.Blocks(css=css) as demo:
|
|
| 492 |
quantization_type = gr.Dropdown(
|
| 493 |
info="Select the Quantization method",
|
| 494 |
choices=[
|
| 495 |
-
"
|
| 496 |
-
"
|
| 497 |
-
"
|
| 498 |
-
"
|
| 499 |
-
"
|
|
|
|
| 500 |
"autoquant",
|
| 501 |
],
|
| 502 |
value="int8_weight_only",
|
|
@@ -549,11 +551,18 @@ with gr.Blocks(css=css) as demo:
|
|
| 549 |
## Quantization Options
|
| 550 |
|
| 551 |
### Quantization Types
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 557 |
- **autoquant**: automatic quantization (uses the best quantization method for the model)
|
| 558 |
|
| 559 |
### Group Size
|
|
|
|
| 13 |
Int8DynamicActivationInt8WeightConfig,
|
| 14 |
Float8WeightOnlyConfig,
|
| 15 |
Float8DynamicActivationFloat8WeightConfig,
|
| 16 |
+
GemliteUIntXWeightOnlyConfig,
|
| 17 |
)
|
| 18 |
|
| 19 |
MAP_QUANT_TYPE_TO_NAME = {
|
| 20 |
+
"Int4WeightOnly": "int4wo",
|
| 21 |
+
"GemliteUIntXWeightOnly": "intxwo-gemlite",
|
| 22 |
+
"Int8WeightOnly": "int8wo",
|
| 23 |
+
"Int8DynamicActivationInt8Weight": "int8da8w8",
|
| 24 |
+
"Float8WeightOnly": "float8wo",
|
| 25 |
+
"Float8DynamicActivationFloat8Weight": "float8da8w8",
|
| 26 |
"autoquant": "autoquant",
|
| 27 |
}
|
| 28 |
MAP_QUANT_TYPE_TO_CONFIG = {
|
| 29 |
+
"Int4WeightOnly": Int4WeightOnlyConfig,
|
| 30 |
+
"GemliteUIntXWeightOnly": GemliteUIntXWeightOnlyConfig,
|
| 31 |
+
"Int8WeightOnly": Int8WeightOnlyConfig,
|
| 32 |
+
"Int8DynamicActivationInt8Weight": Int8DynamicActivationInt8WeightConfig,
|
| 33 |
+
"Float8WeightOnly": Float8WeightOnlyConfig,
|
| 34 |
+
"Float8DynamicActivationFloat8Weight": Float8DynamicActivationFloat8WeightConfig,
|
| 35 |
}
|
| 36 |
|
| 37 |
|
|
|
|
| 59 |
repo_name = f"{username}/{quantized_model_name}"
|
| 60 |
else:
|
| 61 |
if (
|
| 62 |
+
quantization_type in ["Int4WeightOnly", "GemliteUIntXWeightOnly"]
|
|
|
|
| 63 |
) and (group_size is not None):
|
| 64 |
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}-gs{group_size}"
|
| 65 |
else:
|
|
|
|
| 175 |
print(f"Quantizing model: {quantization_type}")
|
| 176 |
progress(0, desc="Preparing Quantization")
|
| 177 |
if (
|
| 178 |
+
quantization_type == "GemliteUIntXWeightOnly"
|
| 179 |
):
|
| 180 |
quant_config = MAP_QUANT_TYPE_TO_CONFIG[quantization_type](
|
| 181 |
group_size=group_size
|
| 182 |
)
|
| 183 |
quantization_config = TorchAoConfig(quant_config)
|
| 184 |
+
elif quantization_type == "Int4WeightOnly":
|
| 185 |
from torchao.dtypes import Int4CPULayout
|
| 186 |
|
| 187 |
quant_config = MAP_QUANT_TYPE_TO_CONFIG[quantization_type](
|
|
|
|
| 235 |
repo_name = f"{username}/{quantized_model_name}"
|
| 236 |
else:
|
| 237 |
if (
|
| 238 |
+
quantization_type in ["Int4WeightOnly", "GemliteUIntXWeightOnly"]
|
|
|
|
| 239 |
) and (group_size is not None):
|
| 240 |
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}-gs{group_size}"
|
| 241 |
else:
|
|
|
|
| 319 |
return """
|
| 320 |
<div class="error-box">
|
| 321 |
<h3>❌ Group Size Error</h3>
|
| 322 |
+
<p>Group Size is a parameter for Int4WeightOnly or GemliteUIntXWeightOnly</p>
|
| 323 |
</div>
|
| 324 |
"""
|
| 325 |
|
|
|
|
| 493 |
quantization_type = gr.Dropdown(
|
| 494 |
info="Select the Quantization method",
|
| 495 |
choices=[
|
| 496 |
+
"Int4WeightOnly",
|
| 497 |
+
"GemliteUIntXWeightOnly",
|
| 498 |
+
"Int8WeightOnly",
|
| 499 |
+
"Int8DynamicActivationInt8Weight",
|
| 500 |
+
"Float8WeightOnly",
|
| 501 |
+
"Float8DynamicActivationFloat8Weight",
|
| 502 |
"autoquant",
|
| 503 |
],
|
| 504 |
value="int8_weight_only",
|
|
|
|
| 551 |
## Quantization Options
|
| 552 |
|
| 553 |
### Quantization Types
|
| 554 |
+
"Int4WeightOnly",
|
| 555 |
+
"GemliteUIntXWeightOnly"
|
| 556 |
+
"Int8WeightOnly",
|
| 557 |
+
"Int8DynamicActivationInt8Weight",
|
| 558 |
+
"Float8WeightOnly",
|
| 559 |
+
"Float8DynamicActivationFloat8Weight",
|
| 560 |
+
- **Int4WeightOnly**: 4-bit weight-only quantization
|
| 561 |
+
- **GemliteUIntXWeightOnly**: uintx gemlite quantization (default to 4 bit only for now)
|
| 562 |
+
- **Int8WeightOnly**: 8-bit weight-only quantization
|
| 563 |
+
- **Int8DynamicActivationInt8Weight**: 8-bit quantization for both weights and activations
|
| 564 |
+
- **Float8WeightOnly**: float8-bit weight-only quantization
|
| 565 |
+
- **Float8DynamicActivationFloat8Weight**: float8-bit quantization for both weights and activations
|
| 566 |
- **autoquant**: automatic quantization (uses the best quantization method for the model)
|
| 567 |
|
| 568 |
### Group Size
|