Update app.py
Browse files
app.py
CHANGED
|
@@ -207,10 +207,9 @@ def quantize(model_path, repo_id, quant_method=None):
|
|
| 207 |
|
| 208 |
return final_path
|
| 209 |
|
| 210 |
-
def create_readme(repo_name, base_model_name, lora_model_name, quant_methods
|
| 211 |
readme_path = os.path.join("output", repo_name, "README.md")
|
| 212 |
-
readme_template = """---
|
| 213 |
-
tags:
|
| 214 |
- autotrain
|
| 215 |
- text-generation-inference
|
| 216 |
- text-generation
|
|
@@ -236,32 +235,6 @@ datasets:
|
|
| 236 |
- created_at: {created_at}
|
| 237 |
- created_by: [Steven10429/apply_lora_and_quantize](https://github.com/Steven10429/apply_lora_and_quantize)
|
| 238 |
|
| 239 |
-
## Usage:
|
| 240 |
-
```python
|
| 241 |
-
|
| 242 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 243 |
-
|
| 244 |
-
model_path = "{username}/{repo_name}"
|
| 245 |
-
|
| 246 |
-
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 247 |
-
model = AutoModelForCausalLM.from_pretrained(
|
| 248 |
-
model_path,
|
| 249 |
-
device_map="auto",
|
| 250 |
-
torch_dtype='auto'
|
| 251 |
-
).eval()
|
| 252 |
-
|
| 253 |
-
# Prompt content: "hi"
|
| 254 |
-
messages = [
|
| 255 |
-
{"role": "user", "content": "hi"}
|
| 256 |
-
]
|
| 257 |
-
|
| 258 |
-
input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
|
| 259 |
-
output_ids = model.generate(input_ids.to('cuda'))
|
| 260 |
-
response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
|
| 261 |
-
|
| 262 |
-
# Model response: "Hello! How can I assist you today?"
|
| 263 |
-
print(response)
|
| 264 |
-
```
|
| 265 |
""".format(
|
| 266 |
quantization="\n- quantization" if len(quant_methods) > 0 else "",
|
| 267 |
base_model_name=base_model_name,
|
|
@@ -269,7 +242,6 @@ print(response)
|
|
| 269 |
repo_name=repo_name,
|
| 270 |
quant_methods=quant_methods,
|
| 271 |
created_at=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
|
| 272 |
-
username=username
|
| 273 |
)
|
| 274 |
|
| 275 |
with open(readme_path, "w") as f:
|
|
@@ -288,8 +260,11 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
|
|
| 288 |
"""
|
| 289 |
try:
|
| 290 |
current_logs.clear()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
login(hf_token)
|
| 292 |
-
os.environ["HF_TOKEN"] = hf_token
|
| 293 |
api = HfApi(token=hf_token)
|
| 294 |
username = api.whoami()["name"]
|
| 295 |
|
|
@@ -312,7 +287,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
|
|
| 312 |
for quant_method in quant_methods:
|
| 313 |
quantize(output_dir, repo_name, quant_method=quant_method)
|
| 314 |
|
| 315 |
-
create_readme(repo_name, base_model_name, lora_model_name, quant_methods
|
| 316 |
|
| 317 |
# 上传合并后的模型和量化模型
|
| 318 |
api.upload_large_folder(
|
|
@@ -332,6 +307,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
|
|
| 332 |
except Exception as e:
|
| 333 |
error_message = f"Error during processing: {e}"
|
| 334 |
log(error_message)
|
|
|
|
| 335 |
return "\n".join(current_logs)
|
| 336 |
|
| 337 |
@timeit
|
|
@@ -368,7 +344,7 @@ def create_ui():
|
|
| 368 |
hf_token = gr.Textbox(
|
| 369 |
label="Hugging Face Token",
|
| 370 |
placeholder="Enter your Hugging Face Token",
|
| 371 |
-
value=
|
| 372 |
)
|
| 373 |
convert_btn = gr.Button("Start Conversion", variant="primary")
|
| 374 |
with gr.Column():
|
|
|
|
| 207 |
|
| 208 |
return final_path
|
| 209 |
|
| 210 |
+
def create_readme(repo_name, base_model_name, lora_model_name, quant_methods):
|
| 211 |
readme_path = os.path.join("output", repo_name, "README.md")
|
| 212 |
+
readme_template = """---tags:
|
|
|
|
| 213 |
- autotrain
|
| 214 |
- text-generation-inference
|
| 215 |
- text-generation
|
|
|
|
| 235 |
- created_at: {created_at}
|
| 236 |
- created_by: [Steven10429/apply_lora_and_quantize](https://github.com/Steven10429/apply_lora_and_quantize)
|
| 237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
""".format(
|
| 239 |
quantization="\n- quantization" if len(quant_methods) > 0 else "",
|
| 240 |
base_model_name=base_model_name,
|
|
|
|
| 242 |
repo_name=repo_name,
|
| 243 |
quant_methods=quant_methods,
|
| 244 |
created_at=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
|
|
|
|
| 245 |
)
|
| 246 |
|
| 247 |
with open(readme_path, "w") as f:
|
|
|
|
| 260 |
"""
|
| 261 |
try:
|
| 262 |
current_logs.clear()
|
| 263 |
+
if hf_token.strip().lower() == "auto":
|
| 264 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 265 |
+
elif hf_token.startswith("hf_"):
|
| 266 |
+
os.environ["HF_TOKEN"] = hf_token
|
| 267 |
login(hf_token)
|
|
|
|
| 268 |
api = HfApi(token=hf_token)
|
| 269 |
username = api.whoami()["name"]
|
| 270 |
|
|
|
|
| 287 |
for quant_method in quant_methods:
|
| 288 |
quantize(output_dir, repo_name, quant_method=quant_method)
|
| 289 |
|
| 290 |
+
create_readme(repo_name, base_model_name, lora_model_name, quant_methods)
|
| 291 |
|
| 292 |
# 上传合并后的模型和量化模型
|
| 293 |
api.upload_large_folder(
|
|
|
|
| 307 |
except Exception as e:
|
| 308 |
error_message = f"Error during processing: {e}"
|
| 309 |
log(error_message)
|
| 310 |
+
raise e
|
| 311 |
return "\n".join(current_logs)
|
| 312 |
|
| 313 |
@timeit
|
|
|
|
| 344 |
hf_token = gr.Textbox(
|
| 345 |
label="Hugging Face Token",
|
| 346 |
placeholder="Enter your Hugging Face Token",
|
| 347 |
+
value="Auto"
|
| 348 |
)
|
| 349 |
convert_btn = gr.Button("Start Conversion", variant="primary")
|
| 350 |
with gr.Column():
|