```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import os

# --- YOU MUST UPDATE THESE TWO PATHS ---
# Path to the directory where your FP32 model is stored locally
# (raw strings keep the Windows backslashes from being read as escape sequences)
input_dir = r"A:\LLM\.cache\huggingface\hub\models--wzhouad--gemma-2-9b-it-WPO-HB"
# Path to the directory where the converted FP16 model will be saved
output_dir = r"A:\LLM\.cache\huggingface\hub\models--wzhouad--gemma-2-9b-it-WPO-HB_FP16"
# ---------------------------------------

# Make sure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load the tokenizer from the local path
print(f"Loading tokenizer from {input_dir}...")
tokenizer = AutoTokenizer.from_pretrained(input_dir)

# Load the model in FP32 from the local path
print(f"Loading FP32 model from {input_dir}...")
model = AutoModelForCausalLM.from_pretrained(
    input_dir,
    torch_dtype=torch.float32,
    device_map="cpu",
    # device_map="auto",  # use this instead if you have enough GPU VRAM
)

# Convert the model to FP16 and save it to the new local directory
print("Converting model to FP16 and saving to disk...")
model.half().save_pretrained(
    output_dir,
    safe_serialization=True,
    max_shard_size="5GB",
)
tokenizer.save_pretrained(output_dir)

print(f"Model successfully converted and saved to {output_dir}")
print("You can now use this new FP16 model in your mergekit config.yaml.")
```