Naphula committed on
Commit
9895dce
·
verified ·
1 Parent(s): dc8730a

Upload 2 files

Browse files
Files changed (2) hide show
  1. textonly_ripper.md +57 -0
  2. textonly_ripper_v2.py +117 -0
textonly_ripper.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Multimodal to Text-Only Model Converter
2
+
3
+ ## Overview
4
+
5
+ This Python script is a utility designed to convert a sharded, multimodal (text and vision) Mistral-based model into a text-only version. It achieves this by selectively removing the vision-related weights from the model's `safetensors` files and restructuring the remaining tensors to create a valid, language-only model.
6
+
7
+ This is particularly useful for adapting multimodal finetunes for tasks that only require the language model, such as merging with other text-based models (e.g., via SLERP) or for more efficient deployment in text-only environments.
8
+
9
+ ## Features
10
+
11
+ - **Handles Sharded Models**: Automatically processes models split across multiple `safetensors` files.
12
+ - **Targeted Weight Removal**: Removes tensors based on specific prefixes, targeting the vision tower and multimodal projector layers.
13
+ - **Tensor Renaming**: Correctly renames the language model tensors by stripping the multimodal prefix (e.g., `language_model.model...` becomes `model...`), ensuring compatibility with standard `MistralForCausalLM` architecture.
14
+ - **Automated Index Generation**: Creates a new, clean `model.safetensors.index.json` for the converted model.
15
+ - **Efficient Processing**: Skips creating new files for shards that contained only vision weights, saving disk space.
16
+
17
+ ## Prerequisites
18
+
19
+ - Python 3.6+
20
+ - PyTorch
21
+ - Safetensors
22
+
23
+ Install the required libraries using pip:
24
+ ```bash
25
+ pip install torch safetensors
26
+ ```
27
+
28
+ ## How to Use
29
+
30
+ 1. **Prepare Directories**:
31
+ - Have your original multimodal model in an input directory. This folder should contain the `model-*.safetensors` files and the `model.safetensors.index.json`.
32
+ - Create a new, empty directory where the converted text-only model will be saved.
33
+
34
+ 2. **Configure the Script**:
35
+ - Open the Python script (`textonly_ripper_v2.py` or your chosen name).
36
+ - Locate the `if __name__ == "__main__":` block at the bottom of the file.
37
+ - Set the `input_model_directory` variable to the path of your original multimodal model.
38
+ - Set the `output_model_directory` variable to the path of your new, empty output folder.
39
+
40
+ ```python
41
+ # --- Example Configuration ---
42
+ # On Windows, use raw strings (r"...") to avoid path errors
43
+ input_model_directory = r"C:\path\to\your\multimodal_model"
44
+ output_model_directory = r"C:\path\to\your\new_text_only_model"
45
+ ```
46
+
47
+ 3. **Run the Conversion**:
48
+ - Execute the script from your terminal:
49
+ ```bash
50
+ python textonly_ripper_v2.py
51
+ ```
52
+
53
+ 4. **Finalize Model Files**:
54
+ - After the script completes, copy any other necessary non-weight files (like `config.json`, `tokenizer_config.json`, `chat_template.jinja.txt`, etc.) to your new output directory.
55
+ - **Crucially**, ensure the `config.json` in the output directory is updated to reflect a text-only architecture (e.g., changing the `architectures` value to `["MistralForCausalLM"]` and removing the `vision_config` section).
56
+
57
+ The script will report its progress in the console, and upon completion, your output directory will contain the converted, text-only model, ready for use.
textonly_ripper_v2.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from safetensors import safe_open
3
+ from safetensors.torch import save_file
4
+ import os
5
+ import json
6
+ from collections import OrderedDict
7
+ import glob # Import the glob library to find files
8
+
9
def convert_multimodal_to_text_only(input_dir, output_dir):
    """Convert a sharded multimodal Mistral model into a text-only model.

    Reads every ``.safetensors`` shard in *input_dir*, drops vision-related
    tensors (vision tower and multimodal projector), strips the
    ``language_model.`` prefix from the remaining keys so they match the
    plain ``MistralForCausalLM`` layout, and writes the surviving tensors
    to *output_dir* along with a fresh ``model.safetensors.index.json``.
    Works whether or not the input directory has an index file.

    Args:
        input_dir: Directory containing the original multimodal shards
            (and, optionally, ``model.safetensors.index.json``).
        output_dir: Destination directory for the converted model; it is
            created if it does not exist.

    Raises:
        Exception: any failure is printed for script users and re-raised
            so programmatic callers are not silently handed a partial model.
    """
    try:
        os.makedirs(output_dir, exist_ok=True)

        # Tensor-name prefixes: vision weights are removed outright; the
        # language-model prefix is stripped so keys load in a text-only model.
        vision_prefixes_to_remove = ("vision_tower.", "multi_modal_projector.")
        lm_prefix_to_rename = "language_model."

        # --- Determine the list of shard files to process ---
        index_file_path = os.path.join(input_dir, "model.safetensors.index.json")

        if os.path.exists(index_file_path):
            print("Found 'model.safetensors.index.json'. Processing based on index.")
            with open(index_file_path, 'r') as f:
                index_data = json.load(f)
            weight_map = index_data.get("weight_map", {})
            # Unique, ordered list of shard filenames referenced by the index.
            shard_filenames = sorted(set(weight_map.values()))
        else:
            print("No index file found. Searching for '*.safetensors' files directly.")
            search_pattern = os.path.join(input_dir, '*.safetensors')
            shard_paths = sorted(glob.glob(search_pattern))
            if not shard_paths:
                print(f"Error: No '.safetensors' files found in {input_dir}")
                return
            shard_filenames = [os.path.basename(p) for p in shard_paths]

        print(f"Found {len(shard_filenames)} model shards to process.")

        # --- Process each shard ---
        new_weight_map = OrderedDict()
        total_original_size = 0
        total_new_size = 0

        for shard_filename in shard_filenames:
            input_shard_path = os.path.join(input_dir, shard_filename)
            output_shard_path = os.path.join(output_dir, shard_filename)

            print(f"\nProcessing shard: {shard_filename}")

            text_only_tensors = OrderedDict()

            with safe_open(input_shard_path, framework="pt", device="cpu") as f:
                for key in f.keys():
                    # Drop vision weights entirely.
                    if key.startswith(vision_prefixes_to_remove):
                        continue

                    # Rename language-model tensors by stripping the prefix;
                    # any other text tensors keep their original key.
                    new_key = key
                    if key.startswith(lm_prefix_to_rename):
                        new_key = key[len(lm_prefix_to_rename):]

                    text_only_tensors[new_key] = f.get_tensor(key)
                    new_weight_map[new_key] = shard_filename

            if text_only_tensors:
                print(f"Saving {len(text_only_tensors)} text-only tensors to: {shard_filename}")
                save_file(text_only_tensors, output_shard_path)

                original_size = os.path.getsize(input_shard_path)
                new_size = os.path.getsize(output_shard_path)
                total_original_size += original_size
                total_new_size += new_size
                print(f" - Original shard size: {original_size / (1024**2):.2f} MB")
                print(f" - New shard size: {new_size / (1024**2):.2f} MB")
            else:
                # Nothing survived the filter, so no output file is written
                # (and no weight-map entries were added for this shard).
                print(f"Shard {shard_filename} contained only vision tensors and will be skipped.")

        # --- Create the new index file for the text-only model ---
        # Always emit one, even if the original model had no index.
        new_index_data = {
            "metadata": {
                "total_size": total_new_size
            },
            "weight_map": new_weight_map
        }
        new_index_path = os.path.join(output_dir, "model.safetensors.index.json")
        with open(new_index_path, 'w') as f:
            json.dump(new_index_data, f, indent=2)

        print(f"\nSuccessfully created new index file at: {new_index_path}")
        print("\n--- Conversion Summary ---")
        print(f"Total original model size: {total_original_size / (1024**3):.2f} GB")
        print(f"Total new text-only model size: {total_new_size / (1024**3):.2f} GB")
        print("Conversion complete!")

    except Exception as e:
        # Report for interactive users, but re-raise instead of swallowing:
        # a silent failure here would leave a partial/invalid model behind.
        print(f"An error occurred: {e}")
        raise
111
+ if __name__ == "__main__":
112
+ # --- Configuration ---
113
+ input_model_directory = r"A:\LLM\.cache\huggingface\hub\test"
114
+ output_model_directory = r"A:\LLM\.cache\huggingface\hub\test\fix"
115
+
116
+ # --- Run the script ---
117
+ convert_multimodal_to_text_only(input_model_directory, output_model_directory)