File size: 1,629 Bytes
789179e
 
 
 
ba05c4f
789179e
 
c4ce38b
789179e
 
 
98735f0
c4ce38b
789179e
 
c4ce38b
789179e
 
 
 
c4ce38b
789179e
ba05c4f
 
 
 
 
 
 
 
 
789179e
c4ce38b
789179e
ba05c4f
 
 
789179e
 
 
 
 
 
 
ba05c4f
 
 
 
 
789179e
 
 
ba05c4f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""Build a randomly-initialized MiniMax model from a config and save it as safetensors.

Loads ``config.json`` from the model directory, instantiates
``MiniMaxForCausalLM`` with a fixed random seed, and writes the weights to
``model.safetensors`` plus a ``config.used.json`` copy for reproducibility.
"""
import sys
import os
from safetensors.torch import save_file
import json
import torch

# Directory containing modeling_minimax.py, configuration_minimax.py and config.json.
# Overridable via MINIMAX_MODEL_DIR so the script is not tied to one machine's
# personal path; the default preserves the original behavior.
model_dir = os.environ.get(
    "MINIMAX_MODEL_DIR",
    "/Users/Goekdeniz.Guelmez@computacenter.com/Library/CloudStorage/OneDrive-COMPUTACENTER/Desktop/MiniMax01Text-Dev",
)
sys.path.append(model_dir)

# Import the custom model and configuration classes (resolved via model_dir above,
# which is why sys.path must be extended before these imports run).
from modeling_minimax import MiniMaxForCausalLM
from configuration_minimax import MiniMaxConfig

# Load the raw configuration dict from config.json.
config_path = os.path.join(model_dir, "config.json")
with open(config_path, 'r') as f:
    config_dict = json.load(f)

# Create the configuration object.
config = MiniMaxConfig(**config_dict)

# Report which attention layout the config selects (defaults to full attention
# when the flag is absent).
if getattr(config, "linear_attention", False):
    print("Using linear attention layout from config.")
else:
    print("Using full attention layout from config.")

# Fixed seed so the randomly-initialized weights are reproducible across runs.
torch.manual_seed(42)

# Create the model and put it in evaluation mode (disables dropout etc.).
small_model = MiniMaxForCausalLM(config)
small_model.eval()

# Print parameter count to verify the instantiated size.
param_count = sum(p.numel() for p in small_model.parameters())
print(f"Model has {param_count:,} parameters")

# Extract weights as a state dict for serialization.
model_state_dict = small_model.state_dict()

# Persist the exact config that was used, for reproducibility.
used_config_path = os.path.join(model_dir, "config.used.json")
with open(used_config_path, 'w') as f:
    json.dump(config_dict, f, indent=2)

# Save the weights in safetensors format alongside the config.
save_file(model_state_dict, os.path.join(model_dir, "model.safetensors"))

print("Model saved in safetensors format")

print(small_model)