lirannoc committed on
Commit
db751a5
·
verified ·
1 Parent(s): 27cf97b

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +52 -32
config.json CHANGED
@@ -3,42 +3,62 @@
3
  "architectures": [
4
  "SuperLinearForCausalLM"
5
  ],
6
- "auto_map": {
7
- "AutoConfig": "configuration_super_linear.SuperLinearConfig",
8
- "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
9
- },
10
- "auto_regressive": 1,
11
- "d_model": 128,
12
- "dropout": 0.0,
13
- "fft_len": 5000,
14
- "freeze_experts": 1,
15
- "freq_experts": "mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
16
  "inf_pred_len": 96,
17
- "layer_type": "RLinear",
18
- "linear_checkpoints_dir": "checkpoints5",
19
- "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
20
- "load_linear": 0,
21
- "load_weights": 0,
22
  "max_horizon": 96,
23
- "mlp_gating": 0,
24
- "model_type": "super_linear",
 
25
  "moe": 1,
26
- "moe_n_experts": 12,
27
- "moe_temp": 1,
28
  "noisy_gating_std": 0.1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "noisy_gating_std_decay": 1,
30
- "pred_len": 96,
31
- "seq_len": 512,
32
- "moe_norm": 0,
33
- "top_k_experts": 10,
 
 
 
 
34
  "torch_dtype": "float32",
35
- "transformers_version": "4.40.1",
36
- "use_fft": 1,
37
- "train_epochs": 30,
38
- "patience": 5,
39
- "lradj": "type1",
40
- "learning_rate": 0.005,
41
- "channel_ind": 0,
42
- "misc_moe": 12,
43
- "full_size": 0
44
  }
 
3
  "architectures": [
4
  "SuperLinearForCausalLM"
5
  ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_super_linear.SuperLinearConfig",
8
+ "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
9
+ },
10
+
11
+ "_comment_model_architecture": "Model architecture parameters",
12
+ "train_seq_len": 512,
13
+ "train_pred_len": 96,
14
+ "seq_len": 512,
15
+ "pred_len": 96,
16
  "inf_pred_len": 96,
 
 
 
 
 
17
  "max_horizon": 96,
18
+ "auto_regressive": 1,
19
+
20
+ "_comment_moe": "MoE (Mixture of Experts) parameters",
21
  "moe": 1,
22
+ "moe_n_experts": 4,
23
+ "top_k_experts": 12,
24
  "noisy_gating_std": 0.1,
25
+ "moe_temp": 1.0,
26
+ "moe_norm": false,
27
+ "layer_type": "RLinear",
28
+ "n_experts": 4,
29
+ "comp_moe": 12,
30
+ "freeze_experts": true,
31
+
32
+ "_comment_fft": "FFT-based gating parameters",
33
+ "use_fft": true,
34
+ "fft_len": 5000,
35
+
36
+ "_comment_experts": "Expert configuration",
37
+ "freq_experts": "mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
38
+
39
+ "_comment_loading": "Model loading and saving parameters",
40
+ "load_linear": true,
41
+ "load_weights_full": true,
42
+ "linear_freq_weights_path": "./weights/linear_freq_weights/",
43
+ "full_weights_path": "./weights/full_weights/checkpoint.pth",
44
+
45
+ "_comment_training": "Training parameters",
46
+ "resample_long_lookback": false,
47
+
48
+ "_comment_legacy": "Legacy parameters for backward compatibility",
49
+ "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
50
+ "linear_checkpoints_dir": "checkpoints5",
51
+ "manual_moe": 0,
52
+ "misc_moe": 1,
53
  "noisy_gating_std_decay": 1,
54
+ "ker_len": 50,
55
+ "con": 0,
56
+ "d_model": 512,
57
+ "mlp_gating": 1,
58
+ "dropout": 0.0,
59
+
60
+ "_comment_system": "System and framework parameters",
61
+ "model_type": "super_linear",
62
  "torch_dtype": "float32",
63
+ "transformers_version": "4.40.1"
 
 
 
 
 
 
 
 
64
  }