SequentialLearning
/

SuperLinear

mixture-of-experts

Model card · Files and versions

lirannoc committed on Jul 14

Commit

5086864

·

verified ·

1 Parent(s): 4f95ef6

Update config.json

Files changed (1) hide show

config.json +1 -25

config.json CHANGED Viewed

@@ -11,20 +11,14 @@
   "_comment_model_architecture": "Model architecture parameters",
   "train_seq_len": 512,
   "train_pred_len": 96,
-  "seq_len": 512,
-  "pred_len": 96,
-  "inf_pred_len": 96,
-  "max_horizon": 96,
-  "auto_regressive": 1,
   "_comment_moe": "MoE (Mixture of Experts) parameters",
-  "moe_n_experts": 4,
   "top_k_experts": 12,
   "noisy_gating_std": 0.1,
   "moe_temp": 1.0,
   "moe_norm": false,
   "layer_type": "RLinear",
-  "n_experts": 4,
   "comp_moe": 12,
   "freeze_experts": true,
@@ -35,27 +29,9 @@
   "_comment_experts": "Expert configuration",
   "freq_experts": "mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
-  "_comment_loading": "Model loading and saving parameters",
-  "load_linear": true,
-  "load_weights_full": true,
-  "linear_freq_weights_path": "./weights/linear_freq_weights/",
-  "full_weights_path": "./weights/full_weights/checkpoint.pth",
   "_comment_training": "Training parameters",
   "resample_long_lookback": false,
-  "_comment_legacy": "Legacy parameters for backward compatibility",
-  "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
-  "linear_checkpoints_dir": "checkpoints5",
-  "manual_moe": 0,
-  "misc_moe": 1,
-  "noisy_gating_std_decay": 1,
-  "ker_len": 50,
-  "con": 0,
-  "d_model": 512,
-  "mlp_gating": 1,
-  "dropout": 0.0,
   "_comment_system": "System and framework parameters",
   "model_type": "super_linear",
   "torch_dtype": "float32",

   "_comment_model_architecture": "Model architecture parameters",
   "train_seq_len": 512,
   "train_pred_len": 96,
   "_comment_moe": "MoE (Mixture of Experts) parameters",
+  "n_experts": 4,
   "top_k_experts": 12,
   "noisy_gating_std": 0.1,
   "moe_temp": 1.0,
   "moe_norm": false,
   "layer_type": "RLinear",
   "comp_moe": 12,
   "freeze_experts": true,
   "_comment_experts": "Expert configuration",
   "freq_experts": "mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
   "_comment_training": "Training parameters",
   "resample_long_lookback": false,
   "_comment_system": "System and framework parameters",
   "model_type": "super_linear",
   "torch_dtype": "float32",