SequentialLearning
/

SuperLinear

mixture-of-experts

Model card Files Files and versions

lirannoc committed on Jul 3

Commit

27cf97b

·

verified ·

1 Parent(s): 0948651

Upload config.json

Files changed (1) hide show

config.json +44 -0

config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "super_linear",
+  "architectures": [
+    "SuperLinearForCausalLM"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_super_linear.SuperLinearConfig",
+    "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
+  },
+  "auto_regressive": 1,
+  "d_model": 128,
+  "dropout": 0.0,
+  "fft_len": 5000,
+  "freeze_experts": 1,
+  "freq_experts": "mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
+  "inf_pred_len": 96,
+  "layer_type": "RLinear",
+  "linear_checkpoints_dir": "checkpoints5",
+  "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
+  "load_linear": 0,
+  "load_weights": 0,
+  "max_horizon": 96,
+  "mlp_gating": 0,
+  "model_type": "super_linear",
+  "moe": 1,
+  "moe_n_experts": 12,
+  "moe_temp": 1,
+  "noisy_gating_std": 0.1,
+  "noisy_gating_std_decay": 1,
+  "pred_len": 96,
+  "seq_len": 512,
+  "moe_norm": 0,
+  "top_k_experts": 10,
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "use_fft": 1,
+  "train_epochs": 30,
+  "patience": 5,
+  "lradj": "type1",
+  "learning_rate": 0.005,
+  "channel_ind": 0,
+  "misc_moe": 12,
+  "full_size": 0
+}