{
  "architectures": [
    "MiniMaxM2ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "attn_type_list": [
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1
  ],
  "bos_token_id": null,
  "eos_token_id": null,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layernorm_full_attention_beta": 1.0,
  "layernorm_linear_attention_beta": 1.0,
  "layernorm_mlp_beta": 1.0,
  "max_position_embeddings": 196608,
  "mlp_intermediate_size": 8192,
  "model_type": "minimax",
  "mtp_transformer_layers": 1,
  "num_attention_heads": 48,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 62,
  "num_key_value_heads": 8,
  "num_local_experts": 256,
  "num_mtp_modules": 3,
  "output_router_logits": false,
  "qk_norm_type": "per_layer",
  "quantization_config": {
    "activation_scheme": "dynamic",
    "fmt": "float8_e4m3fn",
    "quant_method": "fp8",
    "weight_block_size": [
      128,
      128
    ]
  },
  "rms_norm_eps": 1e-06,
  "rope_theta": 5000000,
  "rotary_dim": 64,
  "router_aux_loss_coef": 0.001,
  "router_jitter_noise": 0.0,
  "scoring_func": "sigmoid",
  "shared_intermediate_size": 0,
  "shared_moe_mode": "sigmoid",
  "sliding_window": null,
  "tie_word_embeddings": false,
  "transformers_version": "4.46.1",
  "use_cache": true,
  "use_mtp": true,
  "use_qk_norm": true,
  "use_routing_bias": true,
  "vocab_size": 200064
}
