config_16B.json 417 B

12345678910111213141516171819
  1. {
  2. "vocab_size": 102400,
  3. "dim": 2048,
  4. "inter_dim": 10944,
  5. "moe_inter_dim": 1408,
  6. "n_layers": 27,
  7. "n_dense_layers": 1,
  8. "n_heads": 16,
  9. "n_routed_experts": 64,
  10. "n_shared_experts": 2,
  11. "n_activated_experts": 6,
  12. "route_scale": 1.0,
  13. "q_lora_rank": 0,
  14. "kv_lora_rank": 512,
  15. "qk_nope_head_dim": 128,
  16. "qk_rope_head_dim": 64,
  17. "v_head_dim": 128,
  18. "mscale": 0.707
  19. }