config_236B.json 455 B

1234567891011121314151617181920
  1. {
  2. "vocab_size": 102400,
  3. "dim": 5120,
  4. "inter_dim": 12288,
  5. "moe_inter_dim": 1536,
  6. "n_layers": 60,
  7. "n_dense_layers": 1,
  8. "n_heads": 128,
  9. "n_routed_experts": 160,
  10. "n_shared_experts": 2,
  11. "n_activated_experts": 6,
  12. "n_expert_groups": 8,
  13. "n_limited_groups": 3,
  14. "route_scale": 16.0,
  15. "q_lora_rank": 1536,
  16. "kv_lora_rank": 512,
  17. "qk_nope_head_dim": 128,
  18. "qk_rope_head_dim": 64,
  19. "v_head_dim": 128
  20. }