config_671B.json 503 B

{
    "vocab_size": 129280,
    "dim": 7168,
    "inter_dim": 18432,
    "moe_inter_dim": 2048,
    "n_layers": 61,
    "n_dense_layers": 3,
    "n_heads": 128,
    "n_routed_experts": 256,
    "n_shared_experts": 1,
    "n_activated_experts": 8,
    "n_expert_groups": 8,
    "n_limited_groups": 4,
    "route_scale": 2.5,
    "score_func": "sigmoid",
    "q_lora_rank": 1536,
    "kv_lora_rank": 512,
    "qk_nope_head_dim": 128,
    "qk_rope_head_dim": 64,
    "v_head_dim": 128,
    "dtype": "fp8"
}