Models
GitHub
Discord
Docs
Cloud
Sign in
Download
Models
Download
GitHub
Discord
Docs
Cloud
Sign in
cogito-2.1
:671b-q4_K_M
Updated
10 hours ago
The Cogito v2.1 LLMs are instruction tuned generative models. All models are released under MIT license for commercial use.
The Cogito v2.1 LLMs are instruction tuned generative models. All models are released under MIT license for commercial use.
Cancel
cloud
cogito-2.1:671b-q4_K_M
...
/
model
dfcfe3b03ced · 404GB
Metadata
general.architecture
deepseek2
deepseek2
general.file_type
Q4_K_M
Q4_K_M
deepseek2.attention.head_count
128
128
deepseek2.attention.head_count_kv
1
1
deepseek2.attention.key_length
576
576
deepseek2.attention.key_length_mla
192
192
deepseek2.attention.kv_lora_rank
512
512
deepseek2.attention.layer_norm_rms_epsilon
1e-06
1e-06
deepseek2.attention.q_lora_rank
1536
1536
deepseek2.attention.value_length
512
512
deepseek2.attention.value_length_mla
128
128
deepseek2.block_count
61
61
deepseek2.context_length
163840
163840
deepseek2.embedding_length
7168
7168
deepseek2.expert_count
256
256
deepseek2.expert_feed_forward_length
2048
2048
deepseek2.expert_gating_func
2
2
deepseek2.expert_group_count
8
8
deepseek2.expert_group_used_count
4
4
deepseek2.expert_shared_count
1
1
deepseek2.expert_used_count
8
8
deepseek2.expert_weights_norm
true
true
deepseek2.expert_weights_scale
2.5
2.5
deepseek2.feed_forward_length
18432
18432
deepseek2.leading_dense_block_count
3
3
deepseek2.rope.dimension_count
64
64
deepseek2.rope.freq_base
10000
10000
deepseek2.rope.scaling.factor
40
40
deepseek2.rope.scaling.original_context_length
4096
4096
deepseek2.rope.scaling.type
yarn
yarn
deepseek2.rope.scaling.yarn_log_multiplier
0.1
0.1
deepseek2.vocab_size
128815
128815
tokenizer.ggml.add_bos_token
true
true
tokenizer.ggml.add_eos_token
false
false
tokenizer.ggml.add_sep_token
false
false
tokenizer.ggml.bos_token_id
0
0
tokenizer.ggml.eos_token_id
1
1
tokenizer.ggml.merges
[Ġ t, Ġ a, i n, Ġ Ġ, h e, ...]
[Ġ t, Ġ a, i n, Ġ Ġ, h e, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
2
2
tokenizer.ggml.pre
deepseek-v3
deepseek-v3
tokenizer.ggml.token_type
[3, 3, 3, 1, 1, ...]
[3, 3, 3, 1, 1, ...]
tokenizer.ggml.tokens
[<|begin▁of▁sentence|>, <|end▁of▁sentence|>, <|▁pad▁|>, !, ", ...]
[<|begin▁of▁sentence|>, <|end▁of▁sentence|>, <|▁pad▁|>, !, ", ...]
Tensor
Name
Type
Shape
token_embd.weight
Q4_K
Q4_K
[7168, 128815]
blk.0
blk.0.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.0.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.0.attn_kv_a_norm.weight
F32
F32
[512]
blk.0.attn_norm.weight
F32
F32
[7168]
blk.0.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.0.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.0.attn_q_a_norm.weight
F32
F32
[1536]
blk.0.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.0.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.0.ffn_down.weight
Q6_K
Q6_K
[18432, 7168]
blk.0.ffn_gate.weight
Q4_K
Q4_K
[7168, 18432]
blk.0.ffn_norm.weight
F32
F32
[7168]
blk.0.ffn_up.weight
Q4_K
Q4_K
[7168, 18432]
blk.1
blk.1.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.1.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.1.attn_kv_a_norm.weight
F32
F32
[512]
blk.1.attn_norm.weight
F32
F32
[7168]
blk.1.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.1.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.1.attn_q_a_norm.weight
F32
F32
[1536]
blk.1.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.1.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.1.ffn_down.weight
Q6_K
Q6_K
[18432, 7168]
blk.1.ffn_gate.weight
Q4_K
Q4_K
[7168, 18432]
blk.1.ffn_norm.weight
F32
F32
[7168]
blk.1.ffn_up.weight
Q4_K
Q4_K
[7168, 18432]
blk.2
blk.2.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.2.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.2.attn_kv_a_norm.weight
F32
F32
[512]
blk.2.attn_norm.weight
F32
F32
[7168]
blk.2.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.2.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.2.attn_q_a_norm.weight
F32
F32
[1536]
blk.2.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.2.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.2.ffn_down.weight
Q6_K
Q6_K
[18432, 7168]
blk.2.ffn_gate.weight
Q4_K
Q4_K
[7168, 18432]
blk.2.ffn_norm.weight
F32
F32
[7168]
blk.2.ffn_up.weight
Q4_K
Q4_K
[7168, 18432]
blk.3
blk.3.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.3.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.3.attn_kv_a_norm.weight
F32
F32
[512]
blk.3.attn_norm.weight
F32
F32
[7168]
blk.3.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.3.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.3.attn_q_a_norm.weight
F32
F32
[1536]
blk.3.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.3.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.3.exp_probs_b.bias
F32
F32
[256]
blk.3.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.3.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.3.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.3.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.3.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.3.ffn_norm.weight
F32
F32
[7168]
blk.3.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.3.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.4
blk.4.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.4.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.4.attn_kv_a_norm.weight
F32
F32
[512]
blk.4.attn_norm.weight
F32
F32
[7168]
blk.4.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.4.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.4.attn_q_a_norm.weight
F32
F32
[1536]
blk.4.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.4.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.4.exp_probs_b.bias
F32
F32
[256]
blk.4.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.4.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.4.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.4.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.4.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.4.ffn_norm.weight
F32
F32
[7168]
blk.4.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.4.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.5
blk.5.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.5.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.5.attn_kv_a_norm.weight
F32
F32
[512]
blk.5.attn_norm.weight
F32
F32
[7168]
blk.5.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.5.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.5.attn_q_a_norm.weight
F32
F32
[1536]
blk.5.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.5.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.5.exp_probs_b.bias
F32
F32
[256]
blk.5.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.5.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.5.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.5.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.5.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.5.ffn_norm.weight
F32
F32
[7168]
blk.5.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.5.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.6
blk.6.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.6.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.6.attn_kv_a_norm.weight
F32
F32
[512]
blk.6.attn_norm.weight
F32
F32
[7168]
blk.6.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.6.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.6.attn_q_a_norm.weight
F32
F32
[1536]
blk.6.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.6.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.6.exp_probs_b.bias
F32
F32
[256]
blk.6.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.6.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.6.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.6.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.6.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.6.ffn_norm.weight
F32
F32
[7168]
blk.6.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.6.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.7
blk.7.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.7.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.7.attn_kv_a_norm.weight
F32
F32
[512]
blk.7.attn_norm.weight
F32
F32
[7168]
blk.7.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.7.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.7.attn_q_a_norm.weight
F32
F32
[1536]
blk.7.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.7.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.7.exp_probs_b.bias
F32
F32
[256]
blk.7.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.7.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.7.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.7.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.7.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.7.ffn_norm.weight
F32
F32
[7168]
blk.7.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.7.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.8
blk.8.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.8.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.8.attn_kv_a_norm.weight
F32
F32
[512]
blk.8.attn_norm.weight
F32
F32
[7168]
blk.8.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.8.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.8.attn_q_a_norm.weight
F32
F32
[1536]
blk.8.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.8.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.8.exp_probs_b.bias
F32
F32
[256]
blk.8.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.8.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.8.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.8.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.8.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.8.ffn_norm.weight
F32
F32
[7168]
blk.8.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.8.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.9
blk.9.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.9.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.9.attn_kv_a_norm.weight
F32
F32
[512]
blk.9.attn_norm.weight
F32
F32
[7168]
blk.9.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.9.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.9.attn_q_a_norm.weight
F32
F32
[1536]
blk.9.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.9.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.9.exp_probs_b.bias
F32
F32
[256]
blk.9.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.9.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.9.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.9.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.9.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.9.ffn_norm.weight
F32
F32
[7168]
blk.9.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.9.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.10
blk.10.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.10.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.10.attn_kv_a_norm.weight
F32
F32
[512]
blk.10.attn_norm.weight
F32
F32
[7168]
blk.10.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.10.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.10.attn_q_a_norm.weight
F32
F32
[1536]
blk.10.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.10.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.10.exp_probs_b.bias
F32
F32
[256]
blk.10.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.10.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.10.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.10.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.10.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.10.ffn_norm.weight
F32
F32
[7168]
blk.10.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.10.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.11
blk.11.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.11.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.11.attn_kv_a_norm.weight
F32
F32
[512]
blk.11.attn_norm.weight
F32
F32
[7168]
blk.11.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.11.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.11.attn_q_a_norm.weight
F32
F32
[1536]
blk.11.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.11.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.11.exp_probs_b.bias
F32
F32
[256]
blk.11.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.11.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.11.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.11.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.11.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.11.ffn_norm.weight
F32
F32
[7168]
blk.11.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.11.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.12
blk.12.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.12.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.12.attn_kv_a_norm.weight
F32
F32
[512]
blk.12.attn_norm.weight
F32
F32
[7168]
blk.12.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.12.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.12.attn_q_a_norm.weight
F32
F32
[1536]
blk.12.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.12.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.12.exp_probs_b.bias
F32
F32
[256]
blk.12.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.12.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.12.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.12.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.12.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.12.ffn_norm.weight
F32
F32
[7168]
blk.12.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.12.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.13
blk.13.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.13.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.13.attn_kv_a_norm.weight
F32
F32
[512]
blk.13.attn_norm.weight
F32
F32
[7168]
blk.13.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.13.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.13.attn_q_a_norm.weight
F32
F32
[1536]
blk.13.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.13.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.13.exp_probs_b.bias
F32
F32
[256]
blk.13.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.13.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.13.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.13.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.13.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.13.ffn_norm.weight
F32
F32
[7168]
blk.13.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.13.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.14
blk.14.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.14.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.14.attn_kv_a_norm.weight
F32
F32
[512]
blk.14.attn_norm.weight
F32
F32
[7168]
blk.14.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.14.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.14.attn_q_a_norm.weight
F32
F32
[1536]
blk.14.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.14.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.14.exp_probs_b.bias
F32
F32
[256]
blk.14.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.14.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.14.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.14.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.14.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.14.ffn_norm.weight
F32
F32
[7168]
blk.14.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.14.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.15
blk.15.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.15.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.15.attn_kv_a_norm.weight
F32
F32
[512]
blk.15.attn_norm.weight
F32
F32
[7168]
blk.15.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.15.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.15.attn_q_a_norm.weight
F32
F32
[1536]
blk.15.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.15.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.15.exp_probs_b.bias
F32
F32
[256]
blk.15.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.15.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.15.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.15.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.15.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.15.ffn_norm.weight
F32
F32
[7168]
blk.15.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.15.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.16
blk.16.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.16.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.16.attn_kv_a_norm.weight
F32
F32
[512]
blk.16.attn_norm.weight
F32
F32
[7168]
blk.16.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.16.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.16.attn_q_a_norm.weight
F32
F32
[1536]
blk.16.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.16.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.16.exp_probs_b.bias
F32
F32
[256]
blk.16.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.16.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.16.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.16.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.16.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.16.ffn_norm.weight
F32
F32
[7168]
blk.16.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.16.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.17
blk.17.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.17.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.17.attn_kv_a_norm.weight
F32
F32
[512]
blk.17.attn_norm.weight
F32
F32
[7168]
blk.17.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.17.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.17.attn_q_a_norm.weight
F32
F32
[1536]
blk.17.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.17.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.17.exp_probs_b.bias
F32
F32
[256]
blk.17.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.17.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.17.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.17.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.17.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.17.ffn_norm.weight
F32
F32
[7168]
blk.17.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.17.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.18
blk.18.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.18.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.18.attn_kv_a_norm.weight
F32
F32
[512]
blk.18.attn_norm.weight
F32
F32
[7168]
blk.18.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.18.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.18.attn_q_a_norm.weight
F32
F32
[1536]
blk.18.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.18.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.18.exp_probs_b.bias
F32
F32
[256]
blk.18.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.18.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.18.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.18.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.18.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.18.ffn_norm.weight
F32
F32
[7168]
blk.18.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.18.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.19
blk.19.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.19.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.19.attn_kv_a_norm.weight
F32
F32
[512]
blk.19.attn_norm.weight
F32
F32
[7168]
blk.19.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.19.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.19.attn_q_a_norm.weight
F32
F32
[1536]
blk.19.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.19.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.19.exp_probs_b.bias
F32
F32
[256]
blk.19.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.19.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.19.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.19.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.19.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.19.ffn_norm.weight
F32
F32
[7168]
blk.19.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.19.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.20
blk.20.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.20.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.20.attn_kv_a_norm.weight
F32
F32
[512]
blk.20.attn_norm.weight
F32
F32
[7168]
blk.20.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.20.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.20.attn_q_a_norm.weight
F32
F32
[1536]
blk.20.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.20.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.20.exp_probs_b.bias
F32
F32
[256]
blk.20.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.20.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.20.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.20.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.20.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.20.ffn_norm.weight
F32
F32
[7168]
blk.20.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.20.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.21
blk.21.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.21.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.21.attn_kv_a_norm.weight
F32
F32
[512]
blk.21.attn_norm.weight
F32
F32
[7168]
blk.21.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.21.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.21.attn_q_a_norm.weight
F32
F32
[1536]
blk.21.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.21.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.21.exp_probs_b.bias
F32
F32
[256]
blk.21.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.21.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.21.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.21.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.21.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.21.ffn_norm.weight
F32
F32
[7168]
blk.21.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.21.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.22
blk.22.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.22.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.22.attn_kv_a_norm.weight
F32
F32
[512]
blk.22.attn_norm.weight
F32
F32
[7168]
blk.22.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.22.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.22.attn_q_a_norm.weight
F32
F32
[1536]
blk.22.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.22.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.22.exp_probs_b.bias
F32
F32
[256]
blk.22.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.22.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.22.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.22.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.22.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.22.ffn_norm.weight
F32
F32
[7168]
blk.22.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.22.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.23
blk.23.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.23.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.23.attn_kv_a_norm.weight
F32
F32
[512]
blk.23.attn_norm.weight
F32
F32
[7168]
blk.23.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.23.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.23.attn_q_a_norm.weight
F32
F32
[1536]
blk.23.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.23.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.23.exp_probs_b.bias
F32
F32
[256]
blk.23.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.23.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.23.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.23.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.23.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.23.ffn_norm.weight
F32
F32
[7168]
blk.23.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.23.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.24
blk.24.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.24.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.24.attn_kv_a_norm.weight
F32
F32
[512]
blk.24.attn_norm.weight
F32
F32
[7168]
blk.24.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.24.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.24.attn_q_a_norm.weight
F32
F32
[1536]
blk.24.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.24.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.24.exp_probs_b.bias
F32
F32
[256]
blk.24.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.24.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.24.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.24.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.24.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.24.ffn_norm.weight
F32
F32
[7168]
blk.24.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.24.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.25
blk.25.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.25.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.25.attn_kv_a_norm.weight
F32
F32
[512]
blk.25.attn_norm.weight
F32
F32
[7168]
blk.25.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.25.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.25.attn_q_a_norm.weight
F32
F32
[1536]
blk.25.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.25.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.25.exp_probs_b.bias
F32
F32
[256]
blk.25.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.25.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.25.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.25.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.25.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.25.ffn_norm.weight
F32
F32
[7168]
blk.25.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.25.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.26
blk.26.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.26.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.26.attn_kv_a_norm.weight
F32
F32
[512]
blk.26.attn_norm.weight
F32
F32
[7168]
blk.26.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.26.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.26.attn_q_a_norm.weight
F32
F32
[1536]
blk.26.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.26.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.26.exp_probs_b.bias
F32
F32
[256]
blk.26.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.26.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.26.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.26.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.26.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.26.ffn_norm.weight
F32
F32
[7168]
blk.26.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.26.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.27
blk.27.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.27.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.27.attn_kv_a_norm.weight
F32
F32
[512]
blk.27.attn_norm.weight
F32
F32
[7168]
blk.27.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.27.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.27.attn_q_a_norm.weight
F32
F32
[1536]
blk.27.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.27.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.27.exp_probs_b.bias
F32
F32
[256]
blk.27.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.27.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.27.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.27.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.27.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.27.ffn_norm.weight
F32
F32
[7168]
blk.27.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.27.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.28
blk.28.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.28.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.28.attn_kv_a_norm.weight
F32
F32
[512]
blk.28.attn_norm.weight
F32
F32
[7168]
blk.28.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.28.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.28.attn_q_a_norm.weight
F32
F32
[1536]
blk.28.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.28.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.28.exp_probs_b.bias
F32
F32
[256]
blk.28.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.28.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.28.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.28.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.28.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.28.ffn_norm.weight
F32
F32
[7168]
blk.28.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.28.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.29
blk.29.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.29.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.29.attn_kv_a_norm.weight
F32
F32
[512]
blk.29.attn_norm.weight
F32
F32
[7168]
blk.29.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.29.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.29.attn_q_a_norm.weight
F32
F32
[1536]
blk.29.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.29.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.29.exp_probs_b.bias
F32
F32
[256]
blk.29.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.29.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.29.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.29.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.29.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.29.ffn_norm.weight
F32
F32
[7168]
blk.29.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.29.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.30
blk.30.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.30.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.30.attn_kv_a_norm.weight
F32
F32
[512]
blk.30.attn_norm.weight
F32
F32
[7168]
blk.30.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.30.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.30.attn_q_a_norm.weight
F32
F32
[1536]
blk.30.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.30.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.30.exp_probs_b.bias
F32
F32
[256]
blk.30.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.30.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.30.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.30.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.30.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.30.ffn_norm.weight
F32
F32
[7168]
blk.30.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.30.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.31
blk.31.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.31.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.31.attn_kv_a_norm.weight
F32
F32
[512]
blk.31.attn_norm.weight
F32
F32
[7168]
blk.31.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.31.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.31.attn_q_a_norm.weight
F32
F32
[1536]
blk.31.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.31.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.31.exp_probs_b.bias
F32
F32
[256]
blk.31.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.31.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.31.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.31.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.31.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.31.ffn_norm.weight
F32
F32
[7168]
blk.31.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.31.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.32
blk.32.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.32.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.32.attn_kv_a_norm.weight
F32
F32
[512]
blk.32.attn_norm.weight
F32
F32
[7168]
blk.32.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.32.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.32.attn_q_a_norm.weight
F32
F32
[1536]
blk.32.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.32.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.32.exp_probs_b.bias
F32
F32
[256]
blk.32.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.32.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.32.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.32.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.32.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.32.ffn_norm.weight
F32
F32
[7168]
blk.32.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.32.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.33
blk.33.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.33.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.33.attn_kv_a_norm.weight
F32
F32
[512]
blk.33.attn_norm.weight
F32
F32
[7168]
blk.33.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.33.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.33.attn_q_a_norm.weight
F32
F32
[1536]
blk.33.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.33.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.33.exp_probs_b.bias
F32
F32
[256]
blk.33.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.33.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.33.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.33.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.33.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.33.ffn_norm.weight
F32
F32
[7168]
blk.33.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.33.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.34
blk.34.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.34.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.34.attn_kv_a_norm.weight
F32
F32
[512]
blk.34.attn_norm.weight
F32
F32
[7168]
blk.34.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.34.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.34.attn_q_a_norm.weight
F32
F32
[1536]
blk.34.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.34.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.34.exp_probs_b.bias
F32
F32
[256]
blk.34.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.34.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.34.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.34.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.34.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.34.ffn_norm.weight
F32
F32
[7168]
blk.34.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.34.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.35
blk.35.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.35.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.35.attn_kv_a_norm.weight
F32
F32
[512]
blk.35.attn_norm.weight
F32
F32
[7168]
blk.35.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.35.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.35.attn_q_a_norm.weight
F32
F32
[1536]
blk.35.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.35.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.35.exp_probs_b.bias
F32
F32
[256]
blk.35.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.35.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.35.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.35.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.35.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.35.ffn_norm.weight
F32
F32
[7168]
blk.35.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.35.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.36
blk.36.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.36.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.36.attn_kv_a_norm.weight
F32
F32
[512]
blk.36.attn_norm.weight
F32
F32
[7168]
blk.36.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.36.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.36.attn_q_a_norm.weight
F32
F32
[1536]
blk.36.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.36.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.36.exp_probs_b.bias
F32
F32
[256]
blk.36.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.36.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.36.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.36.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.36.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.36.ffn_norm.weight
F32
F32
[7168]
blk.36.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.36.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.37
blk.37.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.37.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.37.attn_kv_a_norm.weight
F32
F32
[512]
blk.37.attn_norm.weight
F32
F32
[7168]
blk.37.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.37.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.37.attn_q_a_norm.weight
F32
F32
[1536]
blk.37.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.37.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.37.exp_probs_b.bias
F32
F32
[256]
blk.37.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.37.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.37.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.37.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.37.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.37.ffn_norm.weight
F32
F32
[7168]
blk.37.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.37.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.38
blk.38.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.38.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.38.attn_kv_a_norm.weight
F32
F32
[512]
blk.38.attn_norm.weight
F32
F32
[7168]
blk.38.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.38.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.38.attn_q_a_norm.weight
F32
F32
[1536]
blk.38.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.38.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.38.exp_probs_b.bias
F32
F32
[256]
blk.38.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.38.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.38.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.38.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.38.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.38.ffn_norm.weight
F32
F32
[7168]
blk.38.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.38.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.39
blk.39.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.39.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.39.attn_kv_a_norm.weight
F32
F32
[512]
blk.39.attn_norm.weight
F32
F32
[7168]
blk.39.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.39.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.39.attn_q_a_norm.weight
F32
F32
[1536]
blk.39.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.39.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.39.exp_probs_b.bias
F32
F32
[256]
blk.39.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.39.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.39.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.39.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.39.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.39.ffn_norm.weight
F32
F32
[7168]
blk.39.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.39.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.40
blk.40.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.40.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.40.attn_kv_a_norm.weight
F32
F32
[512]
blk.40.attn_norm.weight
F32
F32
[7168]
blk.40.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.40.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.40.attn_q_a_norm.weight
F32
F32
[1536]
blk.40.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.40.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.40.exp_probs_b.bias
F32
F32
[256]
blk.40.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.40.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.40.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.40.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.40.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.40.ffn_norm.weight
F32
F32
[7168]
blk.40.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.40.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.41
blk.41.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.41.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.41.attn_kv_a_norm.weight
F32
F32
[512]
blk.41.attn_norm.weight
F32
F32
[7168]
blk.41.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.41.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.41.attn_q_a_norm.weight
F32
F32
[1536]
blk.41.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.41.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.41.exp_probs_b.bias
F32
F32
[256]
blk.41.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.41.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.41.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.41.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.41.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.41.ffn_norm.weight
F32
F32
[7168]
blk.41.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.41.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.42
blk.42.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.42.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.42.attn_kv_a_norm.weight
F32
F32
[512]
blk.42.attn_norm.weight
F32
F32
[7168]
blk.42.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.42.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.42.attn_q_a_norm.weight
F32
F32
[1536]
blk.42.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.42.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.42.exp_probs_b.bias
F32
F32
[256]
blk.42.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.42.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.42.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.42.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.42.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.42.ffn_norm.weight
F32
F32
[7168]
blk.42.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.42.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.43
blk.43.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.43.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.43.attn_kv_a_norm.weight
F32
F32
[512]
blk.43.attn_norm.weight
F32
F32
[7168]
blk.43.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.43.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.43.attn_q_a_norm.weight
F32
F32
[1536]
blk.43.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.43.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.43.exp_probs_b.bias
F32
F32
[256]
blk.43.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.43.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.43.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.43.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.43.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.43.ffn_norm.weight
F32
F32
[7168]
blk.43.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.43.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.44
blk.44.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.44.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.44.attn_kv_a_norm.weight
F32
F32
[512]
blk.44.attn_norm.weight
F32
F32
[7168]
blk.44.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.44.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.44.attn_q_a_norm.weight
F32
F32
[1536]
blk.44.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.44.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.44.exp_probs_b.bias
F32
F32
[256]
blk.44.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.44.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.44.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.44.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.44.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.44.ffn_norm.weight
F32
F32
[7168]
blk.44.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.44.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.45
blk.45.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.45.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.45.attn_kv_a_norm.weight
F32
F32
[512]
blk.45.attn_norm.weight
F32
F32
[7168]
blk.45.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.45.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.45.attn_q_a_norm.weight
F32
F32
[1536]
blk.45.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.45.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.45.exp_probs_b.bias
F32
F32
[256]
blk.45.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.45.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.45.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.45.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.45.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.45.ffn_norm.weight
F32
F32
[7168]
blk.45.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.45.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.46
blk.46.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.46.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.46.attn_kv_a_norm.weight
F32
F32
[512]
blk.46.attn_norm.weight
F32
F32
[7168]
blk.46.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.46.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.46.attn_q_a_norm.weight
F32
F32
[1536]
blk.46.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.46.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.46.exp_probs_b.bias
F32
F32
[256]
blk.46.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.46.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.46.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.46.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.46.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.46.ffn_norm.weight
F32
F32
[7168]
blk.46.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.46.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.47
blk.47.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.47.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.47.attn_kv_a_norm.weight
F32
F32
[512]
blk.47.attn_norm.weight
F32
F32
[7168]
blk.47.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.47.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.47.attn_q_a_norm.weight
F32
F32
[1536]
blk.47.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.47.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.47.exp_probs_b.bias
F32
F32
[256]
blk.47.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.47.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.47.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.47.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.47.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.47.ffn_norm.weight
F32
F32
[7168]
blk.47.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.47.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.48
blk.48.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.48.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.48.attn_kv_a_norm.weight
F32
F32
[512]
blk.48.attn_norm.weight
F32
F32
[7168]
blk.48.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.48.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.48.attn_q_a_norm.weight
F32
F32
[1536]
blk.48.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.48.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.48.exp_probs_b.bias
F32
F32
[256]
blk.48.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.48.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.48.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.48.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.48.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.48.ffn_norm.weight
F32
F32
[7168]
blk.48.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.48.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.49
blk.49.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.49.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.49.attn_kv_a_norm.weight
F32
F32
[512]
blk.49.attn_norm.weight
F32
F32
[7168]
blk.49.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.49.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.49.attn_q_a_norm.weight
F32
F32
[1536]
blk.49.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.49.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.49.exp_probs_b.bias
F32
F32
[256]
blk.49.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.49.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.49.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.49.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.49.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.49.ffn_norm.weight
F32
F32
[7168]
blk.49.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.49.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.50
blk.50.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.50.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.50.attn_kv_a_norm.weight
F32
F32
[512]
blk.50.attn_norm.weight
F32
F32
[7168]
blk.50.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.50.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.50.attn_q_a_norm.weight
F32
F32
[1536]
blk.50.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.50.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.50.exp_probs_b.bias
F32
F32
[256]
blk.50.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.50.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.50.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.50.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.50.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.50.ffn_norm.weight
F32
F32
[7168]
blk.50.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.50.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.51
blk.51.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.51.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.51.attn_kv_a_norm.weight
F32
F32
[512]
blk.51.attn_norm.weight
F32
F32
[7168]
blk.51.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.51.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.51.attn_q_a_norm.weight
F32
F32
[1536]
blk.51.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.51.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.51.exp_probs_b.bias
F32
F32
[256]
blk.51.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.51.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.51.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.51.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.51.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.51.ffn_norm.weight
F32
F32
[7168]
blk.51.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.51.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.52
blk.52.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.52.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.52.attn_kv_a_norm.weight
F32
F32
[512]
blk.52.attn_norm.weight
F32
F32
[7168]
blk.52.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.52.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.52.attn_q_a_norm.weight
F32
F32
[1536]
blk.52.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.52.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.52.exp_probs_b.bias
F32
F32
[256]
blk.52.ffn_down_exps.weight
Q4_K
Q4_K
[2048, 7168, 256]
blk.52.ffn_down_shexp.weight
Q4_K
Q4_K
[2048, 7168]
blk.52.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.52.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.52.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.52.ffn_norm.weight
F32
F32
[7168]
blk.52.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.52.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.53
blk.53.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.53.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.53.attn_kv_a_norm.weight
F32
F32
[512]
blk.53.attn_norm.weight
F32
F32
[7168]
blk.53.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.53.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.53.attn_q_a_norm.weight
F32
F32
[1536]
blk.53.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.53.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.53.exp_probs_b.bias
F32
F32
[256]
blk.53.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.53.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.53.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.53.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.53.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.53.ffn_norm.weight
F32
F32
[7168]
blk.53.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.53.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.54
blk.54.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.54.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.54.attn_kv_a_norm.weight
F32
F32
[512]
blk.54.attn_norm.weight
F32
F32
[7168]
blk.54.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.54.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.54.attn_q_a_norm.weight
F32
F32
[1536]
blk.54.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.54.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.54.exp_probs_b.bias
F32
F32
[256]
blk.54.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.54.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.54.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.54.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.54.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.54.ffn_norm.weight
F32
F32
[7168]
blk.54.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.54.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.55
blk.55.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.55.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.55.attn_kv_a_norm.weight
F32
F32
[512]
blk.55.attn_norm.weight
F32
F32
[7168]
blk.55.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.55.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.55.attn_q_a_norm.weight
F32
F32
[1536]
blk.55.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.55.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.55.exp_probs_b.bias
F32
F32
[256]
blk.55.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.55.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.55.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.55.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.55.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.55.ffn_norm.weight
F32
F32
[7168]
blk.55.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.55.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.56
blk.56.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.56.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.56.attn_kv_a_norm.weight
F32
F32
[512]
blk.56.attn_norm.weight
F32
F32
[7168]
blk.56.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.56.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.56.attn_q_a_norm.weight
F32
F32
[1536]
blk.56.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.56.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.56.exp_probs_b.bias
F32
F32
[256]
blk.56.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.56.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.56.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.56.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.56.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.56.ffn_norm.weight
F32
F32
[7168]
blk.56.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.56.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.57
blk.57.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.57.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.57.attn_kv_a_norm.weight
F32
F32
[512]
blk.57.attn_norm.weight
F32
F32
[7168]
blk.57.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.57.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.57.attn_q_a_norm.weight
F32
F32
[1536]
blk.57.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.57.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.57.exp_probs_b.bias
F32
F32
[256]
blk.57.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.57.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.57.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.57.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.57.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.57.ffn_norm.weight
F32
F32
[7168]
blk.57.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.57.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.58
blk.58.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.58.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.58.attn_kv_a_norm.weight
F32
F32
[512]
blk.58.attn_norm.weight
F32
F32
[7168]
blk.58.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.58.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.58.attn_q_a_norm.weight
F32
F32
[1536]
blk.58.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.58.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.58.exp_probs_b.bias
F32
F32
[256]
blk.58.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.58.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.58.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.58.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.58.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.58.ffn_norm.weight
F32
F32
[7168]
blk.58.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.58.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.59
blk.59.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.59.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.59.attn_kv_a_norm.weight
F32
F32
[512]
blk.59.attn_norm.weight
F32
F32
[7168]
blk.59.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.59.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.59.attn_q_a_norm.weight
F32
F32
[1536]
blk.59.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.59.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.59.exp_probs_b.bias
F32
F32
[256]
blk.59.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.59.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.59.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.59.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.59.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.59.ffn_norm.weight
F32
F32
[7168]
blk.59.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.59.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.60
blk.60.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 128]
blk.60.attn_kv_a_mqa.weight
Q4_K
Q4_K
[7168, 576]
blk.60.attn_kv_a_norm.weight
F32
F32
[512]
blk.60.attn_norm.weight
F32
F32
[7168]
blk.60.attn_output.weight
Q4_K
Q4_K
[16384, 7168]
blk.60.attn_q_a.weight
Q4_K
Q4_K
[7168, 1536]
blk.60.attn_q_a_norm.weight
F32
F32
[1536]
blk.60.attn_q_b.weight
Q4_K
Q4_K
[1536, 24576]
blk.60.attn_v_b.weight
Q4_K
Q4_K
[512, 128, 128]
blk.60.exp_probs_b.bias
F32
F32
[256]
blk.60.ffn_down_exps.weight
Q6_K
Q6_K
[2048, 7168, 256]
blk.60.ffn_down_shexp.weight
Q6_K
Q6_K
[2048, 7168]
blk.60.ffn_gate_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.60.ffn_gate_inp.weight
F32
F32
[7168, 256]
blk.60.ffn_gate_shexp.weight
Q4_K
Q4_K
[7168, 2048]
blk.60.ffn_norm.weight
F32
F32
[7168]
blk.60.ffn_up_exps.weight
Q4_K
Q4_K
[7168, 2048, 256]
blk.60.ffn_up_shexp.weight
Q4_K
Q4_K
[7168, 2048]
output.weight
Q6_K
Q6_K
[7168, 128815]
output_norm.weight
F32
F32
[7168]