Models
Docs
Pricing
Sign in
Download
Models
Download
Docs
Pricing
Sign in
Bored
/
GigaChat3.1-10B-A1.8B-q4_K_M
:latest
23
Downloads
Updated
yesterday
GigaChat 3.1 Lightning is the compact instruct model of the GigaChat 3.1 family. It is a Mixture-of-Experts (MoE) model with 10B total parameters and 1.8B active parameters, designed for fast multilingual assistant workloads, reasoning, code
GigaChat 3.1 Lightning is the compact instruct model of the GigaChat 3.1 family. It is a Mixture-of-Experts (MoE) model with 10B total parameters and 1.8B active parameters, designed for fast multilingual assistant workloads, reasoning, code
Cancel
GigaChat3.1-10B-A1.8B-q4_K_M:latest
...
/
model
68a8732fb5ce · 6.5GB
Metadata
general.architecture
deepseek2
deepseek2
general.file_type
Q4_K_M
Q4_K_M
deepseek2.attention.head_count
32
32
deepseek2.attention.head_count_kv
1
1
deepseek2.attention.key_length
576
576
deepseek2.attention.key_length_mla
192
192
deepseek2.attention.kv_lora_rank
512
512
deepseek2.attention.layer_norm_rms_epsilon
1e-06
1e-06
deepseek2.attention.value_length
512
512
deepseek2.attention.value_length_mla
192
192
deepseek2.block_count
26
26
deepseek2.context_length
262144
262144
deepseek2.embedding_length
1536
1536
deepseek2.expert_count
64
64
deepseek2.expert_feed_forward_length
1280
1280
deepseek2.expert_gating_func
2
2
deepseek2.expert_group_count
1
1
deepseek2.expert_group_used_count
1
1
deepseek2.expert_shared_count
1
1
deepseek2.expert_used_count
4
4
deepseek2.expert_weights_norm
true
true
deepseek2.expert_weights_scale
1
1
deepseek2.feed_forward_length
8960
8960
deepseek2.leading_dense_block_count
1
1
deepseek2.rope.dimension_count
64
64
deepseek2.rope.freq_base
100000
100000
deepseek2.rope.scaling.factor
64
64
deepseek2.rope.scaling.original_context_length
4096
4096
deepseek2.rope.scaling.type
yarn
yarn
deepseek2.rope.scaling.yarn_beta_fast
32
32
deepseek2.rope.scaling.yarn_beta_slow
1
1
deepseek2.rope.scaling.yarn_log_multiplier
0.1
0.1
deepseek2.vocab_size
128256
128256
tokenizer.ggml.add_bos_token
true
true
tokenizer.ggml.bos_token_id
1
1
tokenizer.ggml.eos_token_id
2
2
tokenizer.ggml.merges
[Ð ¾, Ð °, Ð µ, Ð ¸, Ð ½, ...]
[Ð ¾, Ð °, Ð µ, Ð ¸, Ð ½, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.pre
gigachat
gigachat
tokenizer.ggml.token_type
[1, 3, 3, 1, 1, ...]
[1, 3, 3, 1, 1, ...]
tokenizer.ggml.tokens
[<unk>, <s>, </s>, !, ", ...]
[<unk>, <s>, </s>, !, ", ...]
Tensor
Name
Type
Shape
token_embd.weight
Q4_K
Q4_K
[1536, 128256]
blk.0
blk.0.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.0.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.0.attn_kv_a_norm.weight
F32
F32
[512]
blk.0.attn_norm.weight
F32
F32
[1536]
blk.0.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.0.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.0.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.0.ffn_down.weight
Q6_K
Q6_K
[8960, 1536]
blk.0.ffn_gate.weight
Q4_K
Q4_K
[1536, 8960]
blk.0.ffn_norm.weight
F32
F32
[1536]
blk.0.ffn_up.weight
Q4_K
Q4_K
[1536, 8960]
blk.1
blk.1.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.1.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.1.attn_kv_a_norm.weight
F32
F32
[512]
blk.1.attn_norm.weight
F32
F32
[1536]
blk.1.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.1.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.1.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.1.exp_probs_b.bias
F32
F32
[64]
blk.1.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.1.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.1.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.1.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.1.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.1.ffn_norm.weight
F32
F32
[1536]
blk.1.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.1.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.2
blk.2.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.2.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.2.attn_kv_a_norm.weight
F32
F32
[512]
blk.2.attn_norm.weight
F32
F32
[1536]
blk.2.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.2.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.2.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.2.exp_probs_b.bias
F32
F32
[64]
blk.2.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.2.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.2.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.2.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.2.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.2.ffn_norm.weight
F32
F32
[1536]
blk.2.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.2.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.3
blk.3.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.3.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.3.attn_kv_a_norm.weight
F32
F32
[512]
blk.3.attn_norm.weight
F32
F32
[1536]
blk.3.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.3.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.3.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.3.exp_probs_b.bias
F32
F32
[64]
blk.3.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.3.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.3.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.3.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.3.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.3.ffn_norm.weight
F32
F32
[1536]
blk.3.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.3.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.4
blk.4.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.4.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.4.attn_kv_a_norm.weight
F32
F32
[512]
blk.4.attn_norm.weight
F32
F32
[1536]
blk.4.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.4.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.4.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.4.exp_probs_b.bias
F32
F32
[64]
blk.4.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.4.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.4.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.4.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.4.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.4.ffn_norm.weight
F32
F32
[1536]
blk.4.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.4.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.5
blk.5.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.5.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.5.attn_kv_a_norm.weight
F32
F32
[512]
blk.5.attn_norm.weight
F32
F32
[1536]
blk.5.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.5.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.5.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.5.exp_probs_b.bias
F32
F32
[64]
blk.5.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.5.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.5.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.5.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.5.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.5.ffn_norm.weight
F32
F32
[1536]
blk.5.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.5.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.6
blk.6.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.6.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.6.attn_kv_a_norm.weight
F32
F32
[512]
blk.6.attn_norm.weight
F32
F32
[1536]
blk.6.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.6.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.6.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.6.exp_probs_b.bias
F32
F32
[64]
blk.6.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.6.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.6.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.6.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.6.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.6.ffn_norm.weight
F32
F32
[1536]
blk.6.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.6.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.7
blk.7.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.7.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.7.attn_kv_a_norm.weight
F32
F32
[512]
blk.7.attn_norm.weight
F32
F32
[1536]
blk.7.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.7.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.7.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.7.exp_probs_b.bias
F32
F32
[64]
blk.7.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.7.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.7.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.7.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.7.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.7.ffn_norm.weight
F32
F32
[1536]
blk.7.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.7.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.8
blk.8.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.8.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.8.attn_kv_a_norm.weight
F32
F32
[512]
blk.8.attn_norm.weight
F32
F32
[1536]
blk.8.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.8.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.8.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.8.exp_probs_b.bias
F32
F32
[64]
blk.8.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.8.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.8.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.8.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.8.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.8.ffn_norm.weight
F32
F32
[1536]
blk.8.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.8.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.9
blk.9.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.9.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.9.attn_kv_a_norm.weight
F32
F32
[512]
blk.9.attn_norm.weight
F32
F32
[1536]
blk.9.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.9.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.9.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.9.exp_probs_b.bias
F32
F32
[64]
blk.9.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.9.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.9.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.9.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.9.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.9.ffn_norm.weight
F32
F32
[1536]
blk.9.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.9.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.10
blk.10.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.10.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.10.attn_kv_a_norm.weight
F32
F32
[512]
blk.10.attn_norm.weight
F32
F32
[1536]
blk.10.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.10.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.10.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.10.exp_probs_b.bias
F32
F32
[64]
blk.10.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.10.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.10.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.10.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.10.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.10.ffn_norm.weight
F32
F32
[1536]
blk.10.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.10.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.11
blk.11.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.11.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.11.attn_kv_a_norm.weight
F32
F32
[512]
blk.11.attn_norm.weight
F32
F32
[1536]
blk.11.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.11.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.11.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.11.exp_probs_b.bias
F32
F32
[64]
blk.11.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.11.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.11.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.11.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.11.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.11.ffn_norm.weight
F32
F32
[1536]
blk.11.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.11.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.12
blk.12.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.12.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.12.attn_kv_a_norm.weight
F32
F32
[512]
blk.12.attn_norm.weight
F32
F32
[1536]
blk.12.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.12.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.12.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.12.exp_probs_b.bias
F32
F32
[64]
blk.12.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.12.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.12.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.12.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.12.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.12.ffn_norm.weight
F32
F32
[1536]
blk.12.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.12.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.13
blk.13.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.13.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.13.attn_kv_a_norm.weight
F32
F32
[512]
blk.13.attn_norm.weight
F32
F32
[1536]
blk.13.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.13.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.13.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.13.exp_probs_b.bias
F32
F32
[64]
blk.13.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.13.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.13.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.13.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.13.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.13.ffn_norm.weight
F32
F32
[1536]
blk.13.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.13.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.14
blk.14.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.14.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.14.attn_kv_a_norm.weight
F32
F32
[512]
blk.14.attn_norm.weight
F32
F32
[1536]
blk.14.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.14.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.14.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.14.exp_probs_b.bias
F32
F32
[64]
blk.14.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.14.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.14.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.14.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.14.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.14.ffn_norm.weight
F32
F32
[1536]
blk.14.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.14.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.15
blk.15.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.15.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.15.attn_kv_a_norm.weight
F32
F32
[512]
blk.15.attn_norm.weight
F32
F32
[1536]
blk.15.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.15.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.15.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.15.exp_probs_b.bias
F32
F32
[64]
blk.15.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.15.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.15.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.15.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.15.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.15.ffn_norm.weight
F32
F32
[1536]
blk.15.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.15.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.16
blk.16.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.16.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.16.attn_kv_a_norm.weight
F32
F32
[512]
blk.16.attn_norm.weight
F32
F32
[1536]
blk.16.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.16.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.16.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.16.exp_probs_b.bias
F32
F32
[64]
blk.16.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.16.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.16.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.16.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.16.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.16.ffn_norm.weight
F32
F32
[1536]
blk.16.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.16.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.17
blk.17.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.17.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.17.attn_kv_a_norm.weight
F32
F32
[512]
blk.17.attn_norm.weight
F32
F32
[1536]
blk.17.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.17.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.17.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.17.exp_probs_b.bias
F32
F32
[64]
blk.17.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.17.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.17.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.17.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.17.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.17.ffn_norm.weight
F32
F32
[1536]
blk.17.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.17.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.18
blk.18.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.18.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.18.attn_kv_a_norm.weight
F32
F32
[512]
blk.18.attn_norm.weight
F32
F32
[1536]
blk.18.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.18.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.18.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.18.exp_probs_b.bias
F32
F32
[64]
blk.18.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.18.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.18.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.18.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.18.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.18.ffn_norm.weight
F32
F32
[1536]
blk.18.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.18.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.19
blk.19.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.19.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.19.attn_kv_a_norm.weight
F32
F32
[512]
blk.19.attn_norm.weight
F32
F32
[1536]
blk.19.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.19.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.19.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.19.exp_probs_b.bias
F32
F32
[64]
blk.19.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.19.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.19.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.19.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.19.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.19.ffn_norm.weight
F32
F32
[1536]
blk.19.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.19.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.20
blk.20.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.20.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.20.attn_kv_a_norm.weight
F32
F32
[512]
blk.20.attn_norm.weight
F32
F32
[1536]
blk.20.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.20.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.20.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.20.exp_probs_b.bias
F32
F32
[64]
blk.20.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.20.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.20.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.20.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.20.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.20.ffn_norm.weight
F32
F32
[1536]
blk.20.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.20.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.21
blk.21.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.21.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.21.attn_kv_a_norm.weight
F32
F32
[512]
blk.21.attn_norm.weight
F32
F32
[1536]
blk.21.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.21.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.21.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.21.exp_probs_b.bias
F32
F32
[64]
blk.21.ffn_down_exps.weight
Q4_K
Q4_K
[1280, 1536, 64]
blk.21.ffn_down_shexp.weight
Q4_K
Q4_K
[1280, 1536]
blk.21.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.21.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.21.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.21.ffn_norm.weight
F32
F32
[1536]
blk.21.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.21.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.22
blk.22.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.22.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.22.attn_kv_a_norm.weight
F32
F32
[512]
blk.22.attn_norm.weight
F32
F32
[1536]
blk.22.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.22.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.22.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.22.exp_probs_b.bias
F32
F32
[64]
blk.22.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.22.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.22.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.22.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.22.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.22.ffn_norm.weight
F32
F32
[1536]
blk.22.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.22.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.23
blk.23.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.23.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.23.attn_kv_a_norm.weight
F32
F32
[512]
blk.23.attn_norm.weight
F32
F32
[1536]
blk.23.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.23.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.23.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.23.exp_probs_b.bias
F32
F32
[64]
blk.23.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.23.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.23.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.23.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.23.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.23.ffn_norm.weight
F32
F32
[1536]
blk.23.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.23.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.24
blk.24.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.24.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.24.attn_kv_a_norm.weight
F32
F32
[512]
blk.24.attn_norm.weight
F32
F32
[1536]
blk.24.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.24.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.24.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.24.exp_probs_b.bias
F32
F32
[64]
blk.24.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.24.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.24.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.24.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.24.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.24.ffn_norm.weight
F32
F32
[1536]
blk.24.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.24.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.25
blk.25.attn_k_b.weight
Q5_0
Q5_0
[128, 512, 32]
blk.25.attn_kv_a_mqa.weight
Q4_K
Q4_K
[1536, 576]
blk.25.attn_kv_a_norm.weight
F32
F32
[512]
blk.25.attn_norm.weight
F32
F32
[1536]
blk.25.attn_output.weight
Q4_K
Q4_K
[6144, 1536]
blk.25.attn_q.weight
Q4_K
Q4_K
[1536, 6144]
blk.25.attn_v_b.weight
Q4_K
Q4_K
[512, 192, 32]
blk.25.exp_probs_b.bias
F32
F32
[64]
blk.25.ffn_down_exps.weight
Q6_K
Q6_K
[1280, 1536, 64]
blk.25.ffn_down_shexp.weight
Q6_K
Q6_K
[1280, 1536]
blk.25.ffn_gate_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.25.ffn_gate_inp.weight
F32
F32
[1536, 64]
blk.25.ffn_gate_shexp.weight
Q4_K
Q4_K
[1536, 1280]
blk.25.ffn_norm.weight
F32
F32
[1536]
blk.25.ffn_up_exps.weight
Q4_K
Q4_K
[1536, 1280, 64]
blk.25.ffn_up_shexp.weight
Q4_K
Q4_K
[1536, 1280]
output.weight
Q6_K
Q6_K
[1536, 128256]
output_norm.weight
F32
F32
[1536]