gemma3n:e4b

405.8K Downloads · Updated 2 months ago
Gemma 3n models are designed for efficient execution on everyday devices such as laptops, tablets or phones.
Variants: e2b, e4b

gemma3n:e4b/model
38e8dcc30df4 · 7.5GB
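The model can be pulled and run locally with `ollama run gemma3n:e4b`. As a minimal sketch of programmatic use (assuming a local Ollama server on its default port 11434; the prompt is illustrative), the same tag can be called through Ollama's REST API:

```python
import json
import urllib.request

# Non-streaming completion request against Ollama's /api/generate endpoint.
req = urllib.request.Request(
    "http://localhost:11434/api/generate",
    data=json.dumps({
        "model": "gemma3n:e4b",
        "prompt": "Summarize why on-device models matter, in one sentence.",
        "stream": False,
    }).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["response"])
```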
Metadata
general.architecture  gemma3n
general.file_type  Q4_K_M
gemma3n.activation_sparsity_scale  [1.6448535, 1.6448535, 1.6448535, 1.6448535, 1.6448535, ...]
gemma3n.altup.active_idx  0
gemma3n.altup.correct_scale  true
gemma3n.altup.lr_multiplier  1
gemma3n.altup.num_inputs  4
gemma3n.attention.head_count  8
gemma3n.attention.head_count_kv  2
gemma3n.attention.layer_norm_rms_epsilon  1e-06
gemma3n.attention.shared_kv_layers  15
gemma3n.attention.sliding_window  512
gemma3n.attention.sliding_window_pattern  [true, true, true, true, false, ...]
gemma3n.block_count  35
gemma3n.context_length  32768
gemma3n.embedding_length  2048
gemma3n.embedding_length_per_layer_input  256
gemma3n.feed_forward_length  16384
gemma3n.head_dim  256
gemma3n.laurel_rank  64
gemma3n.num_kv_shared_layers  15
gemma3n.rope.freq_base  1e+06
gemma3n.rope.freq_base_local  10000
tokenizer.ggml.add_bos_token  true
tokenizer.ggml.add_eos_token  false
tokenizer.ggml.add_padding_token  false
tokenizer.ggml.add_unknown_token  false
tokenizer.ggml.bos_token_id  2
tokenizer.ggml.eos_token_id  1
tokenizer.ggml.eos_token_ids  [1, 106]
tokenizer.ggml.merges  [ , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁, , , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁▁, ...]
tokenizer.ggml.model  gpt2
tokenizer.ggml.padding_token_id  0
tokenizer.ggml.pre  default
tokenizer.ggml.scores  [0, 1, 2, 3, 4, ...]
tokenizer.ggml.token_type  [3, 3, 3, 3, 4, ...]
tokenizer.ggml.tokens  [<pad>, <eos>, <bos>, <unk>, <mask>, ...]
tokenizer.ggml.unknown_token_id  3
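These key/value pairs are stored in the GGUF header of the model blob above. As a minimal sketch (not Ollama's own tooling; the path `model.gguf` is a placeholder for the downloaded blob), the fixed-size preamble that precedes them can be read with nothing but the standard library:

```python
import struct

def gguf_header(path: str) -> tuple[int, int, int]:
    """Read the fixed GGUF preamble: magic, version, tensor count, KV count."""
    with open(path, "rb") as f:
        if f.read(4) != b"GGUF":
            raise ValueError("not a GGUF file")
        version, = struct.unpack("<I", f.read(4))    # uint32 format version
        n_tensors, = struct.unpack("<Q", f.read(8))  # uint64 tensor count
        n_kv, = struct.unpack("<Q", f.read(8))       # uint64 metadata KV count
    return version, n_tensors, n_kv

# For this model, the KV count should match the number of metadata rows above.
print(gguf_header("model.gguf"))
```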
Tensor

Name  Type  Shape

token_embd.weight  Q6_K  [2048, 262400]
altup_proj.weight  Q4_K  [2048, 2048, 3]
altup_unembd_proj.weight  Q4_K  [2048, 2048, 3]

blk.0
blk.0.altup_correct_coef.weight  F16  [4, 4]
blk.0.altup_correct_scale.weight  F32  [2048]
blk.0.altup_predict_coef.weight  F16  [4, 16]
blk.0.altup_router.weight  Q4_K  [2048, 4]
blk.0.altup_router_norm.weight  F32  [2048]
blk.0.attn_k.weight  Q4_K  [2048, 512]
blk.0.attn_k_norm.weight  F32  [256]
blk.0.attn_norm.weight  F32  [2048]
blk.0.attn_output.weight  Q4_K  [2048, 2048]
blk.0.attn_q.weight  Q4_K  [2048, 2048]
blk.0.attn_q_norm.weight  F32  [256]
blk.0.attn_v.weight  Q6_K  [2048, 512]
blk.0.ffn_down.weight  Q6_K  [16384, 2048]
blk.0.ffn_gate.weight  Q4_K  [2048, 16384]
blk.0.ffn_norm.weight  F32  [2048]
blk.0.ffn_up.weight  Q4_K  [2048, 16384]
blk.0.inp_gate.weight  Q4_K  [2048, 256]
blk.0.laurel_l.weight  Q4_K  [2048, 64]
blk.0.laurel_post_norm.weight  F32  [2048]
blk.0.laurel_r.weight  Q5_0  [64, 2048]
blk.0.post_attention_norm.weight  F32  [2048]
blk.0.post_ffw_norm.weight  F32  [2048]
blk.0.post_norm.weight  F32  [2048]
blk.0.proj.weight  Q4_K  [256, 2048]

blk.1
blk.1.altup_correct_coef.weight  F16  [4, 4]
blk.1.altup_correct_scale.weight  F32  [2048]
blk.1.altup_predict_coef.weight  F16  [4, 16]
blk.1.altup_router.weight  Q4_K  [2048, 4]
blk.1.altup_router_norm.weight  F32  [2048]
blk.1.attn_k.weight  Q4_K  [2048, 512]
blk.1.attn_k_norm.weight  F32  [256]
blk.1.attn_norm.weight  F32  [2048]
blk.1.attn_output.weight  Q4_K  [2048, 2048]
blk.1.attn_q.weight  Q4_K  [2048, 2048]
blk.1.attn_q_norm.weight  F32  [256]
blk.1.attn_v.weight  Q6_K  [2048, 512]
blk.1.ffn_down.weight  Q6_K  [16384, 2048]
blk.1.ffn_gate.weight  Q4_K  [2048, 16384]
blk.1.ffn_norm.weight  F32  [2048]
blk.1.ffn_up.weight  Q4_K  [2048, 16384]
blk.1.inp_gate.weight  Q4_K  [2048, 256]
blk.1.laurel_l.weight  Q4_K  [2048, 64]
blk.1.laurel_post_norm.weight  F32  [2048]
blk.1.laurel_r.weight  Q5_0  [64, 2048]
blk.1.post_attention_norm.weight  F32  [2048]
blk.1.post_ffw_norm.weight  F32  [2048]
blk.1.post_norm.weight  F32  [2048]
blk.1.proj.weight  Q4_K  [256, 2048]

blk.2
blk.2.altup_correct_coef.weight  F16  [4, 4]
blk.2.altup_correct_scale.weight  F32  [2048]
blk.2.altup_predict_coef.weight  F16  [4, 16]
blk.2.altup_router.weight  Q4_K  [2048, 4]
blk.2.altup_router_norm.weight  F32  [2048]
blk.2.attn_k.weight  Q4_K  [2048, 512]
blk.2.attn_k_norm.weight  F32  [256]
blk.2.attn_norm.weight  F32  [2048]
blk.2.attn_output.weight  Q4_K  [2048, 2048]
blk.2.attn_q.weight  Q4_K  [2048, 2048]
blk.2.attn_q_norm.weight  F32  [256]
blk.2.attn_v.weight  Q6_K  [2048, 512]
blk.2.ffn_down.weight  Q6_K  [16384, 2048]
blk.2.ffn_gate.weight  Q4_K  [2048, 16384]
blk.2.ffn_norm.weight  F32  [2048]
blk.2.ffn_up.weight  Q4_K  [2048, 16384]
blk.2.inp_gate.weight  Q4_K  [2048, 256]
blk.2.laurel_l.weight  Q4_K  [2048, 64]
blk.2.laurel_post_norm.weight  F32  [2048]
blk.2.laurel_r.weight  Q5_0  [64, 2048]
blk.2.post_attention_norm.weight  F32  [2048]
blk.2.post_ffw_norm.weight  F32  [2048]
blk.2.post_norm.weight  F32  [2048]
blk.2.proj.weight  Q4_K  [256, 2048]

blk.3
blk.3.altup_correct_coef.weight  F16  [4, 4]
blk.3.altup_correct_scale.weight  F32  [2048]
blk.3.altup_predict_coef.weight  F16  [4, 16]
blk.3.altup_router.weight  Q4_K  [2048, 4]
blk.3.altup_router_norm.weight  F32  [2048]
blk.3.attn_k.weight  Q4_K  [2048, 512]
blk.3.attn_k_norm.weight  F32  [256]
blk.3.attn_norm.weight  F32  [2048]
blk.3.attn_output.weight  Q4_K  [2048, 2048]
blk.3.attn_q.weight  Q4_K  [2048, 2048]
blk.3.attn_q_norm.weight  F32  [256]
blk.3.attn_v.weight  Q6_K  [2048, 512]
blk.3.ffn_down.weight  Q6_K  [16384, 2048]
blk.3.ffn_gate.weight  Q4_K  [2048, 16384]
blk.3.ffn_norm.weight  F32  [2048]
blk.3.ffn_up.weight  Q4_K  [2048, 16384]
blk.3.inp_gate.weight  Q4_K  [2048, 256]
blk.3.laurel_l.weight  Q4_K  [2048, 64]
blk.3.laurel_post_norm.weight  F32  [2048]
blk.3.laurel_r.weight  Q5_0  [64, 2048]
blk.3.post_attention_norm.weight  F32  [2048]
blk.3.post_ffw_norm.weight  F32  [2048]
blk.3.post_norm.weight  F32  [2048]
blk.3.proj.weight  Q4_K  [256, 2048]

blk.4
blk.4.altup_correct_coef.weight  F16  [4, 4]
blk.4.altup_correct_scale.weight  F32  [2048]
blk.4.altup_predict_coef.weight  F16  [4, 16]
blk.4.altup_router.weight  Q4_K  [2048, 4]
blk.4.altup_router_norm.weight  F32  [2048]
blk.4.attn_k.weight  Q4_K  [2048, 512]
blk.4.attn_k_norm.weight  F32  [256]
blk.4.attn_norm.weight  F32  [2048]
blk.4.attn_output.weight  Q4_K  [2048, 2048]
blk.4.attn_q.weight  Q4_K  [2048, 2048]
blk.4.attn_q_norm.weight  F32  [256]
blk.4.attn_v.weight  Q4_K  [2048, 512]
blk.4.ffn_down.weight  Q4_K  [16384, 2048]
blk.4.ffn_gate.weight  Q4_K  [2048, 16384]
blk.4.ffn_norm.weight  F32  [2048]
blk.4.ffn_up.weight  Q4_K  [2048, 16384]
blk.4.inp_gate.weight  Q4_K  [2048, 256]
blk.4.laurel_l.weight  Q4_K  [2048, 64]
blk.4.laurel_post_norm.weight  F32  [2048]
blk.4.laurel_r.weight  Q5_0  [64, 2048]
blk.4.post_attention_norm.weight  F32  [2048]
blk.4.post_ffw_norm.weight  F32  [2048]
blk.4.post_norm.weight  F32  [2048]
blk.4.proj.weight  Q4_K  [256, 2048]

blk.5
blk.5.altup_correct_coef.weight  F16  [4, 4]
blk.5.altup_correct_scale.weight  F32  [2048]
blk.5.altup_predict_coef.weight  F16  [4, 16]
blk.5.altup_router.weight  Q4_K  [2048, 4]
blk.5.altup_router_norm.weight  F32  [2048]
blk.5.attn_k.weight  Q4_K  [2048, 512]
blk.5.attn_k_norm.weight  F32  [256]
blk.5.attn_norm.weight  F32  [2048]
blk.5.attn_output.weight  Q4_K  [2048, 2048]
blk.5.attn_q.weight  Q4_K  [2048, 2048]
blk.5.attn_q_norm.weight  F32  [256]
blk.5.attn_v.weight  Q4_K  [2048, 512]
blk.5.ffn_down.weight  Q4_K  [16384, 2048]
blk.5.ffn_gate.weight  Q4_K  [2048, 16384]
blk.5.ffn_norm.weight  F32  [2048]
blk.5.ffn_up.weight  Q4_K  [2048, 16384]
blk.5.inp_gate.weight  Q4_K  [2048, 256]
blk.5.laurel_l.weight  Q4_K  [2048, 64]
blk.5.laurel_post_norm.weight  F32  [2048]
blk.5.laurel_r.weight  Q5_0  [64, 2048]
blk.5.post_attention_norm.weight  F32  [2048]
blk.5.post_ffw_norm.weight  F32  [2048]
blk.5.post_norm.weight  F32  [2048]
blk.5.proj.weight  Q4_K  [256, 2048]

blk.6
blk.6.altup_correct_coef.weight  F16  [4, 4]
blk.6.altup_correct_scale.weight  F32  [2048]
blk.6.altup_predict_coef.weight  F16  [4, 16]
blk.6.altup_router.weight  Q4_K  [2048, 4]
blk.6.altup_router_norm.weight  F32  [2048]
blk.6.attn_k.weight  Q4_K  [2048, 512]
blk.6.attn_k_norm.weight  F32  [256]
blk.6.attn_norm.weight  F32  [2048]
blk.6.attn_output.weight  Q4_K  [2048, 2048]
blk.6.attn_q.weight  Q4_K  [2048, 2048]
blk.6.attn_q_norm.weight  F32  [256]
blk.6.attn_v.weight  Q6_K  [2048, 512]
blk.6.ffn_down.weight  Q6_K  [16384, 2048]
blk.6.ffn_gate.weight  Q4_K  [2048, 16384]
blk.6.ffn_norm.weight  F32  [2048]
blk.6.ffn_up.weight  Q4_K  [2048, 16384]
blk.6.inp_gate.weight  Q4_K  [2048, 256]
blk.6.laurel_l.weight  Q4_K  [2048, 64]
blk.6.laurel_post_norm.weight  F32  [2048]
blk.6.laurel_r.weight  Q5_0  [64, 2048]
blk.6.post_attention_norm.weight  F32  [2048]
blk.6.post_ffw_norm.weight  F32  [2048]
blk.6.post_norm.weight  F32  [2048]
blk.6.proj.weight  Q4_K  [256, 2048]

blk.7
blk.7.altup_correct_coef.weight  F16  [4, 4]
blk.7.altup_correct_scale.weight  F32  [2048]
blk.7.altup_predict_coef.weight  F16  [4, 16]
blk.7.altup_router.weight  Q4_K  [2048, 4]
blk.7.altup_router_norm.weight  F32  [2048]
blk.7.attn_k.weight  Q4_K  [2048, 512]
blk.7.attn_k_norm.weight  F32  [256]
blk.7.attn_norm.weight  F32  [2048]
blk.7.attn_output.weight  Q4_K  [2048, 2048]
blk.7.attn_q.weight  Q4_K  [2048, 2048]
blk.7.attn_q_norm.weight  F32  [256]
blk.7.attn_v.weight  Q4_K  [2048, 512]
blk.7.ffn_down.weight  Q4_K  [16384, 2048]
blk.7.ffn_gate.weight  Q4_K  [2048, 16384]
blk.7.ffn_norm.weight  F32  [2048]
blk.7.ffn_up.weight  Q4_K  [2048, 16384]
blk.7.inp_gate.weight  Q4_K  [2048, 256]
blk.7.laurel_l.weight  Q4_K  [2048, 64]
blk.7.laurel_post_norm.weight  F32  [2048]
blk.7.laurel_r.weight  Q5_0  [64, 2048]
blk.7.post_attention_norm.weight  F32  [2048]
blk.7.post_ffw_norm.weight  F32  [2048]
blk.7.post_norm.weight  F32  [2048]
blk.7.proj.weight  Q4_K  [256, 2048]

blk.8
blk.8.altup_correct_coef.weight  F16  [4, 4]
blk.8.altup_correct_scale.weight  F32  [2048]
blk.8.altup_predict_coef.weight  F16  [4, 16]
blk.8.altup_router.weight  Q4_K  [2048, 4]
blk.8.altup_router_norm.weight  F32  [2048]
blk.8.attn_k.weight  Q4_K  [2048, 512]
blk.8.attn_k_norm.weight  F32  [256]
blk.8.attn_norm.weight  F32  [2048]
blk.8.attn_output.weight  Q4_K  [2048, 2048]
blk.8.attn_q.weight  Q4_K  [2048, 2048]
blk.8.attn_q_norm.weight  F32  [256]
blk.8.attn_v.weight  Q4_K  [2048, 512]
blk.8.ffn_down.weight  Q4_K  [16384, 2048]
blk.8.ffn_gate.weight  Q4_K  [2048, 16384]
blk.8.ffn_norm.weight  F32  [2048]
blk.8.ffn_up.weight  Q4_K  [2048, 16384]
blk.8.inp_gate.weight  Q4_K  [2048, 256]
blk.8.laurel_l.weight  Q4_K  [2048, 64]
blk.8.laurel_post_norm.weight  F32  [2048]
blk.8.laurel_r.weight  Q5_0  [64, 2048]
blk.8.post_attention_norm.weight  F32  [2048]
blk.8.post_ffw_norm.weight  F32  [2048]
blk.8.post_norm.weight  F32  [2048]
blk.8.proj.weight  Q4_K  [256, 2048]

blk.9
blk.9.altup_correct_coef.weight  F16  [4, 4]
blk.9.altup_correct_scale.weight  F32  [2048]
blk.9.altup_predict_coef.weight  F16  [4, 16]
blk.9.altup_router.weight  Q4_K  [2048, 4]
blk.9.altup_router_norm.weight  F32  [2048]
blk.9.attn_k.weight  Q4_K  [2048, 512]
blk.9.attn_k_norm.weight  F32  [256]
blk.9.attn_norm.weight  F32  [2048]
blk.9.attn_output.weight  Q4_K  [2048, 2048]
blk.9.attn_q.weight  Q4_K  [2048, 2048]
blk.9.attn_q_norm.weight  F32  [256]
blk.9.attn_v.weight  Q6_K  [2048, 512]
blk.9.ffn_down.weight  Q6_K  [16384, 2048]
blk.9.ffn_gate.weight  Q4_K  [2048, 16384]
blk.9.ffn_norm.weight  F32  [2048]
blk.9.ffn_up.weight  Q4_K  [2048, 16384]
blk.9.inp_gate.weight  Q4_K  [2048, 256]
blk.9.laurel_l.weight  Q4_K  [2048, 64]
blk.9.laurel_post_norm.weight  F32  [2048]
blk.9.laurel_r.weight  Q5_0  [64, 2048]
blk.9.post_attention_norm.weight  F32  [2048]
blk.9.post_ffw_norm.weight  F32  [2048]
blk.9.post_norm.weight  F32  [2048]
blk.9.proj.weight  Q4_K  [256, 2048]

blk.10
blk.10.altup_correct_coef.weight  F16  [4, 4]
blk.10.altup_correct_scale.weight  F32  [2048]
blk.10.altup_predict_coef.weight  F16  [4, 16]
blk.10.altup_router.weight  Q4_K  [2048, 4]
blk.10.altup_router_norm.weight  F32  [2048]
blk.10.attn_k.weight  Q4_K  [2048, 512]
blk.10.attn_k_norm.weight  F32  [256]
blk.10.attn_norm.weight  F32  [2048]
blk.10.attn_output.weight  Q4_K  [2048, 2048]
blk.10.attn_q.weight  Q4_K  [2048, 2048]
blk.10.attn_q_norm.weight  F32  [256]
blk.10.attn_v.weight  Q4_K  [2048, 512]
blk.10.ffn_down.weight  Q4_K  [16384, 2048]
blk.10.ffn_gate.weight  Q4_K  [2048, 16384]
blk.10.ffn_norm.weight  F32  [2048]
blk.10.ffn_up.weight  Q4_K  [2048, 16384]
blk.10.inp_gate.weight  Q4_K  [2048, 256]
blk.10.laurel_l.weight  Q4_K  [2048, 64]
blk.10.laurel_post_norm.weight  F32  [2048]
blk.10.laurel_r.weight  Q5_0  [64, 2048]
blk.10.post_attention_norm.weight  F32  [2048]
blk.10.post_ffw_norm.weight  F32  [2048]
blk.10.post_norm.weight  F32  [2048]
blk.10.proj.weight  Q4_K  [256, 2048]

blk.11
blk.11.altup_correct_coef.weight  F16  [4, 4]
blk.11.altup_correct_scale.weight  F32  [2048]
blk.11.altup_predict_coef.weight  F16  [4, 16]
blk.11.altup_router.weight  Q4_K  [2048, 4]
blk.11.altup_router_norm.weight  F32  [2048]
blk.11.attn_k.weight  Q4_K  [2048, 512]
blk.11.attn_k_norm.weight  F32  [256]
blk.11.attn_norm.weight  F32  [2048]
blk.11.attn_output.weight  Q4_K  [2048, 2048]
blk.11.attn_q.weight  Q4_K  [2048, 2048]
blk.11.attn_q_norm.weight  F32  [256]
blk.11.attn_v.weight  Q4_K  [2048, 512]
blk.11.ffn_down.weight  Q4_K  [16384, 2048]
blk.11.ffn_gate.weight  Q4_K  [2048, 16384]
blk.11.ffn_norm.weight  F32  [2048]
blk.11.ffn_up.weight  Q4_K  [2048, 16384]
blk.11.inp_gate.weight  Q4_K  [2048, 256]
blk.11.laurel_l.weight  Q4_K  [2048, 64]
blk.11.laurel_post_norm.weight  F32  [2048]
blk.11.laurel_r.weight  Q5_0  [64, 2048]
blk.11.post_attention_norm.weight  F32  [2048]
blk.11.post_ffw_norm.weight  F32  [2048]
blk.11.post_norm.weight  F32  [2048]
blk.11.proj.weight  Q4_K  [256, 2048]

blk.12
blk.12.altup_correct_coef.weight  F16  [4, 4]
blk.12.altup_correct_scale.weight  F32  [2048]
blk.12.altup_predict_coef.weight  F16  [4, 16]
blk.12.altup_router.weight  Q4_K  [2048, 4]
blk.12.altup_router_norm.weight  F32  [2048]
blk.12.attn_k.weight  Q4_K  [2048, 512]
blk.12.attn_k_norm.weight  F32  [256]
blk.12.attn_norm.weight  F32  [2048]
blk.12.attn_output.weight  Q4_K  [2048, 2048]
blk.12.attn_q.weight  Q4_K  [2048, 2048]
blk.12.attn_q_norm.weight  F32  [256]
blk.12.attn_v.weight  Q6_K  [2048, 512]
blk.12.ffn_down.weight  Q6_K  [16384, 2048]
blk.12.ffn_gate.weight  Q4_K  [2048, 16384]
blk.12.ffn_norm.weight  F32  [2048]
blk.12.ffn_up.weight  Q4_K  [2048, 16384]
blk.12.inp_gate.weight  Q4_K  [2048, 256]
blk.12.laurel_l.weight  Q4_K  [2048, 64]
blk.12.laurel_post_norm.weight  F32  [2048]
blk.12.laurel_r.weight  Q5_0  [64, 2048]
blk.12.post_attention_norm.weight  F32  [2048]
blk.12.post_ffw_norm.weight  F32  [2048]
blk.12.post_norm.weight  F32  [2048]
blk.12.proj.weight  Q4_K  [256, 2048]

blk.13
blk.13.altup_correct_coef.weight  F16  [4, 4]
blk.13.altup_correct_scale.weight  F32  [2048]
blk.13.altup_predict_coef.weight  F16  [4, 16]
blk.13.altup_router.weight  Q4_K  [2048, 4]
blk.13.altup_router_norm.weight  F32  [2048]
blk.13.attn_k.weight  Q4_K  [2048, 512]
blk.13.attn_k_norm.weight  F32  [256]
blk.13.attn_norm.weight  F32  [2048]
blk.13.attn_output.weight  Q4_K  [2048, 2048]
blk.13.attn_q.weight  Q4_K  [2048, 2048]
blk.13.attn_q_norm.weight  F32  [256]
blk.13.attn_v.weight  Q4_K  [2048, 512]
blk.13.ffn_down.weight  Q4_K  [16384, 2048]
blk.13.ffn_gate.weight  Q4_K  [2048, 16384]
blk.13.ffn_norm.weight  F32  [2048]
blk.13.ffn_up.weight  Q4_K  [2048, 16384]
blk.13.inp_gate.weight  Q4_K  [2048, 256]
blk.13.laurel_l.weight  Q4_K  [2048, 64]
blk.13.laurel_post_norm.weight  F32  [2048]
blk.13.laurel_r.weight  Q5_0  [64, 2048]
blk.13.post_attention_norm.weight  F32  [2048]
blk.13.post_ffw_norm.weight  F32  [2048]
blk.13.post_norm.weight  F32  [2048]
blk.13.proj.weight  Q4_K  [256, 2048]

blk.14
blk.14.altup_correct_coef.weight  F16  [4, 4]
blk.14.altup_correct_scale.weight  F32  [2048]
blk.14.altup_predict_coef.weight  F16  [4, 16]
blk.14.altup_router.weight  Q4_K  [2048, 4]
blk.14.altup_router_norm.weight  F32  [2048]
blk.14.attn_k.weight  Q4_K  [2048, 512]
blk.14.attn_k_norm.weight  F32  [256]
blk.14.attn_norm.weight  F32  [2048]
blk.14.attn_output.weight  Q4_K  [2048, 2048]
blk.14.attn_q.weight  Q4_K  [2048, 2048]
blk.14.attn_q_norm.weight  F32  [256]
blk.14.attn_v.weight  Q4_K  [2048, 512]
blk.14.ffn_down.weight  Q4_K  [16384, 2048]
blk.14.ffn_gate.weight  Q4_K  [2048, 16384]
blk.14.ffn_norm.weight  F32  [2048]
blk.14.ffn_up.weight  Q4_K  [2048, 16384]
blk.14.inp_gate.weight  Q4_K  [2048, 256]
blk.14.laurel_l.weight  Q4_K  [2048, 64]
blk.14.laurel_post_norm.weight  F32  [2048]
blk.14.laurel_r.weight  Q5_0  [64, 2048]
blk.14.post_attention_norm.weight  F32  [2048]
blk.14.post_ffw_norm.weight  F32  [2048]
blk.14.post_norm.weight  F32  [2048]
blk.14.proj.weight  Q4_K  [256, 2048]

blk.15
blk.15.altup_correct_coef.weight  F16  [4, 4]
blk.15.altup_correct_scale.weight  F32  [2048]
blk.15.altup_predict_coef.weight  F16  [4, 16]
blk.15.altup_router.weight  Q4_K  [2048, 4]
blk.15.altup_router_norm.weight  F32  [2048]
blk.15.attn_k.weight  Q4_K  [2048, 512]
blk.15.attn_k_norm.weight  F32  [256]
blk.15.attn_norm.weight  F32  [2048]
blk.15.attn_output.weight  Q4_K  [2048, 2048]
blk.15.attn_q.weight  Q4_K  [2048, 2048]
blk.15.attn_q_norm.weight  F32  [256]
blk.15.attn_v.weight  Q6_K  [2048, 512]
blk.15.ffn_down.weight  Q6_K  [16384, 2048]
blk.15.ffn_gate.weight  Q4_K  [2048, 16384]
blk.15.ffn_norm.weight  F32  [2048]
blk.15.ffn_up.weight  Q4_K  [2048, 16384]
blk.15.inp_gate.weight  Q4_K  [2048, 256]
blk.15.laurel_l.weight  Q4_K  [2048, 64]
blk.15.laurel_post_norm.weight  F32  [2048]
blk.15.laurel_r.weight  Q5_0  [64, 2048]
blk.15.post_attention_norm.weight  F32  [2048]
blk.15.post_ffw_norm.weight  F32  [2048]
blk.15.post_norm.weight  F32  [2048]
blk.15.proj.weight  Q4_K  [256, 2048]

blk.16
blk.16.altup_correct_coef.weight  F16  [4, 4]
blk.16.altup_correct_scale.weight  F32  [2048]
blk.16.altup_predict_coef.weight  F16  [4, 16]
blk.16.altup_router.weight  Q4_K  [2048, 4]
blk.16.altup_router_norm.weight  F32  [2048]
blk.16.attn_k.weight  Q4_K  [2048, 512]
blk.16.attn_k_norm.weight  F32  [256]
blk.16.attn_norm.weight  F32  [2048]
blk.16.attn_output.weight  Q4_K  [2048, 2048]
blk.16.attn_q.weight  Q4_K  [2048, 2048]
blk.16.attn_q_norm.weight  F32  [256]
blk.16.attn_v.weight  Q4_K  [2048, 512]
blk.16.ffn_down.weight  Q4_K  [16384, 2048]
blk.16.ffn_gate.weight  Q4_K  [2048, 16384]
blk.16.ffn_norm.weight  F32  [2048]
blk.16.ffn_up.weight  Q4_K  [2048, 16384]
blk.16.inp_gate.weight  Q4_K  [2048, 256]
blk.16.laurel_l.weight  Q4_K  [2048, 64]
blk.16.laurel_post_norm.weight  F32  [2048]
blk.16.laurel_r.weight  Q5_0  [64, 2048]
blk.16.post_attention_norm.weight  F32  [2048]
blk.16.post_ffw_norm.weight  F32  [2048]
blk.16.post_norm.weight  F32  [2048]
blk.16.proj.weight  Q4_K  [256, 2048]

blk.17
blk.17.altup_correct_coef.weight  F16  [4, 4]
blk.17.altup_correct_scale.weight  F32  [2048]
blk.17.altup_predict_coef.weight  F16  [4, 16]
blk.17.altup_router.weight  Q4_K  [2048, 4]
blk.17.altup_router_norm.weight  F32  [2048]
blk.17.attn_k.weight  Q4_K  [2048, 512]
blk.17.attn_k_norm.weight  F32  [256]
blk.17.attn_norm.weight  F32  [2048]
blk.17.attn_output.weight  Q4_K  [2048, 2048]
blk.17.attn_q.weight  Q4_K  [2048, 2048]
blk.17.attn_q_norm.weight  F32  [256]
blk.17.attn_v.weight  Q4_K  [2048, 512]
blk.17.ffn_down.weight  Q4_K  [16384, 2048]
blk.17.ffn_gate.weight  Q4_K  [2048, 16384]
blk.17.ffn_norm.weight  F32  [2048]
blk.17.ffn_up.weight  Q4_K  [2048, 16384]
blk.17.inp_gate.weight  Q4_K  [2048, 256]
blk.17.laurel_l.weight  Q4_K  [2048, 64]
blk.17.laurel_post_norm.weight  F32  [2048]
blk.17.laurel_r.weight  Q5_0  [64, 2048]
blk.17.post_attention_norm.weight  F32  [2048]
blk.17.post_ffw_norm.weight  F32  [2048]
blk.17.post_norm.weight  F32  [2048]
blk.17.proj.weight  Q4_K  [256, 2048]

blk.18
blk.18.altup_correct_coef.weight  F16  [4, 4]
blk.18.altup_correct_scale.weight  F32  [2048]
blk.18.altup_predict_coef.weight  F16  [4, 16]
blk.18.altup_router.weight  Q4_K  [2048, 4]
blk.18.altup_router_norm.weight  F32  [2048]
blk.18.attn_k.weight  Q4_K  [2048, 512]
blk.18.attn_k_norm.weight  F32  [256]
blk.18.attn_norm.weight  F32  [2048]
blk.18.attn_output.weight  Q4_K  [2048, 2048]
blk.18.attn_q.weight  Q4_K  [2048, 2048]
blk.18.attn_q_norm.weight  F32  [256]
blk.18.attn_v.weight  Q6_K  [2048, 512]
blk.18.ffn_down.weight  Q6_K  [16384, 2048]
blk.18.ffn_gate.weight  Q4_K  [2048, 16384]
blk.18.ffn_norm.weight  F32  [2048]
blk.18.ffn_up.weight  Q4_K  [2048, 16384]
blk.18.inp_gate.weight  Q4_K  [2048, 256]
blk.18.laurel_l.weight  Q4_K  [2048, 64]
blk.18.laurel_post_norm.weight  F32  [2048]
blk.18.laurel_r.weight  Q5_0  [64, 2048]
blk.18.post_attention_norm.weight  F32  [2048]
blk.18.post_ffw_norm.weight  F32  [2048]
blk.18.post_norm.weight  F32  [2048]
blk.18.proj.weight  Q4_K  [256, 2048]

blk.19
blk.19.altup_correct_coef.weight  F16  [4, 4]
blk.19.altup_correct_scale.weight  F32  [2048]
blk.19.altup_predict_coef.weight  F16  [4, 16]
blk.19.altup_router.weight  Q4_K  [2048, 4]
blk.19.altup_router_norm.weight  F32  [2048]
blk.19.attn_k.weight  Q4_K  [2048, 512]
blk.19.attn_k_norm.weight  F32  [256]
blk.19.attn_norm.weight  F32  [2048]
blk.19.attn_output.weight  Q4_K  [2048, 2048]
blk.19.attn_q.weight  Q4_K  [2048, 2048]
blk.19.attn_q_norm.weight  F32  [256]
blk.19.attn_v.weight  Q4_K  [2048, 512]
blk.19.ffn_down.weight  Q4_K  [16384, 2048]
blk.19.ffn_gate.weight  Q4_K  [2048, 16384]
blk.19.ffn_norm.weight  F32  [2048]
blk.19.ffn_up.weight  Q4_K  [2048, 16384]
blk.19.inp_gate.weight  Q4_K  [2048, 256]
blk.19.laurel_l.weight  Q4_K  [2048, 64]
blk.19.laurel_post_norm.weight  F32  [2048]
blk.19.laurel_r.weight  Q5_0  [64, 2048]
blk.19.post_attention_norm.weight  F32  [2048]
blk.19.post_ffw_norm.weight  F32  [2048]
blk.19.post_norm.weight  F32  [2048]
blk.19.proj.weight  Q4_K  [256, 2048]

blk.20
blk.20.altup_correct_coef.weight  F16  [4, 4]
blk.20.altup_correct_scale.weight  F32  [2048]
blk.20.altup_predict_coef.weight  F16  [4, 16]
blk.20.altup_router.weight  Q4_K  [2048, 4]
blk.20.altup_router_norm.weight  F32  [2048]
blk.20.attn_k.weight  Q4_K  [2048, 512]
blk.20.attn_k_norm.weight  F32  [256]
blk.20.attn_norm.weight  F32  [2048]
blk.20.attn_output.weight  Q4_K  [2048, 2048]
blk.20.attn_q.weight  Q4_K  [2048, 2048]
blk.20.attn_q_norm.weight  F32  [256]
blk.20.attn_v.weight  Q4_K  [2048, 512]
blk.20.ffn_down.weight  Q4_K  [16384, 2048]
blk.20.ffn_gate.weight  Q4_K  [2048, 16384]
blk.20.ffn_norm.weight  F32  [2048]
blk.20.ffn_up.weight  Q4_K  [2048, 16384]
blk.20.inp_gate.weight  Q4_K  [2048, 256]
blk.20.laurel_l.weight  Q4_K  [2048, 64]
blk.20.laurel_post_norm.weight  F32  [2048]
blk.20.laurel_r.weight  Q5_0  [64, 2048]
blk.20.post_attention_norm.weight  F32  [2048]
blk.20.post_ffw_norm.weight  F32  [2048]
blk.20.post_norm.weight  F32  [2048]
blk.20.proj.weight  Q4_K  [256, 2048]

blk.21
blk.21.altup_correct_coef.weight  F16  [4, 4]
blk.21.altup_correct_scale.weight  F32  [2048]
blk.21.altup_predict_coef.weight  F16  [4, 16]
blk.21.altup_router.weight  Q4_K  [2048, 4]
blk.21.altup_router_norm.weight  F32  [2048]
blk.21.attn_k.weight  Q4_K  [2048, 512]
blk.21.attn_k_norm.weight  F32  [256]
blk.21.attn_norm.weight  F32  [2048]
blk.21.attn_output.weight  Q4_K  [2048, 2048]
blk.21.attn_q.weight  Q4_K  [2048, 2048]
blk.21.attn_q_norm.weight  F32  [256]
blk.21.attn_v.weight  Q6_K  [2048, 512]
blk.21.ffn_down.weight  Q6_K  [16384, 2048]
blk.21.ffn_gate.weight  Q4_K  [2048, 16384]
blk.21.ffn_norm.weight  F32  [2048]
blk.21.ffn_up.weight  Q4_K  [2048, 16384]
blk.21.inp_gate.weight  Q4_K  [2048, 256]
blk.21.laurel_l.weight  Q4_K  [2048, 64]
blk.21.laurel_post_norm.weight  F32  [2048]
blk.21.laurel_r.weight  Q5_0  [64, 2048]
blk.21.post_attention_norm.weight  F32  [2048]
blk.21.post_ffw_norm.weight  F32  [2048]
blk.21.post_norm.weight  F32  [2048]
blk.21.proj.weight  Q4_K  [256, 2048]

blk.22
blk.22.altup_correct_coef.weight  F16  [4, 4]
blk.22.altup_correct_scale.weight  F32  [2048]
blk.22.altup_predict_coef.weight  F16  [4, 16]
blk.22.altup_router.weight  Q4_K  [2048, 4]
blk.22.altup_router_norm.weight  F32  [2048]
blk.22.attn_k.weight  Q4_K  [2048, 512]
blk.22.attn_k_norm.weight  F32  [256]
blk.22.attn_norm.weight  F32  [2048]
blk.22.attn_output.weight  Q4_K  [2048, 2048]
blk.22.attn_q.weight  Q4_K  [2048, 2048]
blk.22.attn_q_norm.weight  F32  [256]
blk.22.attn_v.weight  Q4_K  [2048, 512]
blk.22.ffn_down.weight  Q4_K  [16384, 2048]
blk.22.ffn_gate.weight  Q4_K  [2048, 16384]
blk.22.ffn_norm.weight  F32  [2048]
blk.22.ffn_up.weight  Q4_K  [2048, 16384]
blk.22.inp_gate.weight  Q4_K  [2048, 256]
blk.22.laurel_l.weight  Q4_K  [2048, 64]
blk.22.laurel_post_norm.weight  F32  [2048]
blk.22.laurel_r.weight  Q5_0  [64, 2048]
blk.22.post_attention_norm.weight  F32  [2048]
blk.22.post_ffw_norm.weight  F32  [2048]
blk.22.post_norm.weight  F32  [2048]
blk.22.proj.weight  Q4_K  [256, 2048]

blk.23
blk.23.altup_correct_coef.weight  F16  [4, 4]
blk.23.altup_correct_scale.weight  F32  [2048]
blk.23.altup_predict_coef.weight  F16  [4, 16]
blk.23.altup_router.weight  Q4_K  [2048, 4]
blk.23.altup_router_norm.weight  F32  [2048]
blk.23.attn_k.weight  Q4_K  [2048, 512]
blk.23.attn_k_norm.weight  F32  [256]
blk.23.attn_norm.weight  F32  [2048]
blk.23.attn_output.weight  Q4_K  [2048, 2048]
blk.23.attn_q.weight  Q4_K  [2048, 2048]
blk.23.attn_q_norm.weight  F32  [256]
blk.23.attn_v.weight  Q4_K  [2048, 512]
blk.23.ffn_down.weight  Q4_K  [16384, 2048]
blk.23.ffn_gate.weight  Q4_K  [2048, 16384]
blk.23.ffn_norm.weight  F32  [2048]
blk.23.ffn_up.weight  Q4_K  [2048, 16384]
blk.23.inp_gate.weight  Q4_K  [2048, 256]
blk.23.laurel_l.weight  Q4_K  [2048, 64]
blk.23.laurel_post_norm.weight  F32  [2048]
blk.23.laurel_r.weight  Q5_0  [64, 2048]
blk.23.post_attention_norm.weight  F32  [2048]
blk.23.post_ffw_norm.weight  F32  [2048]
blk.23.post_norm.weight  F32  [2048]
blk.23.proj.weight  Q4_K  [256, 2048]

blk.24
blk.24.altup_correct_coef.weight  F16  [4, 4]
blk.24.altup_correct_scale.weight  F32  [2048]
blk.24.altup_predict_coef.weight  F16  [4, 16]
blk.24.altup_router.weight  Q4_K  [2048, 4]
blk.24.altup_router_norm.weight  F32  [2048]
blk.24.attn_k.weight  Q4_K  [2048, 512]
blk.24.attn_k_norm.weight  F32  [256]
blk.24.attn_norm.weight  F32  [2048]
blk.24.attn_output.weight  Q4_K  [2048, 2048]
blk.24.attn_q.weight  Q4_K  [2048, 2048]
blk.24.attn_q_norm.weight  F32  [256]
blk.24.attn_v.weight  Q6_K  [2048, 512]
blk.24.ffn_down.weight  Q6_K  [16384, 2048]
blk.24.ffn_gate.weight  Q4_K  [2048, 16384]
blk.24.ffn_norm.weight  F32  [2048]
blk.24.ffn_up.weight  Q4_K  [2048, 16384]
blk.24.inp_gate.weight  Q4_K  [2048, 256]
blk.24.laurel_l.weight  Q4_K  [2048, 64]
blk.24.laurel_post_norm.weight  F32  [2048]
blk.24.laurel_r.weight  Q5_0  [64, 2048]
blk.24.post_attention_norm.weight  F32  [2048]
blk.24.post_ffw_norm.weight  F32  [2048]
blk.24.post_norm.weight  F32  [2048]
blk.24.proj.weight  Q4_K  [256, 2048]

blk.25
blk.25.altup_correct_coef.weight  F16  [4, 4]
blk.25.altup_correct_scale.weight  F32  [2048]
blk.25.altup_predict_coef.weight  F16  [4, 16]
blk.25.altup_router.weight  Q4_K  [2048, 4]
blk.25.altup_router_norm.weight  F32  [2048]
blk.25.attn_k.weight  Q4_K  [2048, 512]
blk.25.attn_k_norm.weight  F32  [256]
blk.25.attn_norm.weight  F32  [2048]
blk.25.attn_output.weight  Q4_K  [2048, 2048]
blk.25.attn_q.weight  Q4_K  [2048, 2048]
blk.25.attn_q_norm.weight  F32  [256]
blk.25.attn_v.weight  Q4_K  [2048, 512]
blk.25.ffn_down.weight  Q4_K  [16384, 2048]
blk.25.ffn_gate.weight  Q4_K  [2048, 16384]
blk.25.ffn_norm.weight  F32  [2048]
blk.25.ffn_up.weight  Q4_K  [2048, 16384]
blk.25.inp_gate.weight  Q4_K  [2048, 256]
blk.25.laurel_l.weight  Q4_K  [2048, 64]
blk.25.laurel_post_norm.weight  F32  [2048]
blk.25.laurel_r.weight  Q5_0  [64, 2048]
blk.25.post_attention_norm.weight  F32  [2048]
blk.25.post_ffw_norm.weight  F32  [2048]
blk.25.post_norm.weight  F32  [2048]
blk.25.proj.weight  Q4_K  [256, 2048]

blk.26
blk.26.altup_correct_coef.weight  F16  [4, 4]
blk.26.altup_correct_scale.weight  F32  [2048]
blk.26.altup_predict_coef.weight  F16  [4, 16]
blk.26.altup_router.weight  Q4_K  [2048, 4]
blk.26.altup_router_norm.weight  F32  [2048]
blk.26.attn_k.weight  Q4_K  [2048, 512]
blk.26.attn_k_norm.weight  F32  [256]
blk.26.attn_norm.weight  F32  [2048]
blk.26.attn_output.weight  Q4_K  [2048, 2048]
blk.26.attn_q.weight  Q4_K  [2048, 2048]
blk.26.attn_q_norm.weight  F32  [256]
blk.26.attn_v.weight  Q4_K  [2048, 512]
blk.26.ffn_down.weight  Q4_K  [16384, 2048]
blk.26.ffn_gate.weight  Q4_K  [2048, 16384]
blk.26.ffn_norm.weight  F32  [2048]
blk.26.ffn_up.weight  Q4_K  [2048, 16384]
blk.26.inp_gate.weight  Q4_K  [2048, 256]
blk.26.laurel_l.weight  Q4_K  [2048, 64]
blk.26.laurel_post_norm.weight  F32  [2048]
blk.26.laurel_r.weight  Q5_0  [64, 2048]
blk.26.post_attention_norm.weight  F32  [2048]
blk.26.post_ffw_norm.weight  F32  [2048]
blk.26.post_norm.weight  F32  [2048]
blk.26.proj.weight  Q4_K  [256, 2048]

blk.27
blk.27.altup_correct_coef.weight  F16  [4, 4]
blk.27.altup_correct_scale.weight  F32  [2048]
blk.27.altup_predict_coef.weight  F16  [4, 16]
blk.27.altup_router.weight  Q4_K  [2048, 4]
blk.27.altup_router_norm.weight  F32  [2048]
blk.27.attn_k.weight  Q4_K  [2048, 512]
blk.27.attn_k_norm.weight  F32  [256]
blk.27.attn_norm.weight  F32  [2048]
blk.27.attn_output.weight  Q4_K  [2048, 2048]
blk.27.attn_q.weight  Q4_K  [2048, 2048]
blk.27.attn_q_norm.weight  F32  [256]
blk.27.attn_v.weight  Q6_K  [2048, 512]
blk.27.ffn_down.weight  Q6_K  [16384, 2048]
blk.27.ffn_gate.weight  Q4_K  [2048, 16384]
blk.27.ffn_norm.weight  F32  [2048]
blk.27.ffn_up.weight  Q4_K  [2048, 16384]
blk.27.inp_gate.weight  Q4_K  [2048, 256]
blk.27.laurel_l.weight  Q4_K  [2048, 64]
blk.27.laurel_post_norm.weight  F32  [2048]
blk.27.laurel_r.weight  Q5_0  [64, 2048]
blk.27.post_attention_norm.weight  F32  [2048]
blk.27.post_ffw_norm.weight  F32  [2048]
blk.27.post_norm.weight  F32  [2048]
blk.27.proj.weight  Q4_K  [256, 2048]

blk.28
blk.28.altup_correct_coef.weight  F16  [4, 4]
blk.28.altup_correct_scale.weight  F32  [2048]
blk.28.altup_predict_coef.weight  F16  [4, 16]
blk.28.altup_router.weight  Q4_K  [2048, 4]
blk.28.altup_router_norm.weight  F32  [2048]
blk.28.attn_k.weight  Q4_K  [2048, 512]
blk.28.attn_k_norm.weight  F32  [256]
blk.28.attn_norm.weight  F32  [2048]
blk.28.attn_output.weight  Q4_K  [2048, 2048]
blk.28.attn_q.weight  Q4_K  [2048, 2048]
blk.28.attn_q_norm.weight  F32  [256]
blk.28.attn_v.weight  Q4_K  [2048, 512]
blk.28.ffn_down.weight  Q4_K  [16384, 2048]
blk.28.ffn_gate.weight  Q4_K  [2048, 16384]
blk.28.ffn_norm.weight  F32  [2048]
blk.28.ffn_up.weight  Q4_K  [2048, 16384]
blk.28.inp_gate.weight  Q4_K  [2048, 256]
blk.28.laurel_l.weight  Q4_K  [2048, 64]
blk.28.laurel_post_norm.weight  F32  [2048]
blk.28.laurel_r.weight  Q5_0  [64, 2048]
blk.28.post_attention_norm.weight  F32  [2048]
blk.28.post_ffw_norm.weight  F32  [2048]
blk.28.post_norm.weight  F32  [2048]
blk.28.proj.weight  Q4_K  [256, 2048]

blk.29
blk.29.altup_correct_coef.weight  F16  [4, 4]
blk.29.altup_correct_scale.weight  F32  [2048]
blk.29.altup_predict_coef.weight  F16  [4, 16]
blk.29.altup_router.weight  Q4_K  [2048, 4]
blk.29.altup_router_norm.weight  F32  [2048]
blk.29.attn_k.weight  Q4_K  [2048, 512]
blk.29.attn_k_norm.weight  F32  [256]
blk.29.attn_norm.weight  F32  [2048]
blk.29.attn_output.weight  Q4_K  [2048, 2048]
blk.29.attn_q.weight  Q4_K  [2048, 2048]
blk.29.attn_q_norm.weight  F32  [256]
blk.29.attn_v.weight  Q4_K  [2048, 512]
blk.29.ffn_down.weight  Q4_K  [16384, 2048]
blk.29.ffn_gate.weight  Q4_K  [2048, 16384]
blk.29.ffn_norm.weight  F32  [2048]
blk.29.ffn_up.weight  Q4_K  [2048, 16384]
blk.29.inp_gate.weight  Q4_K  [2048, 256]
blk.29.laurel_l.weight  Q4_K  [2048, 64]
blk.29.laurel_post_norm.weight  F32  [2048]
blk.29.laurel_r.weight  Q5_0  [64, 2048]
blk.29.post_attention_norm.weight  F32  [2048]
blk.29.post_ffw_norm.weight  F32  [2048]
blk.29.post_norm.weight  F32  [2048]
blk.29.proj.weight  Q4_K  [256, 2048]

blk.30
blk.30.altup_correct_coef.weight  F16  [4, 4]
blk.30.altup_correct_scale.weight  F32  [2048]
blk.30.altup_predict_coef.weight  F16  [4, 16]
blk.30.altup_router.weight  Q4_K  [2048, 4]
blk.30.altup_router_norm.weight  F32  [2048]
blk.30.attn_k.weight  Q4_K  [2048, 512]
blk.30.attn_k_norm.weight  F32  [256]
blk.30.attn_norm.weight  F32  [2048]
blk.30.attn_output.weight  Q4_K  [2048, 2048]
blk.30.attn_q.weight  Q4_K  [2048, 2048]
blk.30.attn_q_norm.weight  F32  [256]
blk.30.attn_v.weight  Q6_K  [2048, 512]
blk.30.ffn_down.weight  Q6_K  [16384, 2048]
blk.30.ffn_gate.weight  Q4_K  [2048, 16384]
blk.30.ffn_norm.weight  F32  [2048]
blk.30.ffn_up.weight  Q4_K  [2048, 16384]
blk.30.inp_gate.weight  Q4_K  [2048, 256]
blk.30.laurel_l.weight  Q4_K  [2048, 64]
blk.30.laurel_post_norm.weight  F32  [2048]
blk.30.laurel_r.weight  Q5_0  [64, 2048]
blk.30.post_attention_norm.weight  F32  [2048]
blk.30.post_ffw_norm.weight  F32  [2048]
blk.30.post_norm.weight  F32  [2048]
blk.30.proj.weight  Q4_K  [256, 2048]

blk.31
blk.31.altup_correct_coef.weight  F16  [4, 4]
blk.31.altup_correct_scale.weight  F32  [2048]
blk.31.altup_predict_coef.weight  F16  [4, 16]
blk.31.altup_router.weight  Q4_K  [2048, 4]
blk.31.altup_router_norm.weight  F32  [2048]
blk.31.attn_k.weight  Q4_K  [2048, 512]
blk.31.attn_k_norm.weight  F32  [256]
blk.31.attn_norm.weight  F32  [2048]
blk.31.attn_output.weight  Q4_K  [2048, 2048]
blk.31.attn_q.weight  Q4_K  [2048, 2048]
blk.31.attn_q_norm.weight  F32  [256]
blk.31.attn_v.weight  Q6_K  [2048, 512]
blk.31.ffn_down.weight  Q6_K  [16384, 2048]
blk.31.ffn_gate.weight  Q4_K  [2048, 16384]
blk.31.ffn_norm.weight  F32  [2048]
blk.31.ffn_up.weight  Q4_K  [2048, 16384]
blk.31.inp_gate.weight  Q4_K  [2048, 256]
blk.31.laurel_l.weight  Q4_K  [2048, 64]
blk.31.laurel_post_norm.weight  F32  [2048]
blk.31.laurel_r.weight  Q5_0  [64, 2048]
blk.31.post_attention_norm.weight  F32  [2048]
blk.31.post_ffw_norm.weight  F32  [2048]
blk.31.post_norm.weight  F32  [2048]
blk.31.proj.weight  Q4_K  [256, 2048]

blk.32
blk.32.altup_correct_coef.weight  F16  [4, 4]
blk.32.altup_correct_scale.weight  F32  [2048]
blk.32.altup_predict_coef.weight  F16  [4, 16]
blk.32.altup_router.weight  Q4_K  [2048, 4]
blk.32.altup_router_norm.weight  F32  [2048]
blk.32.attn_k.weight  Q4_K  [2048, 512]
blk.32.attn_k_norm.weight  F32  [256]
blk.32.attn_norm.weight  F32  [2048]
blk.32.attn_output.weight  Q4_K  [2048, 2048]
blk.32.attn_q.weight  Q4_K  [2048, 2048]
blk.32.attn_q_norm.weight  F32  [256]
blk.32.attn_v.weight  Q6_K  [2048, 512]
blk.32.ffn_down.weight  Q6_K  [16384, 2048]
blk.32.ffn_gate.weight  Q4_K  [2048, 16384]
blk.32.ffn_norm.weight  F32  [2048]
blk.32.ffn_up.weight  Q4_K  [2048, 16384]
blk.32.inp_gate.weight  Q4_K  [2048, 256]
blk.32.laurel_l.weight  Q4_K  [2048, 64]
blk.32.laurel_post_norm.weight  F32  [2048]
blk.32.laurel_r.weight  Q5_0  [64, 2048]
blk.32.post_attention_norm.weight  F32  [2048]
blk.32.post_ffw_norm.weight  F32  [2048]
blk.32.post_norm.weight  F32  [2048]
blk.32.proj.weight  Q4_K  [256, 2048]

blk.33
blk.33.altup_correct_coef.weight  F16  [4, 4]
blk.33.altup_correct_scale.weight  F32  [2048]
blk.33.altup_predict_coef.weight  F16  [4, 16]
blk.33.altup_router.weight  Q4_K  [2048, 4]
blk.33.altup_router_norm.weight  F32  [2048]
blk.33.attn_k.weight  Q4_K  [2048, 512]
blk.33.attn_k_norm.weight  F32  [256]
blk.33.attn_norm.weight  F32  [2048]
blk.33.attn_output.weight  Q4_K  [2048, 2048]
blk.33.attn_q.weight  Q4_K  [2048, 2048]
blk.33.attn_q_norm.weight  F32  [256]
blk.33.attn_v.weight  Q6_K  [2048, 512]
blk.33.ffn_down.weight  Q6_K  [16384, 2048]
blk.33.ffn_gate.weight  Q4_K  [2048, 16384]
blk.33.ffn_norm.weight  F32  [2048]
blk.33.ffn_up.weight  Q4_K  [2048, 16384]
blk.33.inp_gate.weight  Q4_K  [2048, 256]
blk.33.laurel_l.weight  Q4_K  [2048, 64]
blk.33.laurel_post_norm.weight  F32  [2048]
blk.33.laurel_r.weight  Q5_0  [64, 2048]
blk.33.post_attention_norm.weight  F32  [2048]
blk.33.post_ffw_norm.weight  F32  [2048]
blk.33.post_norm.weight  F32  [2048]
blk.33.proj.weight  Q4_K  [256, 2048]

blk.34
blk.34.altup_correct_coef.weight  F16  [4, 4]
blk.34.altup_correct_scale.weight  F32  [2048]
blk.34.altup_predict_coef.weight  F16  [4, 16]
blk.34.altup_router.weight  Q4_K  [2048, 4]
blk.34.altup_router_norm.weight  F32  [2048]
blk.34.attn_k.weight  Q4_K  [2048, 512]
blk.34.attn_k_norm.weight  F32  [256]
blk.34.attn_norm.weight  F32  [2048]
blk.34.attn_output.weight  Q4_K  [2048, 2048]
blk.34.attn_q.weight  Q4_K  [2048, 2048]
blk.34.attn_q_norm.weight  F32  [256]
blk.34.attn_v.weight  Q6_K  [2048, 512]
blk.34.ffn_down.weight  Q6_K  [16384, 2048]
blk.34.ffn_gate.weight  Q4_K  [2048, 16384]
blk.34.ffn_norm.weight  F32  [2048]
blk.34.ffn_up.weight  Q4_K  [2048, 16384]
blk.34.inp_gate.weight  Q4_K  [2048, 256]
blk.34.laurel_l.weight  Q4_K  [2048, 64]
blk.34.laurel_post_norm.weight  F32  [2048]
blk.34.laurel_r.weight  Q5_0  [64, 2048]
blk.34.post_attention_norm.weight  F32  [2048]
blk.34.post_ffw_norm.weight  F32  [2048]
blk.34.post_norm.weight  F32  [2048]
blk.34.proj.weight  Q4_K  [256, 2048]

per_layer_model_proj.weight  Q4_K  [2048, 8960]
per_layer_proj_norm.weight  F32  [256]
per_layer_token_embd.weight  F16  [8960, 262144]
output_norm.weight  F32  [2048]
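As a minimal cross-check of this table (assuming the `gguf` Python package from the llama.cpp project; the attribute names follow its `GGUFReader`, and `model.gguf` is again a placeholder path), the per-type tensor counts and total payload size can be tallied like so:

```python
from collections import Counter
from gguf import GGUFReader  # pip install gguf

reader = GGUFReader("model.gguf")

# Tally quantization types and bytes across the Name/Type/Shape table above.
type_counts = Counter(t.tensor_type.name for t in reader.tensors)
total_bytes = sum(int(t.n_bytes) for t in reader.tensors)

print(type_counts)                       # mostly Q4_K, plus F32 norms, etc.
print(f"{total_bytes / 2**30:.2f} GiB")  # should land near the 7.5GB above
```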