Models
Docs
Pricing
Sign in
Download
Models
Download
Docs
Pricing
Sign in
h4rithd
/
nullguard
:latest
10
Downloads
Updated
6 hours ago
NullGuard is an abliterated research build from gemma4:e4b by h4rithd, made for academic study of local LLM customization, refusal behavior, and safety limits. For research, education, and controlled security testing only.
NullGuard is an abliterated research build from gemma4:e4b by h4rithd, made for academic study of local LLM customization, refusal behavior, and safety limits. For research, education, and controlled security testing only.
Cancel
tools
thinking
nullguard:latest
...
/
model
ad873661e545 · 8.0GB
Metadata
general.architecture
gemma4
gemma4
general.file_type
Q8_0
Q8_0
gemma4.attention.head_count
8
8
gemma4.attention.head_count_kv
2
2
gemma4.attention.key_length
512
512
gemma4.attention.key_length_swa
256
256
gemma4.attention.layer_norm_rms_epsilon
1e-06
1e-06
gemma4.attention.shared_kv_layers
18
18
gemma4.attention.sliding_window
512
512
gemma4.attention.sliding_window_pattern
[true, true, true, true, true, ...]
[true, true, true, true, true, ...]
gemma4.attention.value_length
512
512
gemma4.attention.value_length_swa
256
256
gemma4.block_count
42
42
gemma4.context_length
131072
131072
gemma4.embedding_length
2560
2560
gemma4.embedding_length_per_layer_input
256
256
gemma4.feed_forward_length
10240
10240
gemma4.final_logit_softcapping
30
30
gemma4.rope.dimension_count
512
512
gemma4.rope.dimension_count_swa
256
256
gemma4.rope.freq_base
1e+06
1e+06
gemma4.rope.freq_base_swa
10000
10000
tokenizer.ggml.add_bos_token
false
false
tokenizer.ggml.add_space_prefix
false
false
tokenizer.ggml.bos_token_id
2
2
tokenizer.ggml.eos_token_id
1
1
tokenizer.ggml.mask_token_id
4
4
tokenizer.ggml.merges
[ , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁, , , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁▁, ...]
[ , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁, , , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁▁, ...]
tokenizer.ggml.model
gemma4
gemma4
tokenizer.ggml.padding_token_id
0
0
tokenizer.ggml.scores
[-1000, -1000, -1000, -1000, -1000, ...]
[-1000, -1000, -1000, -1000, -1000, ...]
tokenizer.ggml.token_type
[3, 3, 3, 3, 3, ...]
[3, 3, 3, 3, 3, ...]
tokenizer.ggml.tokens
[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
tokenizer.ggml.unknown_token_id
3
3
Tensor
Name
Type
Shape
token_embd.weight
Q8_0
Q8_0
[2560, 262144]
blk.0
blk.0.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.0.attn_k_norm.weight
F32
F32
[256]
blk.0.attn_norm.weight
F32
F32
[2560]
blk.0.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.0.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.0.attn_q_norm.weight
F32
F32
[256]
blk.0.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.0.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.0.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.0.ffn_norm.weight
F32
F32
[2560]
blk.0.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.0.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.0.layer_output_scale.weight
F32
F32
[1]
blk.0.post_attention_norm.weight
F32
F32
[2560]
blk.0.post_ffw_norm.weight
F32
F32
[2560]
blk.0.post_norm.weight
F32
F32
[2560]
blk.0.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.1
blk.1.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.1.attn_k_norm.weight
F32
F32
[256]
blk.1.attn_norm.weight
F32
F32
[2560]
blk.1.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.1.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.1.attn_q_norm.weight
F32
F32
[256]
blk.1.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.1.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.1.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.1.ffn_norm.weight
F32
F32
[2560]
blk.1.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.1.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.1.layer_output_scale.weight
F32
F32
[1]
blk.1.post_attention_norm.weight
F32
F32
[2560]
blk.1.post_ffw_norm.weight
F32
F32
[2560]
blk.1.post_norm.weight
F32
F32
[2560]
blk.1.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.2
blk.2.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.2.attn_k_norm.weight
F32
F32
[256]
blk.2.attn_norm.weight
F32
F32
[2560]
blk.2.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.2.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.2.attn_q_norm.weight
F32
F32
[256]
blk.2.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.2.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.2.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.2.ffn_norm.weight
F32
F32
[2560]
blk.2.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.2.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.2.layer_output_scale.weight
F32
F32
[1]
blk.2.post_attention_norm.weight
F32
F32
[2560]
blk.2.post_ffw_norm.weight
F32
F32
[2560]
blk.2.post_norm.weight
F32
F32
[2560]
blk.2.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.3
blk.3.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.3.attn_k_norm.weight
F32
F32
[256]
blk.3.attn_norm.weight
F32
F32
[2560]
blk.3.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.3.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.3.attn_q_norm.weight
F32
F32
[256]
blk.3.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.3.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.3.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.3.ffn_norm.weight
F32
F32
[2560]
blk.3.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.3.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.3.layer_output_scale.weight
F32
F32
[1]
blk.3.post_attention_norm.weight
F32
F32
[2560]
blk.3.post_ffw_norm.weight
F32
F32
[2560]
blk.3.post_norm.weight
F32
F32
[2560]
blk.3.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.4
blk.4.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.4.attn_k_norm.weight
F32
F32
[256]
blk.4.attn_norm.weight
F32
F32
[2560]
blk.4.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.4.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.4.attn_q_norm.weight
F32
F32
[256]
blk.4.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.4.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.4.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.4.ffn_norm.weight
F32
F32
[2560]
blk.4.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.4.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.4.layer_output_scale.weight
F32
F32
[1]
blk.4.post_attention_norm.weight
F32
F32
[2560]
blk.4.post_ffw_norm.weight
F32
F32
[2560]
blk.4.post_norm.weight
F32
F32
[2560]
blk.4.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.5
blk.5.attn_k.weight
Q8_0
Q8_0
[2560, 1024]
blk.5.attn_k_norm.weight
F32
F32
[512]
blk.5.attn_norm.weight
F32
F32
[2560]
blk.5.attn_output.weight
Q8_0
Q8_0
[4096, 2560]
blk.5.attn_q.weight
Q8_0
Q8_0
[2560, 4096]
blk.5.attn_q_norm.weight
F32
F32
[512]
blk.5.attn_v.weight
Q8_0
Q8_0
[2560, 1024]
blk.5.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.5.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.5.ffn_norm.weight
F32
F32
[2560]
blk.5.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.5.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.5.layer_output_scale.weight
F32
F32
[1]
blk.5.post_attention_norm.weight
F32
F32
[2560]
blk.5.post_ffw_norm.weight
F32
F32
[2560]
blk.5.post_norm.weight
F32
F32
[2560]
blk.5.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.6
blk.6.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.6.attn_k_norm.weight
F32
F32
[256]
blk.6.attn_norm.weight
F32
F32
[2560]
blk.6.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.6.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.6.attn_q_norm.weight
F32
F32
[256]
blk.6.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.6.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.6.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.6.ffn_norm.weight
F32
F32
[2560]
blk.6.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.6.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.6.layer_output_scale.weight
F32
F32
[1]
blk.6.post_attention_norm.weight
F32
F32
[2560]
blk.6.post_ffw_norm.weight
F32
F32
[2560]
blk.6.post_norm.weight
F32
F32
[2560]
blk.6.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.7
blk.7.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.7.attn_k_norm.weight
F32
F32
[256]
blk.7.attn_norm.weight
F32
F32
[2560]
blk.7.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.7.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.7.attn_q_norm.weight
F32
F32
[256]
blk.7.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.7.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.7.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.7.ffn_norm.weight
F32
F32
[2560]
blk.7.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.7.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.7.layer_output_scale.weight
F32
F32
[1]
blk.7.post_attention_norm.weight
F32
F32
[2560]
blk.7.post_ffw_norm.weight
F32
F32
[2560]
blk.7.post_norm.weight
F32
F32
[2560]
blk.7.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.8
blk.8.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.8.attn_k_norm.weight
F32
F32
[256]
blk.8.attn_norm.weight
F32
F32
[2560]
blk.8.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.8.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.8.attn_q_norm.weight
F32
F32
[256]
blk.8.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.8.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.8.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.8.ffn_norm.weight
F32
F32
[2560]
blk.8.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.8.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.8.layer_output_scale.weight
F32
F32
[1]
blk.8.post_attention_norm.weight
F32
F32
[2560]
blk.8.post_ffw_norm.weight
F32
F32
[2560]
blk.8.post_norm.weight
F32
F32
[2560]
blk.8.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.9
blk.9.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.9.attn_k_norm.weight
F32
F32
[256]
blk.9.attn_norm.weight
F32
F32
[2560]
blk.9.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.9.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.9.attn_q_norm.weight
F32
F32
[256]
blk.9.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.9.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.9.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.9.ffn_norm.weight
F32
F32
[2560]
blk.9.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.9.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.9.layer_output_scale.weight
F32
F32
[1]
blk.9.post_attention_norm.weight
F32
F32
[2560]
blk.9.post_ffw_norm.weight
F32
F32
[2560]
blk.9.post_norm.weight
F32
F32
[2560]
blk.9.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.10
blk.10.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.10.attn_k_norm.weight
F32
F32
[256]
blk.10.attn_norm.weight
F32
F32
[2560]
blk.10.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.10.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.10.attn_q_norm.weight
F32
F32
[256]
blk.10.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.10.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.10.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.10.ffn_norm.weight
F32
F32
[2560]
blk.10.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.10.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.10.layer_output_scale.weight
F32
F32
[1]
blk.10.post_attention_norm.weight
F32
F32
[2560]
blk.10.post_ffw_norm.weight
F32
F32
[2560]
blk.10.post_norm.weight
F32
F32
[2560]
blk.10.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.11
blk.11.attn_k.weight
Q8_0
Q8_0
[2560, 1024]
blk.11.attn_k_norm.weight
F32
F32
[512]
blk.11.attn_norm.weight
F32
F32
[2560]
blk.11.attn_output.weight
Q8_0
Q8_0
[4096, 2560]
blk.11.attn_q.weight
Q8_0
Q8_0
[2560, 4096]
blk.11.attn_q_norm.weight
F32
F32
[512]
blk.11.attn_v.weight
Q8_0
Q8_0
[2560, 1024]
blk.11.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.11.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.11.ffn_norm.weight
F32
F32
[2560]
blk.11.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.11.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.11.layer_output_scale.weight
F32
F32
[1]
blk.11.post_attention_norm.weight
F32
F32
[2560]
blk.11.post_ffw_norm.weight
F32
F32
[2560]
blk.11.post_norm.weight
F32
F32
[2560]
blk.11.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.12
blk.12.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.12.attn_k_norm.weight
F32
F32
[256]
blk.12.attn_norm.weight
F32
F32
[2560]
blk.12.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.12.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.12.attn_q_norm.weight
F32
F32
[256]
blk.12.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.12.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.12.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.12.ffn_norm.weight
F32
F32
[2560]
blk.12.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.12.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.12.layer_output_scale.weight
F32
F32
[1]
blk.12.post_attention_norm.weight
F32
F32
[2560]
blk.12.post_ffw_norm.weight
F32
F32
[2560]
blk.12.post_norm.weight
F32
F32
[2560]
blk.12.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.13
blk.13.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.13.attn_k_norm.weight
F32
F32
[256]
blk.13.attn_norm.weight
F32
F32
[2560]
blk.13.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.13.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.13.attn_q_norm.weight
F32
F32
[256]
blk.13.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.13.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.13.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.13.ffn_norm.weight
F32
F32
[2560]
blk.13.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.13.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.13.layer_output_scale.weight
F32
F32
[1]
blk.13.post_attention_norm.weight
F32
F32
[2560]
blk.13.post_ffw_norm.weight
F32
F32
[2560]
blk.13.post_norm.weight
F32
F32
[2560]
blk.13.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.14
blk.14.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.14.attn_k_norm.weight
F32
F32
[256]
blk.14.attn_norm.weight
F32
F32
[2560]
blk.14.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.14.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.14.attn_q_norm.weight
F32
F32
[256]
blk.14.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.14.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.14.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.14.ffn_norm.weight
F32
F32
[2560]
blk.14.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.14.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.14.layer_output_scale.weight
F32
F32
[1]
blk.14.post_attention_norm.weight
F32
F32
[2560]
blk.14.post_ffw_norm.weight
F32
F32
[2560]
blk.14.post_norm.weight
F32
F32
[2560]
blk.14.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.15
blk.15.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.15.attn_k_norm.weight
F32
F32
[256]
blk.15.attn_norm.weight
F32
F32
[2560]
blk.15.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.15.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.15.attn_q_norm.weight
F32
F32
[256]
blk.15.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.15.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.15.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.15.ffn_norm.weight
F32
F32
[2560]
blk.15.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.15.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.15.layer_output_scale.weight
F32
F32
[1]
blk.15.post_attention_norm.weight
F32
F32
[2560]
blk.15.post_ffw_norm.weight
F32
F32
[2560]
blk.15.post_norm.weight
F32
F32
[2560]
blk.15.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.16
blk.16.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.16.attn_k_norm.weight
F32
F32
[256]
blk.16.attn_norm.weight
F32
F32
[2560]
blk.16.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.16.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.16.attn_q_norm.weight
F32
F32
[256]
blk.16.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.16.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.16.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.16.ffn_norm.weight
F32
F32
[2560]
blk.16.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.16.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.16.layer_output_scale.weight
F32
F32
[1]
blk.16.post_attention_norm.weight
F32
F32
[2560]
blk.16.post_ffw_norm.weight
F32
F32
[2560]
blk.16.post_norm.weight
F32
F32
[2560]
blk.16.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.17
blk.17.attn_k.weight
Q8_0
Q8_0
[2560, 1024]
blk.17.attn_k_norm.weight
F32
F32
[512]
blk.17.attn_norm.weight
F32
F32
[2560]
blk.17.attn_output.weight
Q8_0
Q8_0
[4096, 2560]
blk.17.attn_q.weight
Q8_0
Q8_0
[2560, 4096]
blk.17.attn_q_norm.weight
F32
F32
[512]
blk.17.attn_v.weight
Q8_0
Q8_0
[2560, 1024]
blk.17.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.17.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.17.ffn_norm.weight
F32
F32
[2560]
blk.17.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.17.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.17.layer_output_scale.weight
F32
F32
[1]
blk.17.post_attention_norm.weight
F32
F32
[2560]
blk.17.post_ffw_norm.weight
F32
F32
[2560]
blk.17.post_norm.weight
F32
F32
[2560]
blk.17.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.18
blk.18.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.18.attn_k_norm.weight
F32
F32
[256]
blk.18.attn_norm.weight
F32
F32
[2560]
blk.18.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.18.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.18.attn_q_norm.weight
F32
F32
[256]
blk.18.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.18.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.18.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.18.ffn_norm.weight
F32
F32
[2560]
blk.18.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.18.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.18.layer_output_scale.weight
F32
F32
[1]
blk.18.post_attention_norm.weight
F32
F32
[2560]
blk.18.post_ffw_norm.weight
F32
F32
[2560]
blk.18.post_norm.weight
F32
F32
[2560]
blk.18.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.19
blk.19.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.19.attn_k_norm.weight
F32
F32
[256]
blk.19.attn_norm.weight
F32
F32
[2560]
blk.19.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.19.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.19.attn_q_norm.weight
F32
F32
[256]
blk.19.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.19.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.19.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.19.ffn_norm.weight
F32
F32
[2560]
blk.19.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.19.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.19.layer_output_scale.weight
F32
F32
[1]
blk.19.post_attention_norm.weight
F32
F32
[2560]
blk.19.post_ffw_norm.weight
F32
F32
[2560]
blk.19.post_norm.weight
F32
F32
[2560]
blk.19.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.20
blk.20.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.20.attn_k_norm.weight
F32
F32
[256]
blk.20.attn_norm.weight
F32
F32
[2560]
blk.20.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.20.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.20.attn_q_norm.weight
F32
F32
[256]
blk.20.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.20.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.20.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.20.ffn_norm.weight
F32
F32
[2560]
blk.20.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.20.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.20.layer_output_scale.weight
F32
F32
[1]
blk.20.post_attention_norm.weight
F32
F32
[2560]
blk.20.post_ffw_norm.weight
F32
F32
[2560]
blk.20.post_norm.weight
F32
F32
[2560]
blk.20.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.21
blk.21.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.21.attn_k_norm.weight
F32
F32
[256]
blk.21.attn_norm.weight
F32
F32
[2560]
blk.21.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.21.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.21.attn_q_norm.weight
F32
F32
[256]
blk.21.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.21.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.21.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.21.ffn_norm.weight
F32
F32
[2560]
blk.21.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.21.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.21.layer_output_scale.weight
F32
F32
[1]
blk.21.post_attention_norm.weight
F32
F32
[2560]
blk.21.post_ffw_norm.weight
F32
F32
[2560]
blk.21.post_norm.weight
F32
F32
[2560]
blk.21.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.22
blk.22.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.22.attn_k_norm.weight
F32
F32
[256]
blk.22.attn_norm.weight
F32
F32
[2560]
blk.22.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.22.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.22.attn_q_norm.weight
F32
F32
[256]
blk.22.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.22.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.22.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.22.ffn_norm.weight
F32
F32
[2560]
blk.22.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.22.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.22.layer_output_scale.weight
F32
F32
[1]
blk.22.post_attention_norm.weight
F32
F32
[2560]
blk.22.post_ffw_norm.weight
F32
F32
[2560]
blk.22.post_norm.weight
F32
F32
[2560]
blk.22.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.23
blk.23.attn_k.weight
Q8_0
Q8_0
[2560, 1024]
blk.23.attn_k_norm.weight
F32
F32
[512]
blk.23.attn_norm.weight
F32
F32
[2560]
blk.23.attn_output.weight
Q8_0
Q8_0
[4096, 2560]
blk.23.attn_q.weight
Q8_0
Q8_0
[2560, 4096]
blk.23.attn_q_norm.weight
F32
F32
[512]
blk.23.attn_v.weight
Q8_0
Q8_0
[2560, 1024]
blk.23.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.23.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.23.ffn_norm.weight
F32
F32
[2560]
blk.23.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.23.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.23.layer_output_scale.weight
F32
F32
[1]
blk.23.post_attention_norm.weight
F32
F32
[2560]
blk.23.post_ffw_norm.weight
F32
F32
[2560]
blk.23.post_norm.weight
F32
F32
[2560]
blk.23.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.24
blk.24.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.24.attn_k_norm.weight
F32
F32
[256]
blk.24.attn_norm.weight
F32
F32
[2560]
blk.24.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.24.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.24.attn_q_norm.weight
F32
F32
[256]
blk.24.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.24.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.24.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.24.ffn_norm.weight
F32
F32
[2560]
blk.24.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.24.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.24.layer_output_scale.weight
F32
F32
[1]
blk.24.post_attention_norm.weight
F32
F32
[2560]
blk.24.post_ffw_norm.weight
F32
F32
[2560]
blk.24.post_norm.weight
F32
F32
[2560]
blk.24.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.25
blk.25.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.25.attn_k_norm.weight
F32
F32
[256]
blk.25.attn_norm.weight
F32
F32
[2560]
blk.25.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.25.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.25.attn_q_norm.weight
F32
F32
[256]
blk.25.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.25.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.25.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.25.ffn_norm.weight
F32
F32
[2560]
blk.25.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.25.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.25.layer_output_scale.weight
F32
F32
[1]
blk.25.post_attention_norm.weight
F32
F32
[2560]
blk.25.post_ffw_norm.weight
F32
F32
[2560]
blk.25.post_norm.weight
F32
F32
[2560]
blk.25.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.26
blk.26.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.26.attn_k_norm.weight
F32
F32
[256]
blk.26.attn_norm.weight
F32
F32
[2560]
blk.26.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.26.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.26.attn_q_norm.weight
F32
F32
[256]
blk.26.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.26.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.26.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.26.ffn_norm.weight
F32
F32
[2560]
blk.26.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.26.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.26.layer_output_scale.weight
F32
F32
[1]
blk.26.post_attention_norm.weight
F32
F32
[2560]
blk.26.post_ffw_norm.weight
F32
F32
[2560]
blk.26.post_norm.weight
F32
F32
[2560]
blk.26.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.27
blk.27.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.27.attn_k_norm.weight
F32
F32
[256]
blk.27.attn_norm.weight
F32
F32
[2560]
blk.27.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.27.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.27.attn_q_norm.weight
F32
F32
[256]
blk.27.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.27.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.27.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.27.ffn_norm.weight
F32
F32
[2560]
blk.27.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.27.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.27.layer_output_scale.weight
F32
F32
[1]
blk.27.post_attention_norm.weight
F32
F32
[2560]
blk.27.post_ffw_norm.weight
F32
F32
[2560]
blk.27.post_norm.weight
F32
F32
[2560]
blk.27.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.28
blk.28.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.28.attn_k_norm.weight
F32
F32
[256]
blk.28.attn_norm.weight
F32
F32
[2560]
blk.28.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.28.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.28.attn_q_norm.weight
F32
F32
[256]
blk.28.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.28.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.28.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.28.ffn_norm.weight
F32
F32
[2560]
blk.28.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.28.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.28.layer_output_scale.weight
F32
F32
[1]
blk.28.post_attention_norm.weight
F32
F32
[2560]
blk.28.post_ffw_norm.weight
F32
F32
[2560]
blk.28.post_norm.weight
F32
F32
[2560]
blk.28.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.29
blk.29.attn_k.weight
Q8_0
Q8_0
[2560, 1024]
blk.29.attn_k_norm.weight
F32
F32
[512]
blk.29.attn_norm.weight
F32
F32
[2560]
blk.29.attn_output.weight
Q8_0
Q8_0
[4096, 2560]
blk.29.attn_q.weight
Q8_0
Q8_0
[2560, 4096]
blk.29.attn_q_norm.weight
F32
F32
[512]
blk.29.attn_v.weight
Q8_0
Q8_0
[2560, 1024]
blk.29.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.29.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.29.ffn_norm.weight
F32
F32
[2560]
blk.29.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.29.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.29.layer_output_scale.weight
F32
F32
[1]
blk.29.post_attention_norm.weight
F32
F32
[2560]
blk.29.post_ffw_norm.weight
F32
F32
[2560]
blk.29.post_norm.weight
F32
F32
[2560]
blk.29.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.30
blk.30.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.30.attn_k_norm.weight
F32
F32
[256]
blk.30.attn_norm.weight
F32
F32
[2560]
blk.30.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.30.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.30.attn_q_norm.weight
F32
F32
[256]
blk.30.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.30.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.30.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.30.ffn_norm.weight
F32
F32
[2560]
blk.30.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.30.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.30.layer_output_scale.weight
F32
F32
[1]
blk.30.post_attention_norm.weight
F32
F32
[2560]
blk.30.post_ffw_norm.weight
F32
F32
[2560]
blk.30.post_norm.weight
F32
F32
[2560]
blk.30.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.31
blk.31.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.31.attn_k_norm.weight
F32
F32
[256]
blk.31.attn_norm.weight
F32
F32
[2560]
blk.31.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.31.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.31.attn_q_norm.weight
F32
F32
[256]
blk.31.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.31.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.31.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.31.ffn_norm.weight
F32
F32
[2560]
blk.31.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.31.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.31.layer_output_scale.weight
F32
F32
[1]
blk.31.post_attention_norm.weight
F32
F32
[2560]
blk.31.post_ffw_norm.weight
F32
F32
[2560]
blk.31.post_norm.weight
F32
F32
[2560]
blk.31.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.32
blk.32.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.32.attn_k_norm.weight
F32
F32
[256]
blk.32.attn_norm.weight
F32
F32
[2560]
blk.32.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.32.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.32.attn_q_norm.weight
F32
F32
[256]
blk.32.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.32.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.32.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.32.ffn_norm.weight
F32
F32
[2560]
blk.32.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.32.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.32.layer_output_scale.weight
F32
F32
[1]
blk.32.post_attention_norm.weight
F32
F32
[2560]
blk.32.post_ffw_norm.weight
F32
F32
[2560]
blk.32.post_norm.weight
F32
F32
[2560]
blk.32.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.33
blk.33.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.33.attn_k_norm.weight
F32
F32
[256]
blk.33.attn_norm.weight
F32
F32
[2560]
blk.33.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.33.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.33.attn_q_norm.weight
F32
F32
[256]
blk.33.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.33.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.33.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.33.ffn_norm.weight
F32
F32
[2560]
blk.33.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.33.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.33.layer_output_scale.weight
F32
F32
[1]
blk.33.post_attention_norm.weight
F32
F32
[2560]
blk.33.post_ffw_norm.weight
F32
F32
[2560]
blk.33.post_norm.weight
F32
F32
[2560]
blk.33.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.34
blk.34.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.34.attn_k_norm.weight
F32
F32
[256]
blk.34.attn_norm.weight
F32
F32
[2560]
blk.34.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.34.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.34.attn_q_norm.weight
F32
F32
[256]
blk.34.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.34.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.34.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.34.ffn_norm.weight
F32
F32
[2560]
blk.34.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.34.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.34.layer_output_scale.weight
F32
F32
[1]
blk.34.post_attention_norm.weight
F32
F32
[2560]
blk.34.post_ffw_norm.weight
F32
F32
[2560]
blk.34.post_norm.weight
F32
F32
[2560]
blk.34.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.35
blk.35.attn_k.weight
Q8_0
Q8_0
[2560, 1024]
blk.35.attn_k_norm.weight
F32
F32
[512]
blk.35.attn_norm.weight
F32
F32
[2560]
blk.35.attn_output.weight
Q8_0
Q8_0
[4096, 2560]
blk.35.attn_q.weight
Q8_0
Q8_0
[2560, 4096]
blk.35.attn_q_norm.weight
F32
F32
[512]
blk.35.attn_v.weight
Q8_0
Q8_0
[2560, 1024]
blk.35.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.35.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.35.ffn_norm.weight
F32
F32
[2560]
blk.35.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.35.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.35.layer_output_scale.weight
F32
F32
[1]
blk.35.post_attention_norm.weight
F32
F32
[2560]
blk.35.post_ffw_norm.weight
F32
F32
[2560]
blk.35.post_norm.weight
F32
F32
[2560]
blk.35.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.36
blk.36.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.36.attn_k_norm.weight
F32
F32
[256]
blk.36.attn_norm.weight
F32
F32
[2560]
blk.36.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.36.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.36.attn_q_norm.weight
F32
F32
[256]
blk.36.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.36.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.36.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.36.ffn_norm.weight
F32
F32
[2560]
blk.36.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.36.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.36.layer_output_scale.weight
F32
F32
[1]
blk.36.post_attention_norm.weight
F32
F32
[2560]
blk.36.post_ffw_norm.weight
F32
F32
[2560]
blk.36.post_norm.weight
F32
F32
[2560]
blk.36.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.37
blk.37.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.37.attn_k_norm.weight
F32
F32
[256]
blk.37.attn_norm.weight
F32
F32
[2560]
blk.37.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.37.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.37.attn_q_norm.weight
F32
F32
[256]
blk.37.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.37.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.37.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.37.ffn_norm.weight
F32
F32
[2560]
blk.37.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.37.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.37.layer_output_scale.weight
F32
F32
[1]
blk.37.post_attention_norm.weight
F32
F32
[2560]
blk.37.post_ffw_norm.weight
F32
F32
[2560]
blk.37.post_norm.weight
F32
F32
[2560]
blk.37.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.38
blk.38.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.38.attn_k_norm.weight
F32
F32
[256]
blk.38.attn_norm.weight
F32
F32
[2560]
blk.38.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.38.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.38.attn_q_norm.weight
F32
F32
[256]
blk.38.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.38.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.38.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.38.ffn_norm.weight
F32
F32
[2560]
blk.38.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.38.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.38.layer_output_scale.weight
F32
F32
[1]
blk.38.post_attention_norm.weight
F32
F32
[2560]
blk.38.post_ffw_norm.weight
F32
F32
[2560]
blk.38.post_norm.weight
F32
F32
[2560]
blk.38.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.39
blk.39.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.39.attn_k_norm.weight
F32
F32
[256]
blk.39.attn_norm.weight
F32
F32
[2560]
blk.39.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.39.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.39.attn_q_norm.weight
F32
F32
[256]
blk.39.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.39.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.39.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.39.ffn_norm.weight
F32
F32
[2560]
blk.39.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.39.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.39.layer_output_scale.weight
F32
F32
[1]
blk.39.post_attention_norm.weight
F32
F32
[2560]
blk.39.post_ffw_norm.weight
F32
F32
[2560]
blk.39.post_norm.weight
F32
F32
[2560]
blk.39.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.40
blk.40.attn_k.weight
Q8_0
Q8_0
[2560, 512]
blk.40.attn_k_norm.weight
F32
F32
[256]
blk.40.attn_norm.weight
F32
F32
[2560]
blk.40.attn_output.weight
Q8_0
Q8_0
[2048, 2560]
blk.40.attn_q.weight
Q8_0
Q8_0
[2560, 2048]
blk.40.attn_q_norm.weight
F32
F32
[256]
blk.40.attn_v.weight
Q8_0
Q8_0
[2560, 512]
blk.40.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.40.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.40.ffn_norm.weight
F32
F32
[2560]
blk.40.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.40.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.40.layer_output_scale.weight
F32
F32
[1]
blk.40.post_attention_norm.weight
F32
F32
[2560]
blk.40.post_ffw_norm.weight
F32
F32
[2560]
blk.40.post_norm.weight
F32
F32
[2560]
blk.40.proj.weight
Q8_0
Q8_0
[256, 2560]
blk.41
blk.41.attn_k.weight
Q8_0
Q8_0
[2560, 1024]
blk.41.attn_k_norm.weight
F32
F32
[512]
blk.41.attn_norm.weight
F32
F32
[2560]
blk.41.attn_output.weight
Q8_0
Q8_0
[4096, 2560]
blk.41.attn_q.weight
Q8_0
Q8_0
[2560, 4096]
blk.41.attn_q_norm.weight
F32
F32
[512]
blk.41.attn_v.weight
Q8_0
Q8_0
[2560, 1024]
blk.41.ffn_down.weight
Q8_0
Q8_0
[10240, 2560]
blk.41.ffn_gate.weight
Q8_0
Q8_0
[2560, 10240]
blk.41.ffn_norm.weight
F32
F32
[2560]
blk.41.ffn_up.weight
Q8_0
Q8_0
[2560, 10240]
blk.41.inp_gate.weight
Q8_0
Q8_0
[2560, 256]
blk.41.layer_output_scale.weight
F32
F32
[1]
blk.41.post_attention_norm.weight
F32
F32
[2560]
blk.41.post_ffw_norm.weight
F32
F32
[2560]
blk.41.post_norm.weight
F32
F32
[2560]
blk.41.proj.weight
Q8_0
Q8_0
[256, 2560]
per_layer_model_proj.weight
BF16
BF16
[2560, 10752]
per_layer_proj_norm.weight
F32
F32
[256]
per_layer_token_embd.weight
Q8_0
Q8_0
[10752, 262144]
rope_freqs.weight
F32
F32
[256]
output_norm.weight
F32
F32
[2560]