Models
Docs
Pricing
Sign in
Download
Models
Download
Docs
Pricing
Sign in
ionicus
/
kldzj_gpt-oss-120b-heretic-v2-bf16
:latest
13
Downloads
Updated
1 week ago
Bartowski's GGUF at full BF16 weights. Parameters and Template optimized for tool use.
Bartowski's GGUF at full BF16 weights. Parameters and Template optimized for tool use.
Cancel
tools
thinking
kldzj_gpt-oss-120b-heretic-v2-bf16:latest
...
/
model
58fc2c408871 · 65GB
Metadata
general.architecture
gpt-oss
gpt-oss
general.file_type
BF16
BF16
gpt-oss.attention.head_count
64
64
gpt-oss.attention.head_count_kv
8
8
gpt-oss.attention.key_length
64
64
gpt-oss.attention.layer_norm_rms_epsilon
1e-05
1e-05
gpt-oss.attention.sliding_window
128
128
gpt-oss.attention.value_length
64
64
gpt-oss.block_count
36
36
gpt-oss.context_length
131072
131072
gpt-oss.embedding_length
2880
2880
gpt-oss.expert_count
128
128
gpt-oss.expert_feed_forward_length
2880
2880
gpt-oss.expert_used_count
4
4
gpt-oss.feed_forward_length
2880
2880
gpt-oss.rope.freq_base
150000
150000
gpt-oss.rope.scaling.factor
32
32
gpt-oss.rope.scaling.original_context_length
4096
4096
gpt-oss.rope.scaling.type
yarn
yarn
tokenizer.ggml.bos_token_id
199998
199998
tokenizer.ggml.eos_token_id
200002
200002
tokenizer.ggml.merges
[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
199999
199999
tokenizer.ggml.pre
gpt-4o
gpt-4o
tokenizer.ggml.token_type
[1, 1, 1, 1, 1, ...]
[1, 1, 1, 1, 1, ...]
tokenizer.ggml.tokens
[!, ", #, $, %, ...]
[!, ", #, $, %, ...]
split.count
0
0
split.no
0
0
split.tensors.count
687
687
Tensor
Name
Type
Shape
token_embd.weight
BF16
BF16
[2880, 201088]
blk.0
blk.0.attn_k.bias
F32
F32
[512]
blk.0.attn_k.weight
BF16
BF16
[2880, 512]
blk.0.attn_norm.weight
F32
F32
[2880]
blk.0.attn_output.bias
F32
F32
[2880]
blk.0.attn_output.weight
BF16
BF16
[4096, 2880]
blk.0.attn_q.bias
F32
F32
[4096]
blk.0.attn_q.weight
BF16
BF16
[2880, 4096]
blk.0.attn_sinks.weight
F32
F32
[64]
blk.0.attn_v.bias
F32
F32
[512]
blk.0.attn_v.weight
BF16
BF16
[2880, 512]
blk.0.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.0.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.0.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.0.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.0.ffn_gate_inp.bias
F32
F32
[128]
blk.0.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.0.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.0.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.0.post_attention_norm.weight
F32
F32
[2880]
blk.1
blk.1.attn_k.bias
F32
F32
[512]
blk.1.attn_k.weight
BF16
BF16
[2880, 512]
blk.1.attn_norm.weight
F32
F32
[2880]
blk.1.attn_output.bias
F32
F32
[2880]
blk.1.attn_output.weight
BF16
BF16
[4096, 2880]
blk.1.attn_q.bias
F32
F32
[4096]
blk.1.attn_q.weight
BF16
BF16
[2880, 4096]
blk.1.attn_sinks.weight
F32
F32
[64]
blk.1.attn_v.bias
F32
F32
[512]
blk.1.attn_v.weight
BF16
BF16
[2880, 512]
blk.1.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.1.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.1.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.1.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.1.ffn_gate_inp.bias
F32
F32
[128]
blk.1.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.1.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.1.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.1.post_attention_norm.weight
F32
F32
[2880]
blk.2
blk.2.attn_k.bias
F32
F32
[512]
blk.2.attn_k.weight
BF16
BF16
[2880, 512]
blk.2.attn_norm.weight
F32
F32
[2880]
blk.2.attn_output.bias
F32
F32
[2880]
blk.2.attn_output.weight
BF16
BF16
[4096, 2880]
blk.2.attn_q.bias
F32
F32
[4096]
blk.2.attn_q.weight
BF16
BF16
[2880, 4096]
blk.2.attn_sinks.weight
F32
F32
[64]
blk.2.attn_v.bias
F32
F32
[512]
blk.2.attn_v.weight
BF16
BF16
[2880, 512]
blk.2.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.2.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.2.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.2.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.2.ffn_gate_inp.bias
F32
F32
[128]
blk.2.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.2.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.2.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.2.post_attention_norm.weight
F32
F32
[2880]
blk.3
blk.3.attn_k.bias
F32
F32
[512]
blk.3.attn_k.weight
BF16
BF16
[2880, 512]
blk.3.attn_norm.weight
F32
F32
[2880]
blk.3.attn_output.bias
F32
F32
[2880]
blk.3.attn_output.weight
BF16
BF16
[4096, 2880]
blk.3.attn_q.bias
F32
F32
[4096]
blk.3.attn_q.weight
BF16
BF16
[2880, 4096]
blk.3.attn_sinks.weight
F32
F32
[64]
blk.3.attn_v.bias
F32
F32
[512]
blk.3.attn_v.weight
BF16
BF16
[2880, 512]
blk.3.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.3.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.3.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.3.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.3.ffn_gate_inp.bias
F32
F32
[128]
blk.3.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.3.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.3.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.3.post_attention_norm.weight
F32
F32
[2880]
blk.4
blk.4.attn_k.bias
F32
F32
[512]
blk.4.attn_k.weight
BF16
BF16
[2880, 512]
blk.4.attn_norm.weight
F32
F32
[2880]
blk.4.attn_output.bias
F32
F32
[2880]
blk.4.attn_output.weight
BF16
BF16
[4096, 2880]
blk.4.attn_q.bias
F32
F32
[4096]
blk.4.attn_q.weight
BF16
BF16
[2880, 4096]
blk.4.attn_sinks.weight
F32
F32
[64]
blk.4.attn_v.bias
F32
F32
[512]
blk.4.attn_v.weight
BF16
BF16
[2880, 512]
blk.4.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.4.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.4.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.4.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.4.ffn_gate_inp.bias
F32
F32
[128]
blk.4.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.4.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.4.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.4.post_attention_norm.weight
F32
F32
[2880]
blk.5
blk.5.attn_k.bias
F32
F32
[512]
blk.5.attn_k.weight
BF16
BF16
[2880, 512]
blk.5.attn_norm.weight
F32
F32
[2880]
blk.5.attn_output.bias
F32
F32
[2880]
blk.5.attn_output.weight
BF16
BF16
[4096, 2880]
blk.5.attn_q.bias
F32
F32
[4096]
blk.5.attn_q.weight
BF16
BF16
[2880, 4096]
blk.5.attn_sinks.weight
F32
F32
[64]
blk.5.attn_v.bias
F32
F32
[512]
blk.5.attn_v.weight
BF16
BF16
[2880, 512]
blk.5.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.5.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.5.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.5.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.5.ffn_gate_inp.bias
F32
F32
[128]
blk.5.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.5.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.5.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.5.post_attention_norm.weight
F32
F32
[2880]
blk.6
blk.6.attn_k.bias
F32
F32
[512]
blk.6.attn_k.weight
BF16
BF16
[2880, 512]
blk.6.attn_norm.weight
F32
F32
[2880]
blk.6.attn_output.bias
F32
F32
[2880]
blk.6.attn_output.weight
BF16
BF16
[4096, 2880]
blk.6.attn_q.bias
F32
F32
[4096]
blk.6.attn_q.weight
BF16
BF16
[2880, 4096]
blk.6.attn_sinks.weight
F32
F32
[64]
blk.6.attn_v.bias
F32
F32
[512]
blk.6.attn_v.weight
BF16
BF16
[2880, 512]
blk.6.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.6.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.6.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.6.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.6.ffn_gate_inp.bias
F32
F32
[128]
blk.6.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.6.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.6.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.6.post_attention_norm.weight
F32
F32
[2880]
blk.7
blk.7.attn_k.bias
F32
F32
[512]
blk.7.attn_k.weight
BF16
BF16
[2880, 512]
blk.7.attn_norm.weight
F32
F32
[2880]
blk.7.attn_output.bias
F32
F32
[2880]
blk.7.attn_output.weight
BF16
BF16
[4096, 2880]
blk.7.attn_q.bias
F32
F32
[4096]
blk.7.attn_q.weight
BF16
BF16
[2880, 4096]
blk.7.attn_sinks.weight
F32
F32
[64]
blk.7.attn_v.bias
F32
F32
[512]
blk.7.attn_v.weight
BF16
BF16
[2880, 512]
blk.7.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.7.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.7.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.7.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.7.ffn_gate_inp.bias
F32
F32
[128]
blk.7.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.7.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.7.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.7.post_attention_norm.weight
F32
F32
[2880]
blk.8
blk.8.attn_k.bias
F32
F32
[512]
blk.8.attn_k.weight
BF16
BF16
[2880, 512]
blk.8.attn_norm.weight
F32
F32
[2880]
blk.8.attn_output.bias
F32
F32
[2880]
blk.8.attn_output.weight
BF16
BF16
[4096, 2880]
blk.8.attn_q.bias
F32
F32
[4096]
blk.8.attn_q.weight
BF16
BF16
[2880, 4096]
blk.8.attn_sinks.weight
F32
F32
[64]
blk.8.attn_v.bias
F32
F32
[512]
blk.8.attn_v.weight
BF16
BF16
[2880, 512]
blk.8.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.8.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.8.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.8.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.8.ffn_gate_inp.bias
F32
F32
[128]
blk.8.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.8.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.8.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.8.post_attention_norm.weight
F32
F32
[2880]
blk.9
blk.9.attn_k.bias
F32
F32
[512]
blk.9.attn_k.weight
BF16
BF16
[2880, 512]
blk.9.attn_norm.weight
F32
F32
[2880]
blk.9.attn_output.bias
F32
F32
[2880]
blk.9.attn_output.weight
BF16
BF16
[4096, 2880]
blk.9.attn_q.bias
F32
F32
[4096]
blk.9.attn_q.weight
BF16
BF16
[2880, 4096]
blk.9.attn_sinks.weight
F32
F32
[64]
blk.9.attn_v.bias
F32
F32
[512]
blk.9.attn_v.weight
BF16
BF16
[2880, 512]
blk.9.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.9.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.9.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.9.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.9.ffn_gate_inp.bias
F32
F32
[128]
blk.9.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.9.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.9.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.9.post_attention_norm.weight
F32
F32
[2880]
blk.10
blk.10.attn_k.bias
F32
F32
[512]
blk.10.attn_k.weight
BF16
BF16
[2880, 512]
blk.10.attn_norm.weight
F32
F32
[2880]
blk.10.attn_output.bias
F32
F32
[2880]
blk.10.attn_output.weight
BF16
BF16
[4096, 2880]
blk.10.attn_q.bias
F32
F32
[4096]
blk.10.attn_q.weight
BF16
BF16
[2880, 4096]
blk.10.attn_sinks.weight
F32
F32
[64]
blk.10.attn_v.bias
F32
F32
[512]
blk.10.attn_v.weight
BF16
BF16
[2880, 512]
blk.10.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.10.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.10.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.10.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.10.ffn_gate_inp.bias
F32
F32
[128]
blk.10.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.10.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.10.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.10.post_attention_norm.weight
F32
F32
[2880]
blk.11
blk.11.attn_k.bias
F32
F32
[512]
blk.11.attn_k.weight
BF16
BF16
[2880, 512]
blk.11.attn_norm.weight
F32
F32
[2880]
blk.11.attn_output.bias
F32
F32
[2880]
blk.11.attn_output.weight
BF16
BF16
[4096, 2880]
blk.11.attn_q.bias
F32
F32
[4096]
blk.11.attn_q.weight
BF16
BF16
[2880, 4096]
blk.11.attn_sinks.weight
F32
F32
[64]
blk.11.attn_v.bias
F32
F32
[512]
blk.11.attn_v.weight
BF16
BF16
[2880, 512]
blk.11.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.11.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.11.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.11.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.11.ffn_gate_inp.bias
F32
F32
[128]
blk.11.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.11.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.11.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.11.post_attention_norm.weight
F32
F32
[2880]
blk.12
blk.12.attn_k.bias
F32
F32
[512]
blk.12.attn_k.weight
BF16
BF16
[2880, 512]
blk.12.attn_norm.weight
F32
F32
[2880]
blk.12.attn_output.bias
F32
F32
[2880]
blk.12.attn_output.weight
BF16
BF16
[4096, 2880]
blk.12.attn_q.bias
F32
F32
[4096]
blk.12.attn_q.weight
BF16
BF16
[2880, 4096]
blk.12.attn_sinks.weight
F32
F32
[64]
blk.12.attn_v.bias
F32
F32
[512]
blk.12.attn_v.weight
BF16
BF16
[2880, 512]
blk.12.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.12.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.12.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.12.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.12.ffn_gate_inp.bias
F32
F32
[128]
blk.12.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.12.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.12.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.12.post_attention_norm.weight
F32
F32
[2880]
blk.13
blk.13.attn_k.bias
F32
F32
[512]
blk.13.attn_k.weight
BF16
BF16
[2880, 512]
blk.13.attn_norm.weight
F32
F32
[2880]
blk.13.attn_output.bias
F32
F32
[2880]
blk.13.attn_output.weight
BF16
BF16
[4096, 2880]
blk.13.attn_q.bias
F32
F32
[4096]
blk.13.attn_q.weight
BF16
BF16
[2880, 4096]
blk.13.attn_sinks.weight
F32
F32
[64]
blk.13.attn_v.bias
F32
F32
[512]
blk.13.attn_v.weight
BF16
BF16
[2880, 512]
blk.13.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.13.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.13.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.13.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.13.ffn_gate_inp.bias
F32
F32
[128]
blk.13.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.13.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.13.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.13.post_attention_norm.weight
F32
F32
[2880]
blk.14
blk.14.attn_k.bias
F32
F32
[512]
blk.14.attn_k.weight
BF16
BF16
[2880, 512]
blk.14.attn_norm.weight
F32
F32
[2880]
blk.14.attn_output.bias
F32
F32
[2880]
blk.14.attn_output.weight
BF16
BF16
[4096, 2880]
blk.14.attn_q.bias
F32
F32
[4096]
blk.14.attn_q.weight
BF16
BF16
[2880, 4096]
blk.14.attn_sinks.weight
F32
F32
[64]
blk.14.attn_v.bias
F32
F32
[512]
blk.14.attn_v.weight
BF16
BF16
[2880, 512]
blk.14.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.14.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.14.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.14.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.14.ffn_gate_inp.bias
F32
F32
[128]
blk.14.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.14.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.14.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.14.post_attention_norm.weight
F32
F32
[2880]
blk.15
blk.15.attn_k.bias
F32
F32
[512]
blk.15.attn_k.weight
BF16
BF16
[2880, 512]
blk.15.attn_norm.weight
F32
F32
[2880]
blk.15.attn_output.bias
F32
F32
[2880]
blk.15.attn_output.weight
BF16
BF16
[4096, 2880]
blk.15.attn_q.bias
F32
F32
[4096]
blk.15.attn_q.weight
BF16
BF16
[2880, 4096]
blk.15.attn_sinks.weight
F32
F32
[64]
blk.15.attn_v.bias
F32
F32
[512]
blk.15.attn_v.weight
BF16
BF16
[2880, 512]
blk.15.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.15.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.15.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.15.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.15.ffn_gate_inp.bias
F32
F32
[128]
blk.15.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.15.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.15.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.15.post_attention_norm.weight
F32
F32
[2880]
blk.16
blk.16.attn_k.bias
F32
F32
[512]
blk.16.attn_k.weight
BF16
BF16
[2880, 512]
blk.16.attn_norm.weight
F32
F32
[2880]
blk.16.attn_output.bias
F32
F32
[2880]
blk.16.attn_output.weight
BF16
BF16
[4096, 2880]
blk.16.attn_q.bias
F32
F32
[4096]
blk.16.attn_q.weight
BF16
BF16
[2880, 4096]
blk.16.attn_sinks.weight
F32
F32
[64]
blk.16.attn_v.bias
F32
F32
[512]
blk.16.attn_v.weight
BF16
BF16
[2880, 512]
blk.16.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.16.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.16.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.16.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.16.ffn_gate_inp.bias
F32
F32
[128]
blk.16.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.16.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.16.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.16.post_attention_norm.weight
F32
F32
[2880]
blk.17
blk.17.attn_k.bias
F32
F32
[512]
blk.17.attn_k.weight
BF16
BF16
[2880, 512]
blk.17.attn_norm.weight
F32
F32
[2880]
blk.17.attn_output.bias
F32
F32
[2880]
blk.17.attn_output.weight
BF16
BF16
[4096, 2880]
blk.17.attn_q.bias
F32
F32
[4096]
blk.17.attn_q.weight
BF16
BF16
[2880, 4096]
blk.17.attn_sinks.weight
F32
F32
[64]
blk.17.attn_v.bias
F32
F32
[512]
blk.17.attn_v.weight
BF16
BF16
[2880, 512]
blk.17.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.17.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.17.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.17.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.17.ffn_gate_inp.bias
F32
F32
[128]
blk.17.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.17.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.17.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.17.post_attention_norm.weight
F32
F32
[2880]
blk.18
blk.18.attn_k.bias
F32
F32
[512]
blk.18.attn_k.weight
BF16
BF16
[2880, 512]
blk.18.attn_norm.weight
F32
F32
[2880]
blk.18.attn_output.bias
F32
F32
[2880]
blk.18.attn_output.weight
BF16
BF16
[4096, 2880]
blk.18.attn_q.bias
F32
F32
[4096]
blk.18.attn_q.weight
BF16
BF16
[2880, 4096]
blk.18.attn_sinks.weight
F32
F32
[64]
blk.18.attn_v.bias
F32
F32
[512]
blk.18.attn_v.weight
BF16
BF16
[2880, 512]
blk.18.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.18.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.18.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.18.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.18.ffn_gate_inp.bias
F32
F32
[128]
blk.18.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.18.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.18.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.18.post_attention_norm.weight
F32
F32
[2880]
blk.19
blk.19.attn_k.bias
F32
F32
[512]
blk.19.attn_k.weight
BF16
BF16
[2880, 512]
blk.19.attn_norm.weight
F32
F32
[2880]
blk.19.attn_output.bias
F32
F32
[2880]
blk.19.attn_output.weight
BF16
BF16
[4096, 2880]
blk.19.attn_q.bias
F32
F32
[4096]
blk.19.attn_q.weight
BF16
BF16
[2880, 4096]
blk.19.attn_sinks.weight
F32
F32
[64]
blk.19.attn_v.bias
F32
F32
[512]
blk.19.attn_v.weight
BF16
BF16
[2880, 512]
blk.19.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.19.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.19.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.19.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.19.ffn_gate_inp.bias
F32
F32
[128]
blk.19.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.19.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.19.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.19.post_attention_norm.weight
F32
F32
[2880]
blk.20
blk.20.attn_k.bias
F32
F32
[512]
blk.20.attn_k.weight
BF16
BF16
[2880, 512]
blk.20.attn_norm.weight
F32
F32
[2880]
blk.20.attn_output.bias
F32
F32
[2880]
blk.20.attn_output.weight
BF16
BF16
[4096, 2880]
blk.20.attn_q.bias
F32
F32
[4096]
blk.20.attn_q.weight
BF16
BF16
[2880, 4096]
blk.20.attn_sinks.weight
F32
F32
[64]
blk.20.attn_v.bias
F32
F32
[512]
blk.20.attn_v.weight
BF16
BF16
[2880, 512]
blk.20.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.20.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.20.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.20.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.20.ffn_gate_inp.bias
F32
F32
[128]
blk.20.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.20.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.20.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.20.post_attention_norm.weight
F32
F32
[2880]
blk.21
blk.21.attn_k.bias
F32
F32
[512]
blk.21.attn_k.weight
BF16
BF16
[2880, 512]
blk.21.attn_norm.weight
F32
F32
[2880]
blk.21.attn_output.bias
F32
F32
[2880]
blk.21.attn_output.weight
BF16
BF16
[4096, 2880]
blk.21.attn_q.bias
F32
F32
[4096]
blk.21.attn_q.weight
BF16
BF16
[2880, 4096]
blk.21.attn_sinks.weight
F32
F32
[64]
blk.21.attn_v.bias
F32
F32
[512]
blk.21.attn_v.weight
BF16
BF16
[2880, 512]
blk.21.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.21.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.21.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.21.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.21.ffn_gate_inp.bias
F32
F32
[128]
blk.21.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.21.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.21.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.21.post_attention_norm.weight
F32
F32
[2880]
blk.22
blk.22.attn_k.bias
F32
F32
[512]
blk.22.attn_k.weight
BF16
BF16
[2880, 512]
blk.22.attn_norm.weight
F32
F32
[2880]
blk.22.attn_output.bias
F32
F32
[2880]
blk.22.attn_output.weight
BF16
BF16
[4096, 2880]
blk.22.attn_q.bias
F32
F32
[4096]
blk.22.attn_q.weight
BF16
BF16
[2880, 4096]
blk.22.attn_sinks.weight
F32
F32
[64]
blk.22.attn_v.bias
F32
F32
[512]
blk.22.attn_v.weight
BF16
BF16
[2880, 512]
blk.22.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.22.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.22.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.22.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.22.ffn_gate_inp.bias
F32
F32
[128]
blk.22.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.22.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.22.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.22.post_attention_norm.weight
F32
F32
[2880]
blk.23
blk.23.attn_k.bias
F32
F32
[512]
blk.23.attn_k.weight
BF16
BF16
[2880, 512]
blk.23.attn_norm.weight
F32
F32
[2880]
blk.23.attn_output.bias
F32
F32
[2880]
blk.23.attn_output.weight
BF16
BF16
[4096, 2880]
blk.23.attn_q.bias
F32
F32
[4096]
blk.23.attn_q.weight
BF16
BF16
[2880, 4096]
blk.23.attn_sinks.weight
F32
F32
[64]
blk.23.attn_v.bias
F32
F32
[512]
blk.23.attn_v.weight
BF16
BF16
[2880, 512]
blk.23.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.23.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.23.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.23.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.23.ffn_gate_inp.bias
F32
F32
[128]
blk.23.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.23.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.23.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.23.post_attention_norm.weight
F32
F32
[2880]
blk.24
blk.24.attn_k.bias
F32
F32
[512]
blk.24.attn_k.weight
BF16
BF16
[2880, 512]
blk.24.attn_norm.weight
F32
F32
[2880]
blk.24.attn_output.bias
F32
F32
[2880]
blk.24.attn_output.weight
BF16
BF16
[4096, 2880]
blk.24.attn_q.bias
F32
F32
[4096]
blk.24.attn_q.weight
BF16
BF16
[2880, 4096]
blk.24.attn_sinks.weight
F32
F32
[64]
blk.24.attn_v.bias
F32
F32
[512]
blk.24.attn_v.weight
BF16
BF16
[2880, 512]
blk.24.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.24.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.24.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.24.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.24.ffn_gate_inp.bias
F32
F32
[128]
blk.24.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.24.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.24.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.24.post_attention_norm.weight
F32
F32
[2880]
blk.25
blk.25.attn_k.bias
F32
F32
[512]
blk.25.attn_k.weight
BF16
BF16
[2880, 512]
blk.25.attn_norm.weight
F32
F32
[2880]
blk.25.attn_output.bias
F32
F32
[2880]
blk.25.attn_output.weight
BF16
BF16
[4096, 2880]
blk.25.attn_q.bias
F32
F32
[4096]
blk.25.attn_q.weight
BF16
BF16
[2880, 4096]
blk.25.attn_sinks.weight
F32
F32
[64]
blk.25.attn_v.bias
F32
F32
[512]
blk.25.attn_v.weight
BF16
BF16
[2880, 512]
blk.25.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.25.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.25.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.25.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.25.ffn_gate_inp.bias
F32
F32
[128]
blk.25.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.25.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.25.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.25.post_attention_norm.weight
F32
F32
[2880]
blk.26
blk.26.attn_k.bias
F32
F32
[512]
blk.26.attn_k.weight
BF16
BF16
[2880, 512]
blk.26.attn_norm.weight
F32
F32
[2880]
blk.26.attn_output.bias
F32
F32
[2880]
blk.26.attn_output.weight
BF16
BF16
[4096, 2880]
blk.26.attn_q.bias
F32
F32
[4096]
blk.26.attn_q.weight
BF16
BF16
[2880, 4096]
blk.26.attn_sinks.weight
F32
F32
[64]
blk.26.attn_v.bias
F32
F32
[512]
blk.26.attn_v.weight
BF16
BF16
[2880, 512]
blk.26.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.26.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.26.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.26.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.26.ffn_gate_inp.bias
F32
F32
[128]
blk.26.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.26.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.26.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.26.post_attention_norm.weight
F32
F32
[2880]
blk.27
blk.27.attn_k.bias
F32
F32
[512]
blk.27.attn_k.weight
BF16
BF16
[2880, 512]
blk.27.attn_norm.weight
F32
F32
[2880]
blk.27.attn_output.bias
F32
F32
[2880]
blk.27.attn_output.weight
BF16
BF16
[4096, 2880]
blk.27.attn_q.bias
F32
F32
[4096]
blk.27.attn_q.weight
BF16
BF16
[2880, 4096]
blk.27.attn_sinks.weight
F32
F32
[64]
blk.27.attn_v.bias
F32
F32
[512]
blk.27.attn_v.weight
BF16
BF16
[2880, 512]
blk.27.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.27.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.27.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.27.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.27.ffn_gate_inp.bias
F32
F32
[128]
blk.27.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.27.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.27.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.27.post_attention_norm.weight
F32
F32
[2880]
blk.28
blk.28.attn_k.bias
F32
F32
[512]
blk.28.attn_k.weight
BF16
BF16
[2880, 512]
blk.28.attn_norm.weight
F32
F32
[2880]
blk.28.attn_output.bias
F32
F32
[2880]
blk.28.attn_output.weight
BF16
BF16
[4096, 2880]
blk.28.attn_q.bias
F32
F32
[4096]
blk.28.attn_q.weight
BF16
BF16
[2880, 4096]
blk.28.attn_sinks.weight
F32
F32
[64]
blk.28.attn_v.bias
F32
F32
[512]
blk.28.attn_v.weight
BF16
BF16
[2880, 512]
blk.28.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.28.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.28.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.28.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.28.ffn_gate_inp.bias
F32
F32
[128]
blk.28.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.28.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.28.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.28.post_attention_norm.weight
F32
F32
[2880]
blk.29
blk.29.attn_k.bias
F32
F32
[512]
blk.29.attn_k.weight
BF16
BF16
[2880, 512]
blk.29.attn_norm.weight
F32
F32
[2880]
blk.29.attn_output.bias
F32
F32
[2880]
blk.29.attn_output.weight
BF16
BF16
[4096, 2880]
blk.29.attn_q.bias
F32
F32
[4096]
blk.29.attn_q.weight
BF16
BF16
[2880, 4096]
blk.29.attn_sinks.weight
F32
F32
[64]
blk.29.attn_v.bias
F32
F32
[512]
blk.29.attn_v.weight
BF16
BF16
[2880, 512]
blk.29.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.29.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.29.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.29.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.29.ffn_gate_inp.bias
F32
F32
[128]
blk.29.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.29.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.29.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.29.post_attention_norm.weight
F32
F32
[2880]
blk.30
blk.30.attn_k.bias
F32
F32
[512]
blk.30.attn_k.weight
BF16
BF16
[2880, 512]
blk.30.attn_norm.weight
F32
F32
[2880]
blk.30.attn_output.bias
F32
F32
[2880]
blk.30.attn_output.weight
BF16
BF16
[4096, 2880]
blk.30.attn_q.bias
F32
F32
[4096]
blk.30.attn_q.weight
BF16
BF16
[2880, 4096]
blk.30.attn_sinks.weight
F32
F32
[64]
blk.30.attn_v.bias
F32
F32
[512]
blk.30.attn_v.weight
BF16
BF16
[2880, 512]
blk.30.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.30.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.30.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.30.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.30.ffn_gate_inp.bias
F32
F32
[128]
blk.30.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.30.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.30.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.30.post_attention_norm.weight
F32
F32
[2880]
blk.31
blk.31.attn_k.bias
F32
F32
[512]
blk.31.attn_k.weight
BF16
BF16
[2880, 512]
blk.31.attn_norm.weight
F32
F32
[2880]
blk.31.attn_output.bias
F32
F32
[2880]
blk.31.attn_output.weight
BF16
BF16
[4096, 2880]
blk.31.attn_q.bias
F32
F32
[4096]
blk.31.attn_q.weight
BF16
BF16
[2880, 4096]
blk.31.attn_sinks.weight
F32
F32
[64]
blk.31.attn_v.bias
F32
F32
[512]
blk.31.attn_v.weight
BF16
BF16
[2880, 512]
blk.31.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.31.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.31.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.31.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.31.ffn_gate_inp.bias
F32
F32
[128]
blk.31.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.31.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.31.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.31.post_attention_norm.weight
F32
F32
[2880]
blk.32
blk.32.attn_k.bias
F32
F32
[512]
blk.32.attn_k.weight
BF16
BF16
[2880, 512]
blk.32.attn_norm.weight
F32
F32
[2880]
blk.32.attn_output.bias
F32
F32
[2880]
blk.32.attn_output.weight
BF16
BF16
[4096, 2880]
blk.32.attn_q.bias
F32
F32
[4096]
blk.32.attn_q.weight
BF16
BF16
[2880, 4096]
blk.32.attn_sinks.weight
F32
F32
[64]
blk.32.attn_v.bias
F32
F32
[512]
blk.32.attn_v.weight
BF16
BF16
[2880, 512]
blk.32.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.32.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.32.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.32.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.32.ffn_gate_inp.bias
F32
F32
[128]
blk.32.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.32.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.32.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.32.post_attention_norm.weight
F32
F32
[2880]
blk.33
blk.33.attn_k.bias
F32
F32
[512]
blk.33.attn_k.weight
BF16
BF16
[2880, 512]
blk.33.attn_norm.weight
F32
F32
[2880]
blk.33.attn_output.bias
F32
F32
[2880]
blk.33.attn_output.weight
BF16
BF16
[4096, 2880]
blk.33.attn_q.bias
F32
F32
[4096]
blk.33.attn_q.weight
BF16
BF16
[2880, 4096]
blk.33.attn_sinks.weight
F32
F32
[64]
blk.33.attn_v.bias
F32
F32
[512]
blk.33.attn_v.weight
BF16
BF16
[2880, 512]
blk.33.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.33.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.33.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.33.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.33.ffn_gate_inp.bias
F32
F32
[128]
blk.33.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.33.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.33.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.33.post_attention_norm.weight
F32
F32
[2880]
blk.34
blk.34.attn_k.bias
F32
F32
[512]
blk.34.attn_k.weight
BF16
BF16
[2880, 512]
blk.34.attn_norm.weight
F32
F32
[2880]
blk.34.attn_output.bias
F32
F32
[2880]
blk.34.attn_output.weight
BF16
BF16
[4096, 2880]
blk.34.attn_q.bias
F32
F32
[4096]
blk.34.attn_q.weight
BF16
BF16
[2880, 4096]
blk.34.attn_sinks.weight
F32
F32
[64]
blk.34.attn_v.bias
F32
F32
[512]
blk.34.attn_v.weight
BF16
BF16
[2880, 512]
blk.34.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.34.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.34.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.34.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.34.ffn_gate_inp.bias
F32
F32
[128]
blk.34.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.34.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.34.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.34.post_attention_norm.weight
F32
F32
[2880]
blk.35
blk.35.attn_k.bias
F32
F32
[512]
blk.35.attn_k.weight
BF16
BF16
[2880, 512]
blk.35.attn_norm.weight
F32
F32
[2880]
blk.35.attn_output.bias
F32
F32
[2880]
blk.35.attn_output.weight
BF16
BF16
[4096, 2880]
blk.35.attn_q.bias
F32
F32
[4096]
blk.35.attn_q.weight
BF16
BF16
[2880, 4096]
blk.35.attn_sinks.weight
F32
F32
[64]
blk.35.attn_v.bias
F32
F32
[512]
blk.35.attn_v.weight
BF16
BF16
[2880, 512]
blk.35.ffn_down_exps.bias
F32
F32
[2880, 128]
blk.35.ffn_down_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.35.ffn_gate_exps.bias
F32
F32
[2880, 128]
blk.35.ffn_gate_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.35.ffn_gate_inp.bias
F32
F32
[128]
blk.35.ffn_gate_inp.weight
F32
F32
[2880, 128]
blk.35.ffn_up_exps.bias
F32
F32
[2880, 128]
blk.35.ffn_up_exps.weight
MXFP4
MXFP4
[2880, 2880, 128]
blk.35.post_attention_norm.weight
F32
F32
[2880]
output.weight
BF16
BF16
[2880, 201088]
output_norm.weight
F32
F32
[2880]