Models
Docs
Pricing
Sign in
Download
Models
Download
Docs
Pricing
Sign in
nemotron-cascade-2
:30b
55
Downloads
Updated
35 minutes ago
An open 30B MoE model from NVIDIA with 3B activated parameters that delivers strong reasoning and agentic capabilities.
An open 30B MoE model from NVIDIA with 3B activated parameters that delivers strong reasoning and agentic capabilities.
Cancel
tools
thinking
30b
nemotron-cascade-2:30b
...
/
model
9e0c827cfd6a · 24GB
Metadata
general.architecture
nemotron_h_moe
nemotron_h_moe
general.file_type
Q4_K_M
Q4_K_M
nemotron_h_moe.attention.head_count
32
32
nemotron_h_moe.attention.head_count_kv
[0, 0, 0, 0, 0, ...]
[0, 0, 0, 0, 0, ...]
nemotron_h_moe.attention.key_length
128
128
nemotron_h_moe.attention.layer_norm_epsilon
1e-05
1e-05
nemotron_h_moe.attention.layer_norm_rms_epsilon
1e-05
1e-05
nemotron_h_moe.attention.value_length
128
128
nemotron_h_moe.block_count
52
52
nemotron_h_moe.context_length
262144
262144
nemotron_h_moe.embedding_length
2688
2688
nemotron_h_moe.expert_count
128
128
nemotron_h_moe.expert_feed_forward_length
1856
1856
nemotron_h_moe.expert_group_count
1
1
nemotron_h_moe.expert_group_used_count
1
1
nemotron_h_moe.expert_shared_count
1
1
nemotron_h_moe.expert_shared_feed_forward_length
3712
3712
nemotron_h_moe.expert_used_count
6
6
nemotron_h_moe.expert_weights_norm
true
true
nemotron_h_moe.expert_weights_scale
2.5
2.5
nemotron_h_moe.feed_forward_length
[0, 1856, 0, 1856, 0, ...]
[0, 1856, 0, 1856, 0, ...]
nemotron_h_moe.rope.dimension_count
128
128
nemotron_h_moe.rope.freq_base
10000
10000
nemotron_h_moe.ssm.conv_kernel
4
4
nemotron_h_moe.ssm.group_count
8
8
nemotron_h_moe.ssm.inner_size
4096
4096
nemotron_h_moe.ssm.state_size
128
128
nemotron_h_moe.ssm.time_step_rank
64
64
tokenizer.ggml.add_bos_token
false
false
tokenizer.ggml.add_eos_token
false
false
tokenizer.ggml.add_padding_token
false
false
tokenizer.ggml.add_unknown_token
false
false
tokenizer.ggml.bos_token_id
1
1
tokenizer.ggml.eos_token_id
11
11
tokenizer.ggml.eos_token_ids
[2, 11]
[2, 11]
tokenizer.ggml.merges
[Ġ Ġ, Ġ t, e r, i n, Ġ ĠĠĠ, ...]
[Ġ Ġ, Ġ t, e r, i n, Ġ ĠĠĠ, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
11
11
tokenizer.ggml.pre
default
default
tokenizer.ggml.scores
[0, 1, 2, 3, 4, ...]
[0, 1, 2, 3, 4, ...]
tokenizer.ggml.token_type
[3, 3, 3, 3, 3, ...]
[3, 3, 3, 3, 3, ...]
tokenizer.ggml.tokens
[<unk>, <s>, </s>, [INST], [/INST], ...]
[<unk>, <s>, </s>, [INST], [/INST], ...]
tokenizer.ggml.unknown_token_id
0
0
Tensor
Name
Type
Shape
token_embd.weight
Q5_0
Q5_0
[2688, 131072]
blk.0
blk.0.attn_norm.weight
F32
F32
[2688]
blk.0.ssm_a
F32
F32
[1, 64]
blk.0.ssm_conv1d.bias
F32
F32
[6144]
blk.0.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.0.ssm_d
F32
F32
[1, 64]
blk.0.ssm_dt.bias
F32
F32
[64]
blk.0.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.0.ssm_norm.weight
F32
F32
[512, 8]
blk.0.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.1
blk.1.attn_norm.weight
F32
F32
[2688]
blk.1.exp_probs_b.bias
F32
F32
[128]
blk.1.ffn_down_exps.weight
Q8_0
Q8_0
[1856, 2688, 128]
blk.1.ffn_down_shexp.weight
Q8_0
Q8_0
[3712, 2688]
blk.1.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.1.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.1.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.2
blk.2.attn_norm.weight
F32
F32
[2688]
blk.2.ssm_a
F32
F32
[1, 64]
blk.2.ssm_conv1d.bias
F32
F32
[6144]
blk.2.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.2.ssm_d
F32
F32
[1, 64]
blk.2.ssm_dt.bias
F32
F32
[64]
blk.2.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.2.ssm_norm.weight
F32
F32
[512, 8]
blk.2.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.3
blk.3.attn_norm.weight
F32
F32
[2688]
blk.3.exp_probs_b.bias
F32
F32
[128]
blk.3.ffn_down_exps.weight
Q8_0
Q8_0
[1856, 2688, 128]
blk.3.ffn_down_shexp.weight
Q8_0
Q8_0
[3712, 2688]
blk.3.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.3.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.3.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.4
blk.4.attn_norm.weight
F32
F32
[2688]
blk.4.ssm_a
F32
F32
[1, 64]
blk.4.ssm_conv1d.bias
F32
F32
[6144]
blk.4.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.4.ssm_d
F32
F32
[1, 64]
blk.4.ssm_dt.bias
F32
F32
[64]
blk.4.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.4.ssm_norm.weight
F32
F32
[512, 8]
blk.4.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.5
blk.5.attn_k.weight
Q5_0
Q5_0
[2688, 256]
blk.5.attn_norm.weight
F32
F32
[2688]
blk.5.attn_output.weight
Q4_K
Q4_K
[4096, 2688]
blk.5.attn_q.weight
Q5_0
Q5_0
[2688, 4096]
blk.5.attn_v.weight
Q5_0
Q5_0
[2688, 256]
blk.6
blk.6.attn_norm.weight
F32
F32
[2688]
blk.6.exp_probs_b.bias
F32
F32
[128]
blk.6.ffn_down_exps.weight
Q8_0
Q8_0
[1856, 2688, 128]
blk.6.ffn_down_shexp.weight
Q8_0
Q8_0
[3712, 2688]
blk.6.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.6.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.6.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.7
blk.7.attn_norm.weight
F32
F32
[2688]
blk.7.ssm_a
F32
F32
[1, 64]
blk.7.ssm_conv1d.bias
F32
F32
[6144]
blk.7.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.7.ssm_d
F32
F32
[1, 64]
blk.7.ssm_dt.bias
F32
F32
[64]
blk.7.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.7.ssm_norm.weight
F32
F32
[512, 8]
blk.7.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.8
blk.8.attn_norm.weight
F32
F32
[2688]
blk.8.exp_probs_b.bias
F32
F32
[128]
blk.8.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.8.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.8.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.8.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.8.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.9
blk.9.attn_norm.weight
F32
F32
[2688]
blk.9.ssm_a
F32
F32
[1, 64]
blk.9.ssm_conv1d.bias
F32
F32
[6144]
blk.9.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.9.ssm_d
F32
F32
[1, 64]
blk.9.ssm_dt.bias
F32
F32
[64]
blk.9.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.9.ssm_norm.weight
F32
F32
[512, 8]
blk.9.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.10
blk.10.attn_norm.weight
F32
F32
[2688]
blk.10.exp_probs_b.bias
F32
F32
[128]
blk.10.ffn_down_exps.weight
Q8_0
Q8_0
[1856, 2688, 128]
blk.10.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.10.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.10.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.10.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.11
blk.11.attn_norm.weight
F32
F32
[2688]
blk.11.ssm_a
F32
F32
[1, 64]
blk.11.ssm_conv1d.bias
F32
F32
[6144]
blk.11.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.11.ssm_d
F32
F32
[1, 64]
blk.11.ssm_dt.bias
F32
F32
[64]
blk.11.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.11.ssm_norm.weight
F32
F32
[512, 8]
blk.11.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.12
blk.12.attn_k.weight
Q5_0
Q5_0
[2688, 256]
blk.12.attn_norm.weight
F32
F32
[2688]
blk.12.attn_output.weight
Q4_K
Q4_K
[4096, 2688]
blk.12.attn_q.weight
Q5_0
Q5_0
[2688, 4096]
blk.12.attn_v.weight
Q5_0
Q5_0
[2688, 256]
blk.13
blk.13.attn_norm.weight
F32
F32
[2688]
blk.13.exp_probs_b.bias
F32
F32
[128]
blk.13.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.13.ffn_down_shexp.weight
Q8_0
Q8_0
[3712, 2688]
blk.13.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.13.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.13.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.14
blk.14.attn_norm.weight
F32
F32
[2688]
blk.14.ssm_a
F32
F32
[1, 64]
blk.14.ssm_conv1d.bias
F32
F32
[6144]
blk.14.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.14.ssm_d
F32
F32
[1, 64]
blk.14.ssm_dt.bias
F32
F32
[64]
blk.14.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.14.ssm_norm.weight
F32
F32
[512, 8]
blk.14.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.15
blk.15.attn_norm.weight
F32
F32
[2688]
blk.15.exp_probs_b.bias
F32
F32
[128]
blk.15.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.15.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.15.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.15.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.15.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.16
blk.16.attn_norm.weight
F32
F32
[2688]
blk.16.ssm_a
F32
F32
[1, 64]
blk.16.ssm_conv1d.bias
F32
F32
[6144]
blk.16.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.16.ssm_d
F32
F32
[1, 64]
blk.16.ssm_dt.bias
F32
F32
[64]
blk.16.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.16.ssm_norm.weight
F32
F32
[512, 8]
blk.16.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.17
blk.17.attn_norm.weight
F32
F32
[2688]
blk.17.exp_probs_b.bias
F32
F32
[128]
blk.17.ffn_down_exps.weight
Q8_0
Q8_0
[1856, 2688, 128]
blk.17.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.17.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.17.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.17.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.18
blk.18.attn_norm.weight
F32
F32
[2688]
blk.18.ssm_a
F32
F32
[1, 64]
blk.18.ssm_conv1d.bias
F32
F32
[6144]
blk.18.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.18.ssm_d
F32
F32
[1, 64]
blk.18.ssm_dt.bias
F32
F32
[64]
blk.18.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.18.ssm_norm.weight
F32
F32
[512, 8]
blk.18.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.19
blk.19.attn_k.weight
Q5_0
Q5_0
[2688, 256]
blk.19.attn_norm.weight
F32
F32
[2688]
blk.19.attn_output.weight
Q4_K
Q4_K
[4096, 2688]
blk.19.attn_q.weight
Q5_0
Q5_0
[2688, 4096]
blk.19.attn_v.weight
Q8_0
Q8_0
[2688, 256]
blk.20
blk.20.attn_norm.weight
F32
F32
[2688]
blk.20.exp_probs_b.bias
F32
F32
[128]
blk.20.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.20.ffn_down_shexp.weight
Q8_0
Q8_0
[3712, 2688]
blk.20.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.20.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.20.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.21
blk.21.attn_norm.weight
F32
F32
[2688]
blk.21.ssm_a
F32
F32
[1, 64]
blk.21.ssm_conv1d.bias
F32
F32
[6144]
blk.21.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.21.ssm_d
F32
F32
[1, 64]
blk.21.ssm_dt.bias
F32
F32
[64]
blk.21.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.21.ssm_norm.weight
F32
F32
[512, 8]
blk.21.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.22
blk.22.attn_norm.weight
F32
F32
[2688]
blk.22.exp_probs_b.bias
F32
F32
[128]
blk.22.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.22.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.22.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.22.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.22.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.23
blk.23.attn_norm.weight
F32
F32
[2688]
blk.23.ssm_a
F32
F32
[1, 64]
blk.23.ssm_conv1d.bias
F32
F32
[6144]
blk.23.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.23.ssm_d
F32
F32
[1, 64]
blk.23.ssm_dt.bias
F32
F32
[64]
blk.23.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.23.ssm_norm.weight
F32
F32
[512, 8]
blk.23.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.24
blk.24.attn_norm.weight
F32
F32
[2688]
blk.24.exp_probs_b.bias
F32
F32
[128]
blk.24.ffn_down_exps.weight
Q8_0
Q8_0
[1856, 2688, 128]
blk.24.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.24.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.24.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.24.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.25
blk.25.attn_norm.weight
F32
F32
[2688]
blk.25.ssm_a
F32
F32
[1, 64]
blk.25.ssm_conv1d.bias
F32
F32
[6144]
blk.25.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.25.ssm_d
F32
F32
[1, 64]
blk.25.ssm_dt.bias
F32
F32
[64]
blk.25.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.25.ssm_norm.weight
F32
F32
[512, 8]
blk.25.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.26
blk.26.attn_k.weight
Q5_0
Q5_0
[2688, 256]
blk.26.attn_norm.weight
F32
F32
[2688]
blk.26.attn_output.weight
Q4_K
Q4_K
[4096, 2688]
blk.26.attn_q.weight
Q5_0
Q5_0
[2688, 4096]
blk.26.attn_v.weight
Q5_0
Q5_0
[2688, 256]
blk.27
blk.27.attn_norm.weight
F32
F32
[2688]
blk.27.exp_probs_b.bias
F32
F32
[128]
blk.27.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.27.ffn_down_shexp.weight
Q8_0
Q8_0
[3712, 2688]
blk.27.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.27.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.27.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.28
blk.28.attn_norm.weight
F32
F32
[2688]
blk.28.ssm_a
F32
F32
[1, 64]
blk.28.ssm_conv1d.bias
F32
F32
[6144]
blk.28.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.28.ssm_d
F32
F32
[1, 64]
blk.28.ssm_dt.bias
F32
F32
[64]
blk.28.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.28.ssm_norm.weight
F32
F32
[512, 8]
blk.28.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.29
blk.29.attn_norm.weight
F32
F32
[2688]
blk.29.exp_probs_b.bias
F32
F32
[128]
blk.29.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.29.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.29.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.29.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.29.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.30
blk.30.attn_norm.weight
F32
F32
[2688]
blk.30.ssm_a
F32
F32
[1, 64]
blk.30.ssm_conv1d.bias
F32
F32
[6144]
blk.30.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.30.ssm_d
F32
F32
[1, 64]
blk.30.ssm_dt.bias
F32
F32
[64]
blk.30.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.30.ssm_norm.weight
F32
F32
[512, 8]
blk.30.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.31
blk.31.attn_norm.weight
F32
F32
[2688]
blk.31.exp_probs_b.bias
F32
F32
[128]
blk.31.ffn_down_exps.weight
Q8_0
Q8_0
[1856, 2688, 128]
blk.31.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.31.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.31.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.31.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.32
blk.32.attn_norm.weight
F32
F32
[2688]
blk.32.ssm_a
F32
F32
[1, 64]
blk.32.ssm_conv1d.bias
F32
F32
[6144]
blk.32.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.32.ssm_d
F32
F32
[1, 64]
blk.32.ssm_dt.bias
F32
F32
[64]
blk.32.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.32.ssm_norm.weight
F32
F32
[512, 8]
blk.32.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.33
blk.33.attn_k.weight
Q5_0
Q5_0
[2688, 256]
blk.33.attn_norm.weight
F32
F32
[2688]
blk.33.attn_output.weight
Q4_K
Q4_K
[4096, 2688]
blk.33.attn_q.weight
Q5_0
Q5_0
[2688, 4096]
blk.33.attn_v.weight
Q5_0
Q5_0
[2688, 256]
blk.34
blk.34.attn_norm.weight
F32
F32
[2688]
blk.34.exp_probs_b.bias
F32
F32
[128]
blk.34.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.34.ffn_down_shexp.weight
Q8_0
Q8_0
[3712, 2688]
blk.34.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.34.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.34.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.35
blk.35.attn_norm.weight
F32
F32
[2688]
blk.35.ssm_a
F32
F32
[1, 64]
blk.35.ssm_conv1d.bias
F32
F32
[6144]
blk.35.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.35.ssm_d
F32
F32
[1, 64]
blk.35.ssm_dt.bias
F32
F32
[64]
blk.35.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.35.ssm_norm.weight
F32
F32
[512, 8]
blk.35.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.36
blk.36.attn_norm.weight
F32
F32
[2688]
blk.36.exp_probs_b.bias
F32
F32
[128]
blk.36.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.36.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.36.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.36.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.36.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.37
blk.37.attn_norm.weight
F32
F32
[2688]
blk.37.ssm_a
F32
F32
[1, 64]
blk.37.ssm_conv1d.bias
F32
F32
[6144]
blk.37.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.37.ssm_d
F32
F32
[1, 64]
blk.37.ssm_dt.bias
F32
F32
[64]
blk.37.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.37.ssm_norm.weight
F32
F32
[512, 8]
blk.37.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.38
blk.38.attn_norm.weight
F32
F32
[2688]
blk.38.exp_probs_b.bias
F32
F32
[128]
blk.38.ffn_down_exps.weight
Q8_0
Q8_0
[1856, 2688, 128]
blk.38.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.38.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.38.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.38.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.39
blk.39.attn_norm.weight
F32
F32
[2688]
blk.39.ssm_a
F32
F32
[1, 64]
blk.39.ssm_conv1d.bias
F32
F32
[6144]
blk.39.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.39.ssm_d
F32
F32
[1, 64]
blk.39.ssm_dt.bias
F32
F32
[64]
blk.39.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.39.ssm_norm.weight
F32
F32
[512, 8]
blk.39.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.40
blk.40.attn_norm.weight
F32
F32
[2688]
blk.40.exp_probs_b.bias
F32
F32
[128]
blk.40.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.40.ffn_down_shexp.weight
Q8_0
Q8_0
[3712, 2688]
blk.40.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.40.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.40.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.41
blk.41.attn_norm.weight
F32
F32
[2688]
blk.41.ssm_a
F32
F32
[1, 64]
blk.41.ssm_conv1d.bias
F32
F32
[6144]
blk.41.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.41.ssm_d
F32
F32
[1, 64]
blk.41.ssm_dt.bias
F32
F32
[64]
blk.41.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.41.ssm_norm.weight
F32
F32
[512, 8]
blk.41.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.42
blk.42.attn_k.weight
Q5_0
Q5_0
[2688, 256]
blk.42.attn_norm.weight
F32
F32
[2688]
blk.42.attn_output.weight
Q4_K
Q4_K
[4096, 2688]
blk.42.attn_q.weight
Q5_0
Q5_0
[2688, 4096]
blk.42.attn_v.weight
Q8_0
Q8_0
[2688, 256]
blk.43
blk.43.attn_norm.weight
F32
F32
[2688]
blk.43.exp_probs_b.bias
F32
F32
[128]
blk.43.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.43.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.43.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.43.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.43.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.44
blk.44.attn_norm.weight
F32
F32
[2688]
blk.44.ssm_a
F32
F32
[1, 64]
blk.44.ssm_conv1d.bias
F32
F32
[6144]
blk.44.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.44.ssm_d
F32
F32
[1, 64]
blk.44.ssm_dt.bias
F32
F32
[64]
blk.44.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.44.ssm_norm.weight
F32
F32
[512, 8]
blk.44.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.45
blk.45.attn_norm.weight
F32
F32
[2688]
blk.45.exp_probs_b.bias
F32
F32
[128]
blk.45.ffn_down_exps.weight
Q8_0
Q8_0
[1856, 2688, 128]
blk.45.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.45.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.45.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.45.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.46
blk.46.attn_norm.weight
F32
F32
[2688]
blk.46.ssm_a
F32
F32
[1, 64]
blk.46.ssm_conv1d.bias
F32
F32
[6144]
blk.46.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.46.ssm_d
F32
F32
[1, 64]
blk.46.ssm_dt.bias
F32
F32
[64]
blk.46.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.46.ssm_norm.weight
F32
F32
[512, 8]
blk.46.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.47
blk.47.attn_norm.weight
F32
F32
[2688]
blk.47.exp_probs_b.bias
F32
F32
[128]
blk.47.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.47.ffn_down_shexp.weight
Q8_0
Q8_0
[3712, 2688]
blk.47.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.47.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.47.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.48
blk.48.attn_norm.weight
F32
F32
[2688]
blk.48.ssm_a
F32
F32
[1, 64]
blk.48.ssm_conv1d.bias
F32
F32
[6144]
blk.48.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.48.ssm_d
F32
F32
[1, 64]
blk.48.ssm_dt.bias
F32
F32
[64]
blk.48.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.48.ssm_norm.weight
F32
F32
[512, 8]
blk.48.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.49
blk.49.attn_norm.weight
F32
F32
[2688]
blk.49.exp_probs_b.bias
F32
F32
[128]
blk.49.ffn_down_exps.weight
Q5_0
Q5_0
[1856, 2688, 128]
blk.49.ffn_down_shexp.weight
Q5_0
Q5_0
[3712, 2688]
blk.49.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.49.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.49.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
blk.50
blk.50.attn_norm.weight
F32
F32
[2688]
blk.50.ssm_a
F32
F32
[1, 64]
blk.50.ssm_conv1d.bias
F32
F32
[6144]
blk.50.ssm_conv1d.weight
F32
F32
[4, 6144]
blk.50.ssm_d
F32
F32
[1, 64]
blk.50.ssm_dt.bias
F32
F32
[64]
blk.50.ssm_in.weight
Q5_0
Q5_0
[2688, 10304]
blk.50.ssm_norm.weight
F32
F32
[512, 8]
blk.50.ssm_out.weight
Q4_K
Q4_K
[4096, 2688]
blk.51
blk.51.attn_norm.weight
F32
F32
[2688]
blk.51.exp_probs_b.bias
F32
F32
[128]
blk.51.ffn_down_exps.weight
Q8_0
Q8_0
[1856, 2688, 128]
blk.51.ffn_down_shexp.weight
Q8_0
Q8_0
[3712, 2688]
blk.51.ffn_gate_inp.weight
F32
F32
[2688, 128]
blk.51.ffn_up_exps.weight
Q5_0
Q5_0
[2688, 1856, 128]
blk.51.ffn_up_shexp.weight
Q5_0
Q5_0
[2688, 3712]
output.weight
Q8_0
Q8_0
[2688, 131072]
output_norm.weight
F32
F32
[2688]