Models
GitHub
Discord
Docs
Cloud
Sign in
Download
Models
Download
GitHub
Discord
Docs
Cloud
Sign in
mollysama
/
rwkv-6-world
:3b
1,967
Downloads
Updated
8 months ago
RWKV's 6th generation models. RWKV (pronounced RwaKuv) is an RNN with great LLM performance.
RWKV's 6th generation models. RWKV (pronounced RwaKuv) is an RNN with great LLM performance.
Cancel
1.6b
3b
7b
14b
rwkv-6-world:3b
...
/
model
fcdc23735214 · 1.9GB
Metadata
general.architecture
rwkv6
rwkv6
general.file_type
Q4_K_M
Q4_K_M
rwkv6.attention.head_count
0
0
rwkv6.attention.layer_norm_epsilon
1e-05
1e-05
rwkv6.block_count
32
32
rwkv6.context_length
1048576
1048576
rwkv6.embedding_length
2560
2560
rwkv6.feed_forward_length
8960
8960
rwkv6.rescale_every_n_layers
6
6
rwkv6.time_decay_extra_dim
64
64
rwkv6.time_mix_extra_dim
32
32
rwkv6.wkv.head_size
64
64
tokenizer.ggml.bos_token_id
0
0
tokenizer.ggml.eos_token_id
0
0
tokenizer.ggml.eot_token_id
261
261
tokenizer.ggml.model
rwkv
rwkv
tokenizer.ggml.token_type
[3, 1, 1, 1, 1, ...]
[3, 1, 1, 1, 1, ...]
tokenizer.ggml.tokens
[<s>, \x00, \x01, \x02, \x03, ...]
[<s>, \x00, \x01, \x02, \x03, ...]
Tensor
Name
Type
Shape
token_embd.weight
Q4_K
Q4_K
[2560, 65536]
blk.0
blk.0.attn_norm.bias
F32
F32
[2560]
blk.0.attn_norm.weight
F32
F32
[2560]
blk.0.attn_norm_2.bias
F32
F32
[2560]
blk.0.attn_norm_2.weight
F32
F32
[2560]
blk.0.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.0.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.0.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.0.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.0.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.0.time_mix_decay.weight
F32
F32
[2560]
blk.0.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.0.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.0.time_mix_first.weight
F32
F32
[64, 40]
blk.0.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.0.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.0.time_mix_lerp_g.weight
F32
F32
[2560]
blk.0.time_mix_lerp_k.weight
F32
F32
[2560]
blk.0.time_mix_lerp_r.weight
F32
F32
[2560]
blk.0.time_mix_lerp_v.weight
F32
F32
[2560]
blk.0.time_mix_lerp_w.weight
F32
F32
[2560]
blk.0.time_mix_lerp_x.weight
F32
F32
[2560]
blk.0.time_mix_ln.bias
F32
F32
[2560]
blk.0.time_mix_ln.weight
F32
F32
[2560]
blk.0.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.0.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.0.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.0.time_mix_w1.weight
F32
F32
[2560, 160]
blk.0.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.1
blk.1.attn_norm.bias
F32
F32
[2560]
blk.1.attn_norm.weight
F32
F32
[2560]
blk.1.attn_norm_2.bias
F32
F32
[2560]
blk.1.attn_norm_2.weight
F32
F32
[2560]
blk.1.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.1.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.1.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.1.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.1.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.1.time_mix_decay.weight
F32
F32
[2560]
blk.1.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.1.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.1.time_mix_first.weight
F32
F32
[64, 40]
blk.1.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.1.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.1.time_mix_lerp_g.weight
F32
F32
[2560]
blk.1.time_mix_lerp_k.weight
F32
F32
[2560]
blk.1.time_mix_lerp_r.weight
F32
F32
[2560]
blk.1.time_mix_lerp_v.weight
F32
F32
[2560]
blk.1.time_mix_lerp_w.weight
F32
F32
[2560]
blk.1.time_mix_lerp_x.weight
F32
F32
[2560]
blk.1.time_mix_ln.bias
F32
F32
[2560]
blk.1.time_mix_ln.weight
F32
F32
[2560]
blk.1.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.1.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.1.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.1.time_mix_w1.weight
F32
F32
[2560, 160]
blk.1.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.2
blk.2.attn_norm.bias
F32
F32
[2560]
blk.2.attn_norm.weight
F32
F32
[2560]
blk.2.attn_norm_2.bias
F32
F32
[2560]
blk.2.attn_norm_2.weight
F32
F32
[2560]
blk.2.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.2.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.2.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.2.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.2.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.2.time_mix_decay.weight
F32
F32
[2560]
blk.2.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.2.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.2.time_mix_first.weight
F32
F32
[64, 40]
blk.2.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.2.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.2.time_mix_lerp_g.weight
F32
F32
[2560]
blk.2.time_mix_lerp_k.weight
F32
F32
[2560]
blk.2.time_mix_lerp_r.weight
F32
F32
[2560]
blk.2.time_mix_lerp_v.weight
F32
F32
[2560]
blk.2.time_mix_lerp_w.weight
F32
F32
[2560]
blk.2.time_mix_lerp_x.weight
F32
F32
[2560]
blk.2.time_mix_ln.bias
F32
F32
[2560]
blk.2.time_mix_ln.weight
F32
F32
[2560]
blk.2.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.2.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.2.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.2.time_mix_w1.weight
F32
F32
[2560, 160]
blk.2.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.3
blk.3.attn_norm.bias
F32
F32
[2560]
blk.3.attn_norm.weight
F32
F32
[2560]
blk.3.attn_norm_2.bias
F32
F32
[2560]
blk.3.attn_norm_2.weight
F32
F32
[2560]
blk.3.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.3.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.3.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.3.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.3.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.3.time_mix_decay.weight
F32
F32
[2560]
blk.3.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.3.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.3.time_mix_first.weight
F32
F32
[64, 40]
blk.3.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.3.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.3.time_mix_lerp_g.weight
F32
F32
[2560]
blk.3.time_mix_lerp_k.weight
F32
F32
[2560]
blk.3.time_mix_lerp_r.weight
F32
F32
[2560]
blk.3.time_mix_lerp_v.weight
F32
F32
[2560]
blk.3.time_mix_lerp_w.weight
F32
F32
[2560]
blk.3.time_mix_lerp_x.weight
F32
F32
[2560]
blk.3.time_mix_ln.bias
F32
F32
[2560]
blk.3.time_mix_ln.weight
F32
F32
[2560]
blk.3.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.3.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.3.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.3.time_mix_w1.weight
F32
F32
[2560, 160]
blk.3.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.4
blk.4.attn_norm.bias
F32
F32
[2560]
blk.4.attn_norm.weight
F32
F32
[2560]
blk.4.attn_norm_2.bias
F32
F32
[2560]
blk.4.attn_norm_2.weight
F32
F32
[2560]
blk.4.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.4.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.4.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.4.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.4.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.4.time_mix_decay.weight
F32
F32
[2560]
blk.4.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.4.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.4.time_mix_first.weight
F32
F32
[64, 40]
blk.4.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.4.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.4.time_mix_lerp_g.weight
F32
F32
[2560]
blk.4.time_mix_lerp_k.weight
F32
F32
[2560]
blk.4.time_mix_lerp_r.weight
F32
F32
[2560]
blk.4.time_mix_lerp_v.weight
F32
F32
[2560]
blk.4.time_mix_lerp_w.weight
F32
F32
[2560]
blk.4.time_mix_lerp_x.weight
F32
F32
[2560]
blk.4.time_mix_ln.bias
F32
F32
[2560]
blk.4.time_mix_ln.weight
F32
F32
[2560]
blk.4.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.4.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.4.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.4.time_mix_w1.weight
F32
F32
[2560, 160]
blk.4.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.5
blk.5.attn_norm.bias
F32
F32
[2560]
blk.5.attn_norm.weight
F32
F32
[2560]
blk.5.attn_norm_2.bias
F32
F32
[2560]
blk.5.attn_norm_2.weight
F32
F32
[2560]
blk.5.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.5.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.5.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.5.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.5.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.5.time_mix_decay.weight
F32
F32
[2560]
blk.5.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.5.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.5.time_mix_first.weight
F32
F32
[64, 40]
blk.5.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.5.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.5.time_mix_lerp_g.weight
F32
F32
[2560]
blk.5.time_mix_lerp_k.weight
F32
F32
[2560]
blk.5.time_mix_lerp_r.weight
F32
F32
[2560]
blk.5.time_mix_lerp_v.weight
F32
F32
[2560]
blk.5.time_mix_lerp_w.weight
F32
F32
[2560]
blk.5.time_mix_lerp_x.weight
F32
F32
[2560]
blk.5.time_mix_ln.bias
F32
F32
[2560]
blk.5.time_mix_ln.weight
F32
F32
[2560]
blk.5.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.5.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.5.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.5.time_mix_w1.weight
F32
F32
[2560, 160]
blk.5.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.6
blk.6.attn_norm.bias
F32
F32
[2560]
blk.6.attn_norm.weight
F32
F32
[2560]
blk.6.attn_norm_2.bias
F32
F32
[2560]
blk.6.attn_norm_2.weight
F32
F32
[2560]
blk.6.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.6.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.6.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.6.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.6.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.6.time_mix_decay.weight
F32
F32
[2560]
blk.6.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.6.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.6.time_mix_first.weight
F32
F32
[64, 40]
blk.6.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.6.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.6.time_mix_lerp_g.weight
F32
F32
[2560]
blk.6.time_mix_lerp_k.weight
F32
F32
[2560]
blk.6.time_mix_lerp_r.weight
F32
F32
[2560]
blk.6.time_mix_lerp_v.weight
F32
F32
[2560]
blk.6.time_mix_lerp_w.weight
F32
F32
[2560]
blk.6.time_mix_lerp_x.weight
F32
F32
[2560]
blk.6.time_mix_ln.bias
F32
F32
[2560]
blk.6.time_mix_ln.weight
F32
F32
[2560]
blk.6.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.6.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.6.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.6.time_mix_w1.weight
F32
F32
[2560, 160]
blk.6.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.7
blk.7.attn_norm.bias
F32
F32
[2560]
blk.7.attn_norm.weight
F32
F32
[2560]
blk.7.attn_norm_2.bias
F32
F32
[2560]
blk.7.attn_norm_2.weight
F32
F32
[2560]
blk.7.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.7.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.7.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.7.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.7.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.7.time_mix_decay.weight
F32
F32
[2560]
blk.7.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.7.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.7.time_mix_first.weight
F32
F32
[64, 40]
blk.7.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.7.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.7.time_mix_lerp_g.weight
F32
F32
[2560]
blk.7.time_mix_lerp_k.weight
F32
F32
[2560]
blk.7.time_mix_lerp_r.weight
F32
F32
[2560]
blk.7.time_mix_lerp_v.weight
F32
F32
[2560]
blk.7.time_mix_lerp_w.weight
F32
F32
[2560]
blk.7.time_mix_lerp_x.weight
F32
F32
[2560]
blk.7.time_mix_ln.bias
F32
F32
[2560]
blk.7.time_mix_ln.weight
F32
F32
[2560]
blk.7.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.7.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.7.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.7.time_mix_w1.weight
F32
F32
[2560, 160]
blk.7.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.8
blk.8.attn_norm.bias
F32
F32
[2560]
blk.8.attn_norm.weight
F32
F32
[2560]
blk.8.attn_norm_2.bias
F32
F32
[2560]
blk.8.attn_norm_2.weight
F32
F32
[2560]
blk.8.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.8.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.8.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.8.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.8.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.8.time_mix_decay.weight
F32
F32
[2560]
blk.8.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.8.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.8.time_mix_first.weight
F32
F32
[64, 40]
blk.8.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.8.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.8.time_mix_lerp_g.weight
F32
F32
[2560]
blk.8.time_mix_lerp_k.weight
F32
F32
[2560]
blk.8.time_mix_lerp_r.weight
F32
F32
[2560]
blk.8.time_mix_lerp_v.weight
F32
F32
[2560]
blk.8.time_mix_lerp_w.weight
F32
F32
[2560]
blk.8.time_mix_lerp_x.weight
F32
F32
[2560]
blk.8.time_mix_ln.bias
F32
F32
[2560]
blk.8.time_mix_ln.weight
F32
F32
[2560]
blk.8.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.8.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.8.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.8.time_mix_w1.weight
F32
F32
[2560, 160]
blk.8.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.9
blk.9.attn_norm.bias
F32
F32
[2560]
blk.9.attn_norm.weight
F32
F32
[2560]
blk.9.attn_norm_2.bias
F32
F32
[2560]
blk.9.attn_norm_2.weight
F32
F32
[2560]
blk.9.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.9.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.9.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.9.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.9.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.9.time_mix_decay.weight
F32
F32
[2560]
blk.9.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.9.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.9.time_mix_first.weight
F32
F32
[64, 40]
blk.9.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.9.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.9.time_mix_lerp_g.weight
F32
F32
[2560]
blk.9.time_mix_lerp_k.weight
F32
F32
[2560]
blk.9.time_mix_lerp_r.weight
F32
F32
[2560]
blk.9.time_mix_lerp_v.weight
F32
F32
[2560]
blk.9.time_mix_lerp_w.weight
F32
F32
[2560]
blk.9.time_mix_lerp_x.weight
F32
F32
[2560]
blk.9.time_mix_ln.bias
F32
F32
[2560]
blk.9.time_mix_ln.weight
F32
F32
[2560]
blk.9.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.9.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.9.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.9.time_mix_w1.weight
F32
F32
[2560, 160]
blk.9.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.10
blk.10.attn_norm.bias
F32
F32
[2560]
blk.10.attn_norm.weight
F32
F32
[2560]
blk.10.attn_norm_2.bias
F32
F32
[2560]
blk.10.attn_norm_2.weight
F32
F32
[2560]
blk.10.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.10.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.10.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.10.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.10.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.10.time_mix_decay.weight
F32
F32
[2560]
blk.10.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.10.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.10.time_mix_first.weight
F32
F32
[64, 40]
blk.10.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.10.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.10.time_mix_lerp_g.weight
F32
F32
[2560]
blk.10.time_mix_lerp_k.weight
F32
F32
[2560]
blk.10.time_mix_lerp_r.weight
F32
F32
[2560]
blk.10.time_mix_lerp_v.weight
F32
F32
[2560]
blk.10.time_mix_lerp_w.weight
F32
F32
[2560]
blk.10.time_mix_lerp_x.weight
F32
F32
[2560]
blk.10.time_mix_ln.bias
F32
F32
[2560]
blk.10.time_mix_ln.weight
F32
F32
[2560]
blk.10.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.10.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.10.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.10.time_mix_w1.weight
F32
F32
[2560, 160]
blk.10.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.11
blk.11.attn_norm.bias
F32
F32
[2560]
blk.11.attn_norm.weight
F32
F32
[2560]
blk.11.attn_norm_2.bias
F32
F32
[2560]
blk.11.attn_norm_2.weight
F32
F32
[2560]
blk.11.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.11.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.11.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.11.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.11.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.11.time_mix_decay.weight
F32
F32
[2560]
blk.11.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.11.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.11.time_mix_first.weight
F32
F32
[64, 40]
blk.11.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.11.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.11.time_mix_lerp_g.weight
F32
F32
[2560]
blk.11.time_mix_lerp_k.weight
F32
F32
[2560]
blk.11.time_mix_lerp_r.weight
F32
F32
[2560]
blk.11.time_mix_lerp_v.weight
F32
F32
[2560]
blk.11.time_mix_lerp_w.weight
F32
F32
[2560]
blk.11.time_mix_lerp_x.weight
F32
F32
[2560]
blk.11.time_mix_ln.bias
F32
F32
[2560]
blk.11.time_mix_ln.weight
F32
F32
[2560]
blk.11.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.11.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.11.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.11.time_mix_w1.weight
F32
F32
[2560, 160]
blk.11.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.12
blk.12.attn_norm.bias
F32
F32
[2560]
blk.12.attn_norm.weight
F32
F32
[2560]
blk.12.attn_norm_2.bias
F32
F32
[2560]
blk.12.attn_norm_2.weight
F32
F32
[2560]
blk.12.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.12.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.12.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.12.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.12.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.12.time_mix_decay.weight
F32
F32
[2560]
blk.12.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.12.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.12.time_mix_first.weight
F32
F32
[64, 40]
blk.12.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.12.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.12.time_mix_lerp_g.weight
F32
F32
[2560]
blk.12.time_mix_lerp_k.weight
F32
F32
[2560]
blk.12.time_mix_lerp_r.weight
F32
F32
[2560]
blk.12.time_mix_lerp_v.weight
F32
F32
[2560]
blk.12.time_mix_lerp_w.weight
F32
F32
[2560]
blk.12.time_mix_lerp_x.weight
F32
F32
[2560]
blk.12.time_mix_ln.bias
F32
F32
[2560]
blk.12.time_mix_ln.weight
F32
F32
[2560]
blk.12.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.12.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.12.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.12.time_mix_w1.weight
F32
F32
[2560, 160]
blk.12.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.13
blk.13.attn_norm.bias
F32
F32
[2560]
blk.13.attn_norm.weight
F32
F32
[2560]
blk.13.attn_norm_2.bias
F32
F32
[2560]
blk.13.attn_norm_2.weight
F32
F32
[2560]
blk.13.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.13.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.13.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.13.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.13.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.13.time_mix_decay.weight
F32
F32
[2560]
blk.13.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.13.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.13.time_mix_first.weight
F32
F32
[64, 40]
blk.13.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.13.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.13.time_mix_lerp_g.weight
F32
F32
[2560]
blk.13.time_mix_lerp_k.weight
F32
F32
[2560]
blk.13.time_mix_lerp_r.weight
F32
F32
[2560]
blk.13.time_mix_lerp_v.weight
F32
F32
[2560]
blk.13.time_mix_lerp_w.weight
F32
F32
[2560]
blk.13.time_mix_lerp_x.weight
F32
F32
[2560]
blk.13.time_mix_ln.bias
F32
F32
[2560]
blk.13.time_mix_ln.weight
F32
F32
[2560]
blk.13.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.13.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.13.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.13.time_mix_w1.weight
F32
F32
[2560, 160]
blk.13.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.14
blk.14.attn_norm.bias
F32
F32
[2560]
blk.14.attn_norm.weight
F32
F32
[2560]
blk.14.attn_norm_2.bias
F32
F32
[2560]
blk.14.attn_norm_2.weight
F32
F32
[2560]
blk.14.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.14.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.14.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.14.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.14.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.14.time_mix_decay.weight
F32
F32
[2560]
blk.14.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.14.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.14.time_mix_first.weight
F32
F32
[64, 40]
blk.14.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.14.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.14.time_mix_lerp_g.weight
F32
F32
[2560]
blk.14.time_mix_lerp_k.weight
F32
F32
[2560]
blk.14.time_mix_lerp_r.weight
F32
F32
[2560]
blk.14.time_mix_lerp_v.weight
F32
F32
[2560]
blk.14.time_mix_lerp_w.weight
F32
F32
[2560]
blk.14.time_mix_lerp_x.weight
F32
F32
[2560]
blk.14.time_mix_ln.bias
F32
F32
[2560]
blk.14.time_mix_ln.weight
F32
F32
[2560]
blk.14.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.14.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.14.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.14.time_mix_w1.weight
F32
F32
[2560, 160]
blk.14.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.15
blk.15.attn_norm.bias
F32
F32
[2560]
blk.15.attn_norm.weight
F32
F32
[2560]
blk.15.attn_norm_2.bias
F32
F32
[2560]
blk.15.attn_norm_2.weight
F32
F32
[2560]
blk.15.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.15.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.15.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.15.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.15.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.15.time_mix_decay.weight
F32
F32
[2560]
blk.15.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.15.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.15.time_mix_first.weight
F32
F32
[64, 40]
blk.15.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.15.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.15.time_mix_lerp_g.weight
F32
F32
[2560]
blk.15.time_mix_lerp_k.weight
F32
F32
[2560]
blk.15.time_mix_lerp_r.weight
F32
F32
[2560]
blk.15.time_mix_lerp_v.weight
F32
F32
[2560]
blk.15.time_mix_lerp_w.weight
F32
F32
[2560]
blk.15.time_mix_lerp_x.weight
F32
F32
[2560]
blk.15.time_mix_ln.bias
F32
F32
[2560]
blk.15.time_mix_ln.weight
F32
F32
[2560]
blk.15.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.15.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.15.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.15.time_mix_w1.weight
F32
F32
[2560, 160]
blk.15.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.16
blk.16.attn_norm.bias
F32
F32
[2560]
blk.16.attn_norm.weight
F32
F32
[2560]
blk.16.attn_norm_2.bias
F32
F32
[2560]
blk.16.attn_norm_2.weight
F32
F32
[2560]
blk.16.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.16.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.16.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.16.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.16.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.16.time_mix_decay.weight
F32
F32
[2560]
blk.16.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.16.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.16.time_mix_first.weight
F32
F32
[64, 40]
blk.16.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.16.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.16.time_mix_lerp_g.weight
F32
F32
[2560]
blk.16.time_mix_lerp_k.weight
F32
F32
[2560]
blk.16.time_mix_lerp_r.weight
F32
F32
[2560]
blk.16.time_mix_lerp_v.weight
F32
F32
[2560]
blk.16.time_mix_lerp_w.weight
F32
F32
[2560]
blk.16.time_mix_lerp_x.weight
F32
F32
[2560]
blk.16.time_mix_ln.bias
F32
F32
[2560]
blk.16.time_mix_ln.weight
F32
F32
[2560]
blk.16.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.16.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.16.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.16.time_mix_w1.weight
F32
F32
[2560, 160]
blk.16.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.17
blk.17.attn_norm.bias
F32
F32
[2560]
blk.17.attn_norm.weight
F32
F32
[2560]
blk.17.attn_norm_2.bias
F32
F32
[2560]
blk.17.attn_norm_2.weight
F32
F32
[2560]
blk.17.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.17.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.17.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.17.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.17.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.17.time_mix_decay.weight
F32
F32
[2560]
blk.17.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.17.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.17.time_mix_first.weight
F32
F32
[64, 40]
blk.17.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.17.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.17.time_mix_lerp_g.weight
F32
F32
[2560]
blk.17.time_mix_lerp_k.weight
F32
F32
[2560]
blk.17.time_mix_lerp_r.weight
F32
F32
[2560]
blk.17.time_mix_lerp_v.weight
F32
F32
[2560]
blk.17.time_mix_lerp_w.weight
F32
F32
[2560]
blk.17.time_mix_lerp_x.weight
F32
F32
[2560]
blk.17.time_mix_ln.bias
F32
F32
[2560]
blk.17.time_mix_ln.weight
F32
F32
[2560]
blk.17.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.17.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.17.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.17.time_mix_w1.weight
F32
F32
[2560, 160]
blk.17.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.18
blk.18.attn_norm.bias
F32
F32
[2560]
blk.18.attn_norm.weight
F32
F32
[2560]
blk.18.attn_norm_2.bias
F32
F32
[2560]
blk.18.attn_norm_2.weight
F32
F32
[2560]
blk.18.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.18.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.18.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.18.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.18.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.18.time_mix_decay.weight
F32
F32
[2560]
blk.18.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.18.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.18.time_mix_first.weight
F32
F32
[64, 40]
blk.18.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.18.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.18.time_mix_lerp_g.weight
F32
F32
[2560]
blk.18.time_mix_lerp_k.weight
F32
F32
[2560]
blk.18.time_mix_lerp_r.weight
F32
F32
[2560]
blk.18.time_mix_lerp_v.weight
F32
F32
[2560]
blk.18.time_mix_lerp_w.weight
F32
F32
[2560]
blk.18.time_mix_lerp_x.weight
F32
F32
[2560]
blk.18.time_mix_ln.bias
F32
F32
[2560]
blk.18.time_mix_ln.weight
F32
F32
[2560]
blk.18.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.18.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.18.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.18.time_mix_w1.weight
F32
F32
[2560, 160]
blk.18.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.19
blk.19.attn_norm.bias
F32
F32
[2560]
blk.19.attn_norm.weight
F32
F32
[2560]
blk.19.attn_norm_2.bias
F32
F32
[2560]
blk.19.attn_norm_2.weight
F32
F32
[2560]
blk.19.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.19.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.19.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.19.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.19.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.19.time_mix_decay.weight
F32
F32
[2560]
blk.19.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.19.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.19.time_mix_first.weight
F32
F32
[64, 40]
blk.19.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.19.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.19.time_mix_lerp_g.weight
F32
F32
[2560]
blk.19.time_mix_lerp_k.weight
F32
F32
[2560]
blk.19.time_mix_lerp_r.weight
F32
F32
[2560]
blk.19.time_mix_lerp_v.weight
F32
F32
[2560]
blk.19.time_mix_lerp_w.weight
F32
F32
[2560]
blk.19.time_mix_lerp_x.weight
F32
F32
[2560]
blk.19.time_mix_ln.bias
F32
F32
[2560]
blk.19.time_mix_ln.weight
F32
F32
[2560]
blk.19.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.19.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.19.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.19.time_mix_w1.weight
F32
F32
[2560, 160]
blk.19.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.20
blk.20.attn_norm.bias
F32
F32
[2560]
blk.20.attn_norm.weight
F32
F32
[2560]
blk.20.attn_norm_2.bias
F32
F32
[2560]
blk.20.attn_norm_2.weight
F32
F32
[2560]
blk.20.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.20.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.20.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.20.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.20.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.20.time_mix_decay.weight
F32
F32
[2560]
blk.20.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.20.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.20.time_mix_first.weight
F32
F32
[64, 40]
blk.20.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.20.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.20.time_mix_lerp_g.weight
F32
F32
[2560]
blk.20.time_mix_lerp_k.weight
F32
F32
[2560]
blk.20.time_mix_lerp_r.weight
F32
F32
[2560]
blk.20.time_mix_lerp_v.weight
F32
F32
[2560]
blk.20.time_mix_lerp_w.weight
F32
F32
[2560]
blk.20.time_mix_lerp_x.weight
F32
F32
[2560]
blk.20.time_mix_ln.bias
F32
F32
[2560]
blk.20.time_mix_ln.weight
F32
F32
[2560]
blk.20.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.20.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.20.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.20.time_mix_w1.weight
F32
F32
[2560, 160]
blk.20.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.21
blk.21.attn_norm.bias
F32
F32
[2560]
blk.21.attn_norm.weight
F32
F32
[2560]
blk.21.attn_norm_2.bias
F32
F32
[2560]
blk.21.attn_norm_2.weight
F32
F32
[2560]
blk.21.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.21.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.21.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.21.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.21.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.21.time_mix_decay.weight
F32
F32
[2560]
blk.21.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.21.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.21.time_mix_first.weight
F32
F32
[64, 40]
blk.21.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.21.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.21.time_mix_lerp_g.weight
F32
F32
[2560]
blk.21.time_mix_lerp_k.weight
F32
F32
[2560]
blk.21.time_mix_lerp_r.weight
F32
F32
[2560]
blk.21.time_mix_lerp_v.weight
F32
F32
[2560]
blk.21.time_mix_lerp_w.weight
F32
F32
[2560]
blk.21.time_mix_lerp_x.weight
F32
F32
[2560]
blk.21.time_mix_ln.bias
F32
F32
[2560]
blk.21.time_mix_ln.weight
F32
F32
[2560]
blk.21.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.21.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.21.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.21.time_mix_w1.weight
F32
F32
[2560, 160]
blk.21.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.22
blk.22.attn_norm.bias
F32
F32
[2560]
blk.22.attn_norm.weight
F32
F32
[2560]
blk.22.attn_norm_2.bias
F32
F32
[2560]
blk.22.attn_norm_2.weight
F32
F32
[2560]
blk.22.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.22.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.22.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.22.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.22.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.22.time_mix_decay.weight
F32
F32
[2560]
blk.22.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.22.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.22.time_mix_first.weight
F32
F32
[64, 40]
blk.22.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.22.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.22.time_mix_lerp_g.weight
F32
F32
[2560]
blk.22.time_mix_lerp_k.weight
F32
F32
[2560]
blk.22.time_mix_lerp_r.weight
F32
F32
[2560]
blk.22.time_mix_lerp_v.weight
F32
F32
[2560]
blk.22.time_mix_lerp_w.weight
F32
F32
[2560]
blk.22.time_mix_lerp_x.weight
F32
F32
[2560]
blk.22.time_mix_ln.bias
F32
F32
[2560]
blk.22.time_mix_ln.weight
F32
F32
[2560]
blk.22.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.22.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.22.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.22.time_mix_w1.weight
F32
F32
[2560, 160]
blk.22.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.23
blk.23.attn_norm.bias
F32
F32
[2560]
blk.23.attn_norm.weight
F32
F32
[2560]
blk.23.attn_norm_2.bias
F32
F32
[2560]
blk.23.attn_norm_2.weight
F32
F32
[2560]
blk.23.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.23.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.23.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.23.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.23.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.23.time_mix_decay.weight
F32
F32
[2560]
blk.23.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.23.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.23.time_mix_first.weight
F32
F32
[64, 40]
blk.23.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.23.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.23.time_mix_lerp_g.weight
F32
F32
[2560]
blk.23.time_mix_lerp_k.weight
F32
F32
[2560]
blk.23.time_mix_lerp_r.weight
F32
F32
[2560]
blk.23.time_mix_lerp_v.weight
F32
F32
[2560]
blk.23.time_mix_lerp_w.weight
F32
F32
[2560]
blk.23.time_mix_lerp_x.weight
F32
F32
[2560]
blk.23.time_mix_ln.bias
F32
F32
[2560]
blk.23.time_mix_ln.weight
F32
F32
[2560]
blk.23.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.23.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.23.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.23.time_mix_w1.weight
F32
F32
[2560, 160]
blk.23.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.24
blk.24.attn_norm.bias
F32
F32
[2560]
blk.24.attn_norm.weight
F32
F32
[2560]
blk.24.attn_norm_2.bias
F32
F32
[2560]
blk.24.attn_norm_2.weight
F32
F32
[2560]
blk.24.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.24.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.24.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.24.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.24.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.24.time_mix_decay.weight
F32
F32
[2560]
blk.24.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.24.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.24.time_mix_first.weight
F32
F32
[64, 40]
blk.24.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.24.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.24.time_mix_lerp_g.weight
F32
F32
[2560]
blk.24.time_mix_lerp_k.weight
F32
F32
[2560]
blk.24.time_mix_lerp_r.weight
F32
F32
[2560]
blk.24.time_mix_lerp_v.weight
F32
F32
[2560]
blk.24.time_mix_lerp_w.weight
F32
F32
[2560]
blk.24.time_mix_lerp_x.weight
F32
F32
[2560]
blk.24.time_mix_ln.bias
F32
F32
[2560]
blk.24.time_mix_ln.weight
F32
F32
[2560]
blk.24.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.24.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.24.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.24.time_mix_w1.weight
F32
F32
[2560, 160]
blk.24.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.25
blk.25.attn_norm.bias
F32
F32
[2560]
blk.25.attn_norm.weight
F32
F32
[2560]
blk.25.attn_norm_2.bias
F32
F32
[2560]
blk.25.attn_norm_2.weight
F32
F32
[2560]
blk.25.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.25.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.25.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.25.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.25.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.25.time_mix_decay.weight
F32
F32
[2560]
blk.25.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.25.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.25.time_mix_first.weight
F32
F32
[64, 40]
blk.25.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.25.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.25.time_mix_lerp_g.weight
F32
F32
[2560]
blk.25.time_mix_lerp_k.weight
F32
F32
[2560]
blk.25.time_mix_lerp_r.weight
F32
F32
[2560]
blk.25.time_mix_lerp_v.weight
F32
F32
[2560]
blk.25.time_mix_lerp_w.weight
F32
F32
[2560]
blk.25.time_mix_lerp_x.weight
F32
F32
[2560]
blk.25.time_mix_ln.bias
F32
F32
[2560]
blk.25.time_mix_ln.weight
F32
F32
[2560]
blk.25.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.25.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.25.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.25.time_mix_w1.weight
F32
F32
[2560, 160]
blk.25.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.26
blk.26.attn_norm.bias
F32
F32
[2560]
blk.26.attn_norm.weight
F32
F32
[2560]
blk.26.attn_norm_2.bias
F32
F32
[2560]
blk.26.attn_norm_2.weight
F32
F32
[2560]
blk.26.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.26.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.26.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.26.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.26.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.26.time_mix_decay.weight
F32
F32
[2560]
blk.26.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.26.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.26.time_mix_first.weight
F32
F32
[64, 40]
blk.26.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.26.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.26.time_mix_lerp_g.weight
F32
F32
[2560]
blk.26.time_mix_lerp_k.weight
F32
F32
[2560]
blk.26.time_mix_lerp_r.weight
F32
F32
[2560]
blk.26.time_mix_lerp_v.weight
F32
F32
[2560]
blk.26.time_mix_lerp_w.weight
F32
F32
[2560]
blk.26.time_mix_lerp_x.weight
F32
F32
[2560]
blk.26.time_mix_ln.bias
F32
F32
[2560]
blk.26.time_mix_ln.weight
F32
F32
[2560]
blk.26.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.26.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.26.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.26.time_mix_w1.weight
F32
F32
[2560, 160]
blk.26.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.27
blk.27.attn_norm.bias
F32
F32
[2560]
blk.27.attn_norm.weight
F32
F32
[2560]
blk.27.attn_norm_2.bias
F32
F32
[2560]
blk.27.attn_norm_2.weight
F32
F32
[2560]
blk.27.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.27.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.27.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.27.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.27.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.27.time_mix_decay.weight
F32
F32
[2560]
blk.27.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.27.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.27.time_mix_first.weight
F32
F32
[64, 40]
blk.27.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.27.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.27.time_mix_lerp_g.weight
F32
F32
[2560]
blk.27.time_mix_lerp_k.weight
F32
F32
[2560]
blk.27.time_mix_lerp_r.weight
F32
F32
[2560]
blk.27.time_mix_lerp_v.weight
F32
F32
[2560]
blk.27.time_mix_lerp_w.weight
F32
F32
[2560]
blk.27.time_mix_lerp_x.weight
F32
F32
[2560]
blk.27.time_mix_ln.bias
F32
F32
[2560]
blk.27.time_mix_ln.weight
F32
F32
[2560]
blk.27.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.27.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.27.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.27.time_mix_w1.weight
F32
F32
[2560, 160]
blk.27.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.28
blk.28.attn_norm.bias
F32
F32
[2560]
blk.28.attn_norm.weight
F32
F32
[2560]
blk.28.attn_norm_2.bias
F32
F32
[2560]
blk.28.attn_norm_2.weight
F32
F32
[2560]
blk.28.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.28.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.28.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.28.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.28.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.28.time_mix_decay.weight
F32
F32
[2560]
blk.28.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.28.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.28.time_mix_first.weight
F32
F32
[64, 40]
blk.28.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.28.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.28.time_mix_lerp_g.weight
F32
F32
[2560]
blk.28.time_mix_lerp_k.weight
F32
F32
[2560]
blk.28.time_mix_lerp_r.weight
F32
F32
[2560]
blk.28.time_mix_lerp_v.weight
F32
F32
[2560]
blk.28.time_mix_lerp_w.weight
F32
F32
[2560]
blk.28.time_mix_lerp_x.weight
F32
F32
[2560]
blk.28.time_mix_ln.bias
F32
F32
[2560]
blk.28.time_mix_ln.weight
F32
F32
[2560]
blk.28.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.28.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.28.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.28.time_mix_w1.weight
F32
F32
[2560, 160]
blk.28.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.29
blk.29.attn_norm.bias
F32
F32
[2560]
blk.29.attn_norm.weight
F32
F32
[2560]
blk.29.attn_norm_2.bias
F32
F32
[2560]
blk.29.attn_norm_2.weight
F32
F32
[2560]
blk.29.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.29.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.29.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.29.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.29.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.29.time_mix_decay.weight
F32
F32
[2560]
blk.29.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.29.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.29.time_mix_first.weight
F32
F32
[64, 40]
blk.29.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.29.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.29.time_mix_lerp_g.weight
F32
F32
[2560]
blk.29.time_mix_lerp_k.weight
F32
F32
[2560]
blk.29.time_mix_lerp_r.weight
F32
F32
[2560]
blk.29.time_mix_lerp_v.weight
F32
F32
[2560]
blk.29.time_mix_lerp_w.weight
F32
F32
[2560]
blk.29.time_mix_lerp_x.weight
F32
F32
[2560]
blk.29.time_mix_ln.bias
F32
F32
[2560]
blk.29.time_mix_ln.weight
F32
F32
[2560]
blk.29.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.29.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.29.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.29.time_mix_w1.weight
F32
F32
[2560, 160]
blk.29.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.30
blk.30.attn_norm.bias
F32
F32
[2560]
blk.30.attn_norm.weight
F32
F32
[2560]
blk.30.attn_norm_2.bias
F32
F32
[2560]
blk.30.attn_norm_2.weight
F32
F32
[2560]
blk.30.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.30.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.30.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.30.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.30.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.30.time_mix_decay.weight
F32
F32
[2560]
blk.30.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.30.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.30.time_mix_first.weight
F32
F32
[64, 40]
blk.30.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.30.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.30.time_mix_lerp_g.weight
F32
F32
[2560]
blk.30.time_mix_lerp_k.weight
F32
F32
[2560]
blk.30.time_mix_lerp_r.weight
F32
F32
[2560]
blk.30.time_mix_lerp_v.weight
F32
F32
[2560]
blk.30.time_mix_lerp_w.weight
F32
F32
[2560]
blk.30.time_mix_lerp_x.weight
F32
F32
[2560]
blk.30.time_mix_ln.bias
F32
F32
[2560]
blk.30.time_mix_ln.weight
F32
F32
[2560]
blk.30.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.30.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.30.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.30.time_mix_w1.weight
F32
F32
[2560, 160]
blk.30.time_mix_w2.weight
F32
F32
[32, 2560, 5]
blk.31
blk.31.attn_norm.bias
F32
F32
[2560]
blk.31.attn_norm.weight
F32
F32
[2560]
blk.31.attn_norm_2.bias
F32
F32
[2560]
blk.31.attn_norm_2.weight
F32
F32
[2560]
blk.31.channel_mix_key.weight
Q4_K
Q4_K
[2560, 8960]
blk.31.channel_mix_lerp_k.weight
F32
F32
[2560]
blk.31.channel_mix_lerp_r.weight
F32
F32
[2560]
blk.31.channel_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.31.channel_mix_value.weight
Q4_K
Q4_K
[8960, 2560]
blk.31.time_mix_decay.weight
F32
F32
[2560]
blk.31.time_mix_decay_w1.weight
F32
F32
[2560, 64]
blk.31.time_mix_decay_w2.weight
F32
F32
[64, 2560]
blk.31.time_mix_first.weight
F32
F32
[64, 40]
blk.31.time_mix_gate.weight
Q4_K
Q4_K
[2560, 2560]
blk.31.time_mix_key.weight
Q4_K
Q4_K
[2560, 2560]
blk.31.time_mix_lerp_g.weight
F32
F32
[2560]
blk.31.time_mix_lerp_k.weight
F32
F32
[2560]
blk.31.time_mix_lerp_r.weight
F32
F32
[2560]
blk.31.time_mix_lerp_v.weight
F32
F32
[2560]
blk.31.time_mix_lerp_w.weight
F32
F32
[2560]
blk.31.time_mix_lerp_x.weight
F32
F32
[2560]
blk.31.time_mix_ln.bias
F32
F32
[2560]
blk.31.time_mix_ln.weight
F32
F32
[2560]
blk.31.time_mix_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.31.time_mix_receptance.weight
Q4_K
Q4_K
[2560, 2560]
blk.31.time_mix_value.weight
Q4_K
Q4_K
[2560, 2560]
blk.31.time_mix_w1.weight
F32
F32
[2560, 160]
blk.31.time_mix_w2.weight
F32
F32
[32, 2560, 5]
output.weight
Q6_K
Q6_K
[2560, 65536]
output_norm.bias
F32
F32
[2560]
token_embd_norm.bias
F32
F32
[2560]
token_embd_norm.weight
F32
F32
[2560]
output_norm.weight
F32
F32
[2560]