RWKV's 6th generation models. RWKV (pronounced RwaKuv) is an RNN with great LLM performance.
1.6b
3b
7b
14b
1,453 Pulls Updated 2 months ago
1852bb884e87 · 1.0GB
-
general.architecturerwkv6rwkv6
-
general.file_typeQ4_K_MQ4_K_M
-
rwkv6.attention.head_count00
-
rwkv6.attention.layer_norm_epsilon1e-051e-05
-
rwkv6.block_count2424
-
rwkv6.context_length10485761048576
-
rwkv6.embedding_length20482048
-
rwkv6.feed_forward_length71687168
-
rwkv6.rescale_every_n_layers66
-
rwkv6.time_decay_extra_dim6464
-
rwkv6.time_mix_extra_dim3232
-
rwkv6.wkv.head_size6464
-
tokenizer.ggml.bos_token_id00
-
tokenizer.ggml.eos_token_id00
-
tokenizer.ggml.eot_token_id261261
-
tokenizer.ggml.modelrwkvrwkv
-
tokenizer.ggml.token_type[3, 1, 1, 1, 1, ...][3, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[<s>, \x00, \x01, \x02, \x03, ...][<s>, \x00, \x01, \x02, \x03, ...]
-
token_embd.weightQ4_K[2048, 65536]
-
blk.0.attn_norm.biasF32[2048]
-
blk.0.attn_norm.weightF32[2048]
-
blk.0.attn_norm_2.biasF32[2048]
-
blk.0.attn_norm_2.weightF32[2048]
-
blk.0.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.0.channel_mix_lerp_k.weightF32[2048]
-
blk.0.channel_mix_lerp_r.weightF32[2048]
-
blk.0.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.0.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.0.time_mix_decay.weightF32[2048]
-
blk.0.time_mix_decay_w1.weightF32[2048, 64]
-
blk.0.time_mix_decay_w2.weightF32[64, 2048]
-
blk.0.time_mix_first.weightF32[64, 32]
-
blk.0.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.0.time_mix_key.weightQ4_K[2048, 2048]
-
blk.0.time_mix_lerp_g.weightF32[2048]
-
blk.0.time_mix_lerp_k.weightF32[2048]
-
blk.0.time_mix_lerp_r.weightF32[2048]
-
blk.0.time_mix_lerp_v.weightF32[2048]
-
blk.0.time_mix_lerp_w.weightF32[2048]
-
blk.0.time_mix_lerp_x.weightF32[2048]
-
blk.0.time_mix_ln.biasF32[2048]
-
blk.0.time_mix_ln.weightF32[2048]
-
blk.0.time_mix_output.weightQ4_K[2048, 2048]
-
blk.0.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.0.time_mix_value.weightQ4_K[2048, 2048]
-
blk.0.time_mix_w1.weightF32[2048, 160]
-
blk.0.time_mix_w2.weightF32[32, 2048, 5]
-
blk.1.attn_norm.biasF32[2048]
-
blk.1.attn_norm.weightF32[2048]
-
blk.1.attn_norm_2.biasF32[2048]
-
blk.1.attn_norm_2.weightF32[2048]
-
blk.1.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.1.channel_mix_lerp_k.weightF32[2048]
-
blk.1.channel_mix_lerp_r.weightF32[2048]
-
blk.1.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.1.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.1.time_mix_decay.weightF32[2048]
-
blk.1.time_mix_decay_w1.weightF32[2048, 64]
-
blk.1.time_mix_decay_w2.weightF32[64, 2048]
-
blk.1.time_mix_first.weightF32[64, 32]
-
blk.1.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.1.time_mix_key.weightQ4_K[2048, 2048]
-
blk.1.time_mix_lerp_g.weightF32[2048]
-
blk.1.time_mix_lerp_k.weightF32[2048]
-
blk.1.time_mix_lerp_r.weightF32[2048]
-
blk.1.time_mix_lerp_v.weightF32[2048]
-
blk.1.time_mix_lerp_w.weightF32[2048]
-
blk.1.time_mix_lerp_x.weightF32[2048]
-
blk.1.time_mix_ln.biasF32[2048]
-
blk.1.time_mix_ln.weightF32[2048]
-
blk.1.time_mix_output.weightQ4_K[2048, 2048]
-
blk.1.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.1.time_mix_value.weightQ4_K[2048, 2048]
-
blk.1.time_mix_w1.weightF32[2048, 160]
-
blk.1.time_mix_w2.weightF32[32, 2048, 5]
-
blk.2.attn_norm.biasF32[2048]
-
blk.2.attn_norm.weightF32[2048]
-
blk.2.attn_norm_2.biasF32[2048]
-
blk.2.attn_norm_2.weightF32[2048]
-
blk.2.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.2.channel_mix_lerp_k.weightF32[2048]
-
blk.2.channel_mix_lerp_r.weightF32[2048]
-
blk.2.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.2.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.2.time_mix_decay.weightF32[2048]
-
blk.2.time_mix_decay_w1.weightF32[2048, 64]
-
blk.2.time_mix_decay_w2.weightF32[64, 2048]
-
blk.2.time_mix_first.weightF32[64, 32]
-
blk.2.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.2.time_mix_key.weightQ4_K[2048, 2048]
-
blk.2.time_mix_lerp_g.weightF32[2048]
-
blk.2.time_mix_lerp_k.weightF32[2048]
-
blk.2.time_mix_lerp_r.weightF32[2048]
-
blk.2.time_mix_lerp_v.weightF32[2048]
-
blk.2.time_mix_lerp_w.weightF32[2048]
-
blk.2.time_mix_lerp_x.weightF32[2048]
-
blk.2.time_mix_ln.biasF32[2048]
-
blk.2.time_mix_ln.weightF32[2048]
-
blk.2.time_mix_output.weightQ4_K[2048, 2048]
-
blk.2.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.2.time_mix_value.weightQ4_K[2048, 2048]
-
blk.2.time_mix_w1.weightF32[2048, 160]
-
blk.2.time_mix_w2.weightF32[32, 2048, 5]
-
blk.3.attn_norm.biasF32[2048]
-
blk.3.attn_norm.weightF32[2048]
-
blk.3.attn_norm_2.biasF32[2048]
-
blk.3.attn_norm_2.weightF32[2048]
-
blk.3.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.3.channel_mix_lerp_k.weightF32[2048]
-
blk.3.channel_mix_lerp_r.weightF32[2048]
-
blk.3.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.3.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.3.time_mix_decay.weightF32[2048]
-
blk.3.time_mix_decay_w1.weightF32[2048, 64]
-
blk.3.time_mix_decay_w2.weightF32[64, 2048]
-
blk.3.time_mix_first.weightF32[64, 32]
-
blk.3.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.3.time_mix_key.weightQ4_K[2048, 2048]
-
blk.3.time_mix_lerp_g.weightF32[2048]
-
blk.3.time_mix_lerp_k.weightF32[2048]
-
blk.3.time_mix_lerp_r.weightF32[2048]
-
blk.3.time_mix_lerp_v.weightF32[2048]
-
blk.3.time_mix_lerp_w.weightF32[2048]
-
blk.3.time_mix_lerp_x.weightF32[2048]
-
blk.3.time_mix_ln.biasF32[2048]
-
blk.3.time_mix_ln.weightF32[2048]
-
blk.3.time_mix_output.weightQ4_K[2048, 2048]
-
blk.3.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.3.time_mix_value.weightQ4_K[2048, 2048]
-
blk.3.time_mix_w1.weightF32[2048, 160]
-
blk.3.time_mix_w2.weightF32[32, 2048, 5]
-
blk.4.attn_norm.biasF32[2048]
-
blk.4.attn_norm.weightF32[2048]
-
blk.4.attn_norm_2.biasF32[2048]
-
blk.4.attn_norm_2.weightF32[2048]
-
blk.4.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.4.channel_mix_lerp_k.weightF32[2048]
-
blk.4.channel_mix_lerp_r.weightF32[2048]
-
blk.4.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.4.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.4.time_mix_decay.weightF32[2048]
-
blk.4.time_mix_decay_w1.weightF32[2048, 64]
-
blk.4.time_mix_decay_w2.weightF32[64, 2048]
-
blk.4.time_mix_first.weightF32[64, 32]
-
blk.4.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.4.time_mix_key.weightQ4_K[2048, 2048]
-
blk.4.time_mix_lerp_g.weightF32[2048]
-
blk.4.time_mix_lerp_k.weightF32[2048]
-
blk.4.time_mix_lerp_r.weightF32[2048]
-
blk.4.time_mix_lerp_v.weightF32[2048]
-
blk.4.time_mix_lerp_w.weightF32[2048]
-
blk.4.time_mix_lerp_x.weightF32[2048]
-
blk.4.time_mix_ln.biasF32[2048]
-
blk.4.time_mix_ln.weightF32[2048]
-
blk.4.time_mix_output.weightQ4_K[2048, 2048]
-
blk.4.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.4.time_mix_value.weightQ4_K[2048, 2048]
-
blk.4.time_mix_w1.weightF32[2048, 160]
-
blk.4.time_mix_w2.weightF32[32, 2048, 5]
-
blk.5.attn_norm.biasF32[2048]
-
blk.5.attn_norm.weightF32[2048]
-
blk.5.attn_norm_2.biasF32[2048]
-
blk.5.attn_norm_2.weightF32[2048]
-
blk.5.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.5.channel_mix_lerp_k.weightF32[2048]
-
blk.5.channel_mix_lerp_r.weightF32[2048]
-
blk.5.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.5.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.5.time_mix_decay.weightF32[2048]
-
blk.5.time_mix_decay_w1.weightF32[2048, 64]
-
blk.5.time_mix_decay_w2.weightF32[64, 2048]
-
blk.5.time_mix_first.weightF32[64, 32]
-
blk.5.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.5.time_mix_key.weightQ4_K[2048, 2048]
-
blk.5.time_mix_lerp_g.weightF32[2048]
-
blk.5.time_mix_lerp_k.weightF32[2048]
-
blk.5.time_mix_lerp_r.weightF32[2048]
-
blk.5.time_mix_lerp_v.weightF32[2048]
-
blk.5.time_mix_lerp_w.weightF32[2048]
-
blk.5.time_mix_lerp_x.weightF32[2048]
-
blk.5.time_mix_ln.biasF32[2048]
-
blk.5.time_mix_ln.weightF32[2048]
-
blk.5.time_mix_output.weightQ4_K[2048, 2048]
-
blk.5.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.5.time_mix_value.weightQ4_K[2048, 2048]
-
blk.5.time_mix_w1.weightF32[2048, 160]
-
blk.5.time_mix_w2.weightF32[32, 2048, 5]
-
blk.6.attn_norm.biasF32[2048]
-
blk.6.attn_norm.weightF32[2048]
-
blk.6.attn_norm_2.biasF32[2048]
-
blk.6.attn_norm_2.weightF32[2048]
-
blk.6.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.6.channel_mix_lerp_k.weightF32[2048]
-
blk.6.channel_mix_lerp_r.weightF32[2048]
-
blk.6.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.6.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.6.time_mix_decay.weightF32[2048]
-
blk.6.time_mix_decay_w1.weightF32[2048, 64]
-
blk.6.time_mix_decay_w2.weightF32[64, 2048]
-
blk.6.time_mix_first.weightF32[64, 32]
-
blk.6.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.6.time_mix_key.weightQ4_K[2048, 2048]
-
blk.6.time_mix_lerp_g.weightF32[2048]
-
blk.6.time_mix_lerp_k.weightF32[2048]
-
blk.6.time_mix_lerp_r.weightF32[2048]
-
blk.6.time_mix_lerp_v.weightF32[2048]
-
blk.6.time_mix_lerp_w.weightF32[2048]
-
blk.6.time_mix_lerp_x.weightF32[2048]
-
blk.6.time_mix_ln.biasF32[2048]
-
blk.6.time_mix_ln.weightF32[2048]
-
blk.6.time_mix_output.weightQ4_K[2048, 2048]
-
blk.6.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.6.time_mix_value.weightQ4_K[2048, 2048]
-
blk.6.time_mix_w1.weightF32[2048, 160]
-
blk.6.time_mix_w2.weightF32[32, 2048, 5]
-
blk.7.attn_norm.biasF32[2048]
-
blk.7.attn_norm.weightF32[2048]
-
blk.7.attn_norm_2.biasF32[2048]
-
blk.7.attn_norm_2.weightF32[2048]
-
blk.7.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.7.channel_mix_lerp_k.weightF32[2048]
-
blk.7.channel_mix_lerp_r.weightF32[2048]
-
blk.7.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.7.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.7.time_mix_decay.weightF32[2048]
-
blk.7.time_mix_decay_w1.weightF32[2048, 64]
-
blk.7.time_mix_decay_w2.weightF32[64, 2048]
-
blk.7.time_mix_first.weightF32[64, 32]
-
blk.7.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.7.time_mix_key.weightQ4_K[2048, 2048]
-
blk.7.time_mix_lerp_g.weightF32[2048]
-
blk.7.time_mix_lerp_k.weightF32[2048]
-
blk.7.time_mix_lerp_r.weightF32[2048]
-
blk.7.time_mix_lerp_v.weightF32[2048]
-
blk.7.time_mix_lerp_w.weightF32[2048]
-
blk.7.time_mix_lerp_x.weightF32[2048]
-
blk.7.time_mix_ln.biasF32[2048]
-
blk.7.time_mix_ln.weightF32[2048]
-
blk.7.time_mix_output.weightQ4_K[2048, 2048]
-
blk.7.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.7.time_mix_value.weightQ4_K[2048, 2048]
-
blk.7.time_mix_w1.weightF32[2048, 160]
-
blk.7.time_mix_w2.weightF32[32, 2048, 5]
-
blk.8.attn_norm.biasF32[2048]
-
blk.8.attn_norm.weightF32[2048]
-
blk.8.attn_norm_2.biasF32[2048]
-
blk.8.attn_norm_2.weightF32[2048]
-
blk.8.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.8.channel_mix_lerp_k.weightF32[2048]
-
blk.8.channel_mix_lerp_r.weightF32[2048]
-
blk.8.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.8.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.8.time_mix_decay.weightF32[2048]
-
blk.8.time_mix_decay_w1.weightF32[2048, 64]
-
blk.8.time_mix_decay_w2.weightF32[64, 2048]
-
blk.8.time_mix_first.weightF32[64, 32]
-
blk.8.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.8.time_mix_key.weightQ4_K[2048, 2048]
-
blk.8.time_mix_lerp_g.weightF32[2048]
-
blk.8.time_mix_lerp_k.weightF32[2048]
-
blk.8.time_mix_lerp_r.weightF32[2048]
-
blk.8.time_mix_lerp_v.weightF32[2048]
-
blk.8.time_mix_lerp_w.weightF32[2048]
-
blk.8.time_mix_lerp_x.weightF32[2048]
-
blk.8.time_mix_ln.biasF32[2048]
-
blk.8.time_mix_ln.weightF32[2048]
-
blk.8.time_mix_output.weightQ4_K[2048, 2048]
-
blk.8.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.8.time_mix_value.weightQ4_K[2048, 2048]
-
blk.8.time_mix_w1.weightF32[2048, 160]
-
blk.8.time_mix_w2.weightF32[32, 2048, 5]
-
blk.9.attn_norm.biasF32[2048]
-
blk.9.attn_norm.weightF32[2048]
-
blk.9.attn_norm_2.biasF32[2048]
-
blk.9.attn_norm_2.weightF32[2048]
-
blk.9.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.9.channel_mix_lerp_k.weightF32[2048]
-
blk.9.channel_mix_lerp_r.weightF32[2048]
-
blk.9.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.9.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.9.time_mix_decay.weightF32[2048]
-
blk.9.time_mix_decay_w1.weightF32[2048, 64]
-
blk.9.time_mix_decay_w2.weightF32[64, 2048]
-
blk.9.time_mix_first.weightF32[64, 32]
-
blk.9.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.9.time_mix_key.weightQ4_K[2048, 2048]
-
blk.9.time_mix_lerp_g.weightF32[2048]
-
blk.9.time_mix_lerp_k.weightF32[2048]
-
blk.9.time_mix_lerp_r.weightF32[2048]
-
blk.9.time_mix_lerp_v.weightF32[2048]
-
blk.9.time_mix_lerp_w.weightF32[2048]
-
blk.9.time_mix_lerp_x.weightF32[2048]
-
blk.9.time_mix_ln.biasF32[2048]
-
blk.9.time_mix_ln.weightF32[2048]
-
blk.9.time_mix_output.weightQ4_K[2048, 2048]
-
blk.9.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.9.time_mix_value.weightQ4_K[2048, 2048]
-
blk.9.time_mix_w1.weightF32[2048, 160]
-
blk.9.time_mix_w2.weightF32[32, 2048, 5]
-
blk.10.attn_norm.biasF32[2048]
-
blk.10.attn_norm.weightF32[2048]
-
blk.10.attn_norm_2.biasF32[2048]
-
blk.10.attn_norm_2.weightF32[2048]
-
blk.10.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.10.channel_mix_lerp_k.weightF32[2048]
-
blk.10.channel_mix_lerp_r.weightF32[2048]
-
blk.10.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.10.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.10.time_mix_decay.weightF32[2048]
-
blk.10.time_mix_decay_w1.weightF32[2048, 64]
-
blk.10.time_mix_decay_w2.weightF32[64, 2048]
-
blk.10.time_mix_first.weightF32[64, 32]
-
blk.10.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.10.time_mix_key.weightQ4_K[2048, 2048]
-
blk.10.time_mix_lerp_g.weightF32[2048]
-
blk.10.time_mix_lerp_k.weightF32[2048]
-
blk.10.time_mix_lerp_r.weightF32[2048]
-
blk.10.time_mix_lerp_v.weightF32[2048]
-
blk.10.time_mix_lerp_w.weightF32[2048]
-
blk.10.time_mix_lerp_x.weightF32[2048]
-
blk.10.time_mix_ln.biasF32[2048]
-
blk.10.time_mix_ln.weightF32[2048]
-
blk.10.time_mix_output.weightQ4_K[2048, 2048]
-
blk.10.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.10.time_mix_value.weightQ4_K[2048, 2048]
-
blk.10.time_mix_w1.weightF32[2048, 160]
-
blk.10.time_mix_w2.weightF32[32, 2048, 5]
-
blk.11.attn_norm.biasF32[2048]
-
blk.11.attn_norm.weightF32[2048]
-
blk.11.attn_norm_2.biasF32[2048]
-
blk.11.attn_norm_2.weightF32[2048]
-
blk.11.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.11.channel_mix_lerp_k.weightF32[2048]
-
blk.11.channel_mix_lerp_r.weightF32[2048]
-
blk.11.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.11.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.11.time_mix_decay.weightF32[2048]
-
blk.11.time_mix_decay_w1.weightF32[2048, 64]
-
blk.11.time_mix_decay_w2.weightF32[64, 2048]
-
blk.11.time_mix_first.weightF32[64, 32]
-
blk.11.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.11.time_mix_key.weightQ4_K[2048, 2048]
-
blk.11.time_mix_lerp_g.weightF32[2048]
-
blk.11.time_mix_lerp_k.weightF32[2048]
-
blk.11.time_mix_lerp_r.weightF32[2048]
-
blk.11.time_mix_lerp_v.weightF32[2048]
-
blk.11.time_mix_lerp_w.weightF32[2048]
-
blk.11.time_mix_lerp_x.weightF32[2048]
-
blk.11.time_mix_ln.biasF32[2048]
-
blk.11.time_mix_ln.weightF32[2048]
-
blk.11.time_mix_output.weightQ4_K[2048, 2048]
-
blk.11.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.11.time_mix_value.weightQ4_K[2048, 2048]
-
blk.11.time_mix_w1.weightF32[2048, 160]
-
blk.11.time_mix_w2.weightF32[32, 2048, 5]
-
blk.12.attn_norm.biasF32[2048]
-
blk.12.attn_norm.weightF32[2048]
-
blk.12.attn_norm_2.biasF32[2048]
-
blk.12.attn_norm_2.weightF32[2048]
-
blk.12.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.12.channel_mix_lerp_k.weightF32[2048]
-
blk.12.channel_mix_lerp_r.weightF32[2048]
-
blk.12.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.12.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.12.time_mix_decay.weightF32[2048]
-
blk.12.time_mix_decay_w1.weightF32[2048, 64]
-
blk.12.time_mix_decay_w2.weightF32[64, 2048]
-
blk.12.time_mix_first.weightF32[64, 32]
-
blk.12.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.12.time_mix_key.weightQ4_K[2048, 2048]
-
blk.12.time_mix_lerp_g.weightF32[2048]
-
blk.12.time_mix_lerp_k.weightF32[2048]
-
blk.12.time_mix_lerp_r.weightF32[2048]
-
blk.12.time_mix_lerp_v.weightF32[2048]
-
blk.12.time_mix_lerp_w.weightF32[2048]
-
blk.12.time_mix_lerp_x.weightF32[2048]
-
blk.12.time_mix_ln.biasF32[2048]
-
blk.12.time_mix_ln.weightF32[2048]
-
blk.12.time_mix_output.weightQ4_K[2048, 2048]
-
blk.12.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.12.time_mix_value.weightQ4_K[2048, 2048]
-
blk.12.time_mix_w1.weightF32[2048, 160]
-
blk.12.time_mix_w2.weightF32[32, 2048, 5]
-
blk.13.attn_norm.biasF32[2048]
-
blk.13.attn_norm.weightF32[2048]
-
blk.13.attn_norm_2.biasF32[2048]
-
blk.13.attn_norm_2.weightF32[2048]
-
blk.13.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.13.channel_mix_lerp_k.weightF32[2048]
-
blk.13.channel_mix_lerp_r.weightF32[2048]
-
blk.13.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.13.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.13.time_mix_decay.weightF32[2048]
-
blk.13.time_mix_decay_w1.weightF32[2048, 64]
-
blk.13.time_mix_decay_w2.weightF32[64, 2048]
-
blk.13.time_mix_first.weightF32[64, 32]
-
blk.13.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.13.time_mix_key.weightQ4_K[2048, 2048]
-
blk.13.time_mix_lerp_g.weightF32[2048]
-
blk.13.time_mix_lerp_k.weightF32[2048]
-
blk.13.time_mix_lerp_r.weightF32[2048]
-
blk.13.time_mix_lerp_v.weightF32[2048]
-
blk.13.time_mix_lerp_w.weightF32[2048]
-
blk.13.time_mix_lerp_x.weightF32[2048]
-
blk.13.time_mix_ln.biasF32[2048]
-
blk.13.time_mix_ln.weightF32[2048]
-
blk.13.time_mix_output.weightQ4_K[2048, 2048]
-
blk.13.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.13.time_mix_value.weightQ4_K[2048, 2048]
-
blk.13.time_mix_w1.weightF32[2048, 160]
-
blk.13.time_mix_w2.weightF32[32, 2048, 5]
-
blk.14.attn_norm.biasF32[2048]
-
blk.14.attn_norm.weightF32[2048]
-
blk.14.attn_norm_2.biasF32[2048]
-
blk.14.attn_norm_2.weightF32[2048]
-
blk.14.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.14.channel_mix_lerp_k.weightF32[2048]
-
blk.14.channel_mix_lerp_r.weightF32[2048]
-
blk.14.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.14.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.14.time_mix_decay.weightF32[2048]
-
blk.14.time_mix_decay_w1.weightF32[2048, 64]
-
blk.14.time_mix_decay_w2.weightF32[64, 2048]
-
blk.14.time_mix_first.weightF32[64, 32]
-
blk.14.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.14.time_mix_key.weightQ4_K[2048, 2048]
-
blk.14.time_mix_lerp_g.weightF32[2048]
-
blk.14.time_mix_lerp_k.weightF32[2048]
-
blk.14.time_mix_lerp_r.weightF32[2048]
-
blk.14.time_mix_lerp_v.weightF32[2048]
-
blk.14.time_mix_lerp_w.weightF32[2048]
-
blk.14.time_mix_lerp_x.weightF32[2048]
-
blk.14.time_mix_ln.biasF32[2048]
-
blk.14.time_mix_ln.weightF32[2048]
-
blk.14.time_mix_output.weightQ4_K[2048, 2048]
-
blk.14.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.14.time_mix_value.weightQ4_K[2048, 2048]
-
blk.14.time_mix_w1.weightF32[2048, 160]
-
blk.14.time_mix_w2.weightF32[32, 2048, 5]
-
blk.15.attn_norm.biasF32[2048]
-
blk.15.attn_norm.weightF32[2048]
-
blk.15.attn_norm_2.biasF32[2048]
-
blk.15.attn_norm_2.weightF32[2048]
-
blk.15.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.15.channel_mix_lerp_k.weightF32[2048]
-
blk.15.channel_mix_lerp_r.weightF32[2048]
-
blk.15.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.15.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.15.time_mix_decay.weightF32[2048]
-
blk.15.time_mix_decay_w1.weightF32[2048, 64]
-
blk.15.time_mix_decay_w2.weightF32[64, 2048]
-
blk.15.time_mix_first.weightF32[64, 32]
-
blk.15.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.15.time_mix_key.weightQ4_K[2048, 2048]
-
blk.15.time_mix_lerp_g.weightF32[2048]
-
blk.15.time_mix_lerp_k.weightF32[2048]
-
blk.15.time_mix_lerp_r.weightF32[2048]
-
blk.15.time_mix_lerp_v.weightF32[2048]
-
blk.15.time_mix_lerp_w.weightF32[2048]
-
blk.15.time_mix_lerp_x.weightF32[2048]
-
blk.15.time_mix_ln.biasF32[2048]
-
blk.15.time_mix_ln.weightF32[2048]
-
blk.15.time_mix_output.weightQ4_K[2048, 2048]
-
blk.15.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.15.time_mix_value.weightQ4_K[2048, 2048]
-
blk.15.time_mix_w1.weightF32[2048, 160]
-
blk.15.time_mix_w2.weightF32[32, 2048, 5]
-
blk.16.attn_norm.biasF32[2048]
-
blk.16.attn_norm.weightF32[2048]
-
blk.16.attn_norm_2.biasF32[2048]
-
blk.16.attn_norm_2.weightF32[2048]
-
blk.16.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.16.channel_mix_lerp_k.weightF32[2048]
-
blk.16.channel_mix_lerp_r.weightF32[2048]
-
blk.16.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.16.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.16.time_mix_decay.weightF32[2048]
-
blk.16.time_mix_decay_w1.weightF32[2048, 64]
-
blk.16.time_mix_decay_w2.weightF32[64, 2048]
-
blk.16.time_mix_first.weightF32[64, 32]
-
blk.16.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.16.time_mix_key.weightQ4_K[2048, 2048]
-
blk.16.time_mix_lerp_g.weightF32[2048]
-
blk.16.time_mix_lerp_k.weightF32[2048]
-
blk.16.time_mix_lerp_r.weightF32[2048]
-
blk.16.time_mix_lerp_v.weightF32[2048]
-
blk.16.time_mix_lerp_w.weightF32[2048]
-
blk.16.time_mix_lerp_x.weightF32[2048]
-
blk.16.time_mix_ln.biasF32[2048]
-
blk.16.time_mix_ln.weightF32[2048]
-
blk.16.time_mix_output.weightQ4_K[2048, 2048]
-
blk.16.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.16.time_mix_value.weightQ4_K[2048, 2048]
-
blk.16.time_mix_w1.weightF32[2048, 160]
-
blk.16.time_mix_w2.weightF32[32, 2048, 5]
-
blk.17.attn_norm.biasF32[2048]
-
blk.17.attn_norm.weightF32[2048]
-
blk.17.attn_norm_2.biasF32[2048]
-
blk.17.attn_norm_2.weightF32[2048]
-
blk.17.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.17.channel_mix_lerp_k.weightF32[2048]
-
blk.17.channel_mix_lerp_r.weightF32[2048]
-
blk.17.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.17.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.17.time_mix_decay.weightF32[2048]
-
blk.17.time_mix_decay_w1.weightF32[2048, 64]
-
blk.17.time_mix_decay_w2.weightF32[64, 2048]
-
blk.17.time_mix_first.weightF32[64, 32]
-
blk.17.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.17.time_mix_key.weightQ4_K[2048, 2048]
-
blk.17.time_mix_lerp_g.weightF32[2048]
-
blk.17.time_mix_lerp_k.weightF32[2048]
-
blk.17.time_mix_lerp_r.weightF32[2048]
-
blk.17.time_mix_lerp_v.weightF32[2048]
-
blk.17.time_mix_lerp_w.weightF32[2048]
-
blk.17.time_mix_lerp_x.weightF32[2048]
-
blk.17.time_mix_ln.biasF32[2048]
-
blk.17.time_mix_ln.weightF32[2048]
-
blk.17.time_mix_output.weightQ4_K[2048, 2048]
-
blk.17.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.17.time_mix_value.weightQ4_K[2048, 2048]
-
blk.17.time_mix_w1.weightF32[2048, 160]
-
blk.17.time_mix_w2.weightF32[32, 2048, 5]
-
blk.18.attn_norm.biasF32[2048]
-
blk.18.attn_norm.weightF32[2048]
-
blk.18.attn_norm_2.biasF32[2048]
-
blk.18.attn_norm_2.weightF32[2048]
-
blk.18.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.18.channel_mix_lerp_k.weightF32[2048]
-
blk.18.channel_mix_lerp_r.weightF32[2048]
-
blk.18.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.18.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.18.time_mix_decay.weightF32[2048]
-
blk.18.time_mix_decay_w1.weightF32[2048, 64]
-
blk.18.time_mix_decay_w2.weightF32[64, 2048]
-
blk.18.time_mix_first.weightF32[64, 32]
-
blk.18.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.18.time_mix_key.weightQ4_K[2048, 2048]
-
blk.18.time_mix_lerp_g.weightF32[2048]
-
blk.18.time_mix_lerp_k.weightF32[2048]
-
blk.18.time_mix_lerp_r.weightF32[2048]
-
blk.18.time_mix_lerp_v.weightF32[2048]
-
blk.18.time_mix_lerp_w.weightF32[2048]
-
blk.18.time_mix_lerp_x.weightF32[2048]
-
blk.18.time_mix_ln.biasF32[2048]
-
blk.18.time_mix_ln.weightF32[2048]
-
blk.18.time_mix_output.weightQ4_K[2048, 2048]
-
blk.18.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.18.time_mix_value.weightQ4_K[2048, 2048]
-
blk.18.time_mix_w1.weightF32[2048, 160]
-
blk.18.time_mix_w2.weightF32[32, 2048, 5]
-
blk.19.attn_norm.biasF32[2048]
-
blk.19.attn_norm.weightF32[2048]
-
blk.19.attn_norm_2.biasF32[2048]
-
blk.19.attn_norm_2.weightF32[2048]
-
blk.19.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.19.channel_mix_lerp_k.weightF32[2048]
-
blk.19.channel_mix_lerp_r.weightF32[2048]
-
blk.19.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.19.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.19.time_mix_decay.weightF32[2048]
-
blk.19.time_mix_decay_w1.weightF32[2048, 64]
-
blk.19.time_mix_decay_w2.weightF32[64, 2048]
-
blk.19.time_mix_first.weightF32[64, 32]
-
blk.19.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.19.time_mix_key.weightQ4_K[2048, 2048]
-
blk.19.time_mix_lerp_g.weightF32[2048]
-
blk.19.time_mix_lerp_k.weightF32[2048]
-
blk.19.time_mix_lerp_r.weightF32[2048]
-
blk.19.time_mix_lerp_v.weightF32[2048]
-
blk.19.time_mix_lerp_w.weightF32[2048]
-
blk.19.time_mix_lerp_x.weightF32[2048]
-
blk.19.time_mix_ln.biasF32[2048]
-
blk.19.time_mix_ln.weightF32[2048]
-
blk.19.time_mix_output.weightQ4_K[2048, 2048]
-
blk.19.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.19.time_mix_value.weightQ4_K[2048, 2048]
-
blk.19.time_mix_w1.weightF32[2048, 160]
-
blk.19.time_mix_w2.weightF32[32, 2048, 5]
-
blk.20.attn_norm.biasF32[2048]
-
blk.20.attn_norm.weightF32[2048]
-
blk.20.attn_norm_2.biasF32[2048]
-
blk.20.attn_norm_2.weightF32[2048]
-
blk.20.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.20.channel_mix_lerp_k.weightF32[2048]
-
blk.20.channel_mix_lerp_r.weightF32[2048]
-
blk.20.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.20.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.20.time_mix_decay.weightF32[2048]
-
blk.20.time_mix_decay_w1.weightF32[2048, 64]
-
blk.20.time_mix_decay_w2.weightF32[64, 2048]
-
blk.20.time_mix_first.weightF32[64, 32]
-
blk.20.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.20.time_mix_key.weightQ4_K[2048, 2048]
-
blk.20.time_mix_lerp_g.weightF32[2048]
-
blk.20.time_mix_lerp_k.weightF32[2048]
-
blk.20.time_mix_lerp_r.weightF32[2048]
-
blk.20.time_mix_lerp_v.weightF32[2048]
-
blk.20.time_mix_lerp_w.weightF32[2048]
-
blk.20.time_mix_lerp_x.weightF32[2048]
-
blk.20.time_mix_ln.biasF32[2048]
-
blk.20.time_mix_ln.weightF32[2048]
-
blk.20.time_mix_output.weightQ4_K[2048, 2048]
-
blk.20.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.20.time_mix_value.weightQ4_K[2048, 2048]
-
blk.20.time_mix_w1.weightF32[2048, 160]
-
blk.20.time_mix_w2.weightF32[32, 2048, 5]
-
blk.21.attn_norm.biasF32[2048]
-
blk.21.attn_norm.weightF32[2048]
-
blk.21.attn_norm_2.biasF32[2048]
-
blk.21.attn_norm_2.weightF32[2048]
-
blk.21.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.21.channel_mix_lerp_k.weightF32[2048]
-
blk.21.channel_mix_lerp_r.weightF32[2048]
-
blk.21.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.21.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.21.time_mix_decay.weightF32[2048]
-
blk.21.time_mix_decay_w1.weightF32[2048, 64]
-
blk.21.time_mix_decay_w2.weightF32[64, 2048]
-
blk.21.time_mix_first.weightF32[64, 32]
-
blk.21.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.21.time_mix_key.weightQ4_K[2048, 2048]
-
blk.21.time_mix_lerp_g.weightF32[2048]
-
blk.21.time_mix_lerp_k.weightF32[2048]
-
blk.21.time_mix_lerp_r.weightF32[2048]
-
blk.21.time_mix_lerp_v.weightF32[2048]
-
blk.21.time_mix_lerp_w.weightF32[2048]
-
blk.21.time_mix_lerp_x.weightF32[2048]
-
blk.21.time_mix_ln.biasF32[2048]
-
blk.21.time_mix_ln.weightF32[2048]
-
blk.21.time_mix_output.weightQ4_K[2048, 2048]
-
blk.21.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.21.time_mix_value.weightQ4_K[2048, 2048]
-
blk.21.time_mix_w1.weightF32[2048, 160]
-
blk.21.time_mix_w2.weightF32[32, 2048, 5]
-
blk.22.attn_norm.biasF32[2048]
-
blk.22.attn_norm.weightF32[2048]
-
blk.22.attn_norm_2.biasF32[2048]
-
blk.22.attn_norm_2.weightF32[2048]
-
blk.22.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.22.channel_mix_lerp_k.weightF32[2048]
-
blk.22.channel_mix_lerp_r.weightF32[2048]
-
blk.22.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.22.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.22.time_mix_decay.weightF32[2048]
-
blk.22.time_mix_decay_w1.weightF32[2048, 64]
-
blk.22.time_mix_decay_w2.weightF32[64, 2048]
-
blk.22.time_mix_first.weightF32[64, 32]
-
blk.22.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.22.time_mix_key.weightQ4_K[2048, 2048]
-
blk.22.time_mix_lerp_g.weightF32[2048]
-
blk.22.time_mix_lerp_k.weightF32[2048]
-
blk.22.time_mix_lerp_r.weightF32[2048]
-
blk.22.time_mix_lerp_v.weightF32[2048]
-
blk.22.time_mix_lerp_w.weightF32[2048]
-
blk.22.time_mix_lerp_x.weightF32[2048]
-
blk.22.time_mix_ln.biasF32[2048]
-
blk.22.time_mix_ln.weightF32[2048]
-
blk.22.time_mix_output.weightQ4_K[2048, 2048]
-
blk.22.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.22.time_mix_value.weightQ4_K[2048, 2048]
-
blk.22.time_mix_w1.weightF32[2048, 160]
-
blk.22.time_mix_w2.weightF32[32, 2048, 5]
-
blk.23.attn_norm.biasF32[2048]
-
blk.23.attn_norm.weightF32[2048]
-
blk.23.attn_norm_2.biasF32[2048]
-
blk.23.attn_norm_2.weightF32[2048]
-
blk.23.channel_mix_key.weightQ4_K[2048, 7168]
-
blk.23.channel_mix_lerp_k.weightF32[2048]
-
blk.23.channel_mix_lerp_r.weightF32[2048]
-
blk.23.channel_mix_receptance.weightQ4_K[2048, 2048]
-
blk.23.channel_mix_value.weightQ4_K[7168, 2048]
-
blk.23.time_mix_decay.weightF32[2048]
-
blk.23.time_mix_decay_w1.weightF32[2048, 64]
-
blk.23.time_mix_decay_w2.weightF32[64, 2048]
-
blk.23.time_mix_first.weightF32[64, 32]
-
blk.23.time_mix_gate.weightQ4_K[2048, 2048]
-
blk.23.time_mix_key.weightQ4_K[2048, 2048]
-
blk.23.time_mix_lerp_g.weightF32[2048]
-
blk.23.time_mix_lerp_k.weightF32[2048]
-
blk.23.time_mix_lerp_r.weightF32[2048]
-
blk.23.time_mix_lerp_v.weightF32[2048]
-
blk.23.time_mix_lerp_w.weightF32[2048]
-
blk.23.time_mix_lerp_x.weightF32[2048]
-
blk.23.time_mix_ln.biasF32[2048]
-
blk.23.time_mix_ln.weightF32[2048]
-
blk.23.time_mix_output.weightQ4_K[2048, 2048]
-
blk.23.time_mix_receptance.weightQ4_K[2048, 2048]
-
blk.23.time_mix_value.weightQ4_K[2048, 2048]
-
blk.23.time_mix_w1.weightF32[2048, 160]
-
blk.23.time_mix_w2.weightF32[32, 2048, 5]
-
output.weightQ6_K[2048, 65536]
-
output_norm.biasF32[2048]
-
token_embd_norm.biasF32[2048]
-
token_embd_norm.weightF32[2048]
-
output_norm.weightF32[2048]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23