RWKV (pronounced RwaKuv) is an RNN with great LLM performance.
278 Pulls Updated 4 months ago
be8516a49572 · 8.5GB
-
general.architecturerwkv6rwkv6
-
general.file_typeQ8_0Q8_0
-
rwkv6.attention.head_count00
-
rwkv6.attention.layer_norm_epsilon1e-051e-05
-
rwkv6.block_count3232
-
rwkv6.context_length10485761048576
-
rwkv6.embedding_length40964096
-
rwkv6.feed_forward_length1433614336
-
rwkv6.rescale_every_n_layers66
-
rwkv6.time_decay_extra_dim128128
-
rwkv6.time_mix_extra_dim6464
-
rwkv6.wkv.head_size6464
-
tokenizer.ggml.bos_token_id00
-
tokenizer.ggml.eos_token_id00
-
tokenizer.ggml.modelrwkvrwkv
-
tokenizer.ggml.token_type[3, 1, 1, 1, 1, ...][3, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[<s>, \x00, \x01, \x02, \x03, ...][<s>, \x00, \x01, \x02, \x03, ...]
-
quantize.imatrix.chunks_count131131
-
quantize.imatrix.dataset/training_dir/calibration_datav3.txt/training_dir/calibration_datav3.txt
-
quantize.imatrix.entries_count352352
-
quantize.imatrix.file/models_out/rwkv-6-world-7b-GGUF/rwkv-6-world-7b.imatrix/models_out/rwkv-6-world-7b-GGUF/rwkv-6-world-7b.imatrix
-
token_embd.weightQ8_0[4096, 65536]
-
blk.0.attn_norm.biasF32[4096]
-
blk.0.attn_norm.weightF32[4096]
-
blk.0.attn_norm_2.biasF32[4096]
-
blk.0.attn_norm_2.weightF32[4096]
-
blk.0.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.0.channel_mix_lerp_k.weightF32[4096]
-
blk.0.channel_mix_lerp_r.weightF32[4096]
-
blk.0.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.0.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.0.time_mix_decay.weightF32[4096]
-
blk.0.time_mix_decay_w1.weightF32[4096, 128]
-
blk.0.time_mix_decay_w2.weightF32[128, 4096]
-
blk.0.time_mix_first.weightF32[64, 64]
-
blk.0.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.0.time_mix_key.weightQ8_0[4096, 4096]
-
blk.0.time_mix_lerp_g.weightF32[4096]
-
blk.0.time_mix_lerp_k.weightF32[4096]
-
blk.0.time_mix_lerp_r.weightF32[4096]
-
blk.0.time_mix_lerp_v.weightF32[4096]
-
blk.0.time_mix_lerp_w.weightF32[4096]
-
blk.0.time_mix_lerp_x.weightF32[4096]
-
blk.0.time_mix_ln.biasF32[4096]
-
blk.0.time_mix_ln.weightF32[4096]
-
blk.0.time_mix_output.weightQ8_0[4096, 4096]
-
blk.0.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.0.time_mix_value.weightQ8_0[4096, 4096]
-
blk.0.time_mix_w1.weightF32[4096, 320]
-
blk.0.time_mix_w2.weightF32[64, 4096, 5]
-
blk.1.attn_norm.biasF32[4096]
-
blk.1.attn_norm.weightF32[4096]
-
blk.1.attn_norm_2.biasF32[4096]
-
blk.1.attn_norm_2.weightF32[4096]
-
blk.1.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.1.channel_mix_lerp_k.weightF32[4096]
-
blk.1.channel_mix_lerp_r.weightF32[4096]
-
blk.1.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.1.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.1.time_mix_decay.weightF32[4096]
-
blk.1.time_mix_decay_w1.weightF32[4096, 128]
-
blk.1.time_mix_decay_w2.weightF32[128, 4096]
-
blk.1.time_mix_first.weightF32[64, 64]
-
blk.1.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.1.time_mix_key.weightQ8_0[4096, 4096]
-
blk.1.time_mix_lerp_g.weightF32[4096]
-
blk.1.time_mix_lerp_k.weightF32[4096]
-
blk.1.time_mix_lerp_r.weightF32[4096]
-
blk.1.time_mix_lerp_v.weightF32[4096]
-
blk.1.time_mix_lerp_w.weightF32[4096]
-
blk.1.time_mix_lerp_x.weightF32[4096]
-
blk.1.time_mix_ln.biasF32[4096]
-
blk.1.time_mix_ln.weightF32[4096]
-
blk.1.time_mix_output.weightQ8_0[4096, 4096]
-
blk.1.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.1.time_mix_value.weightQ8_0[4096, 4096]
-
blk.1.time_mix_w1.weightF32[4096, 320]
-
blk.1.time_mix_w2.weightF32[64, 4096, 5]
-
blk.2.attn_norm.biasF32[4096]
-
blk.2.attn_norm.weightF32[4096]
-
blk.2.attn_norm_2.biasF32[4096]
-
blk.2.attn_norm_2.weightF32[4096]
-
blk.2.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.2.channel_mix_lerp_k.weightF32[4096]
-
blk.2.channel_mix_lerp_r.weightF32[4096]
-
blk.2.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.2.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.2.time_mix_decay.weightF32[4096]
-
blk.2.time_mix_decay_w1.weightF32[4096, 128]
-
blk.2.time_mix_decay_w2.weightF32[128, 4096]
-
blk.2.time_mix_first.weightF32[64, 64]
-
blk.2.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.2.time_mix_key.weightQ8_0[4096, 4096]
-
blk.2.time_mix_lerp_g.weightF32[4096]
-
blk.2.time_mix_lerp_k.weightF32[4096]
-
blk.2.time_mix_lerp_r.weightF32[4096]
-
blk.2.time_mix_lerp_v.weightF32[4096]
-
blk.2.time_mix_lerp_w.weightF32[4096]
-
blk.2.time_mix_lerp_x.weightF32[4096]
-
blk.2.time_mix_ln.biasF32[4096]
-
blk.2.time_mix_ln.weightF32[4096]
-
blk.2.time_mix_output.weightQ8_0[4096, 4096]
-
blk.2.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.2.time_mix_value.weightQ8_0[4096, 4096]
-
blk.2.time_mix_w1.weightF32[4096, 320]
-
blk.2.time_mix_w2.weightF32[64, 4096, 5]
-
blk.3.attn_norm.biasF32[4096]
-
blk.3.attn_norm.weightF32[4096]
-
blk.3.attn_norm_2.biasF32[4096]
-
blk.3.attn_norm_2.weightF32[4096]
-
blk.3.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.3.channel_mix_lerp_k.weightF32[4096]
-
blk.3.channel_mix_lerp_r.weightF32[4096]
-
blk.3.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.3.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.3.time_mix_decay.weightF32[4096]
-
blk.3.time_mix_decay_w1.weightF32[4096, 128]
-
blk.3.time_mix_decay_w2.weightF32[128, 4096]
-
blk.3.time_mix_first.weightF32[64, 64]
-
blk.3.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.3.time_mix_key.weightQ8_0[4096, 4096]
-
blk.3.time_mix_lerp_g.weightF32[4096]
-
blk.3.time_mix_lerp_k.weightF32[4096]
-
blk.3.time_mix_lerp_r.weightF32[4096]
-
blk.3.time_mix_lerp_v.weightF32[4096]
-
blk.3.time_mix_lerp_w.weightF32[4096]
-
blk.3.time_mix_lerp_x.weightF32[4096]
-
blk.3.time_mix_ln.biasF32[4096]
-
blk.3.time_mix_ln.weightF32[4096]
-
blk.3.time_mix_output.weightQ8_0[4096, 4096]
-
blk.3.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.3.time_mix_value.weightQ8_0[4096, 4096]
-
blk.3.time_mix_w1.weightF32[4096, 320]
-
blk.3.time_mix_w2.weightF32[64, 4096, 5]
-
blk.4.attn_norm.biasF32[4096]
-
blk.4.attn_norm.weightF32[4096]
-
blk.4.attn_norm_2.biasF32[4096]
-
blk.4.attn_norm_2.weightF32[4096]
-
blk.4.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.4.channel_mix_lerp_k.weightF32[4096]
-
blk.4.channel_mix_lerp_r.weightF32[4096]
-
blk.4.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.4.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.4.time_mix_decay.weightF32[4096]
-
blk.4.time_mix_decay_w1.weightF32[4096, 128]
-
blk.4.time_mix_decay_w2.weightF32[128, 4096]
-
blk.4.time_mix_first.weightF32[64, 64]
-
blk.4.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.4.time_mix_key.weightQ8_0[4096, 4096]
-
blk.4.time_mix_lerp_g.weightF32[4096]
-
blk.4.time_mix_lerp_k.weightF32[4096]
-
blk.4.time_mix_lerp_r.weightF32[4096]
-
blk.4.time_mix_lerp_v.weightF32[4096]
-
blk.4.time_mix_lerp_w.weightF32[4096]
-
blk.4.time_mix_lerp_x.weightF32[4096]
-
blk.4.time_mix_ln.biasF32[4096]
-
blk.4.time_mix_ln.weightF32[4096]
-
blk.4.time_mix_output.weightQ8_0[4096, 4096]
-
blk.4.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.4.time_mix_value.weightQ8_0[4096, 4096]
-
blk.4.time_mix_w1.weightF32[4096, 320]
-
blk.4.time_mix_w2.weightF32[64, 4096, 5]
-
blk.5.attn_norm.biasF32[4096]
-
blk.5.attn_norm.weightF32[4096]
-
blk.5.attn_norm_2.biasF32[4096]
-
blk.5.attn_norm_2.weightF32[4096]
-
blk.5.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.5.channel_mix_lerp_k.weightF32[4096]
-
blk.5.channel_mix_lerp_r.weightF32[4096]
-
blk.5.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.5.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.5.time_mix_decay.weightF32[4096]
-
blk.5.time_mix_decay_w1.weightF32[4096, 128]
-
blk.5.time_mix_decay_w2.weightF32[128, 4096]
-
blk.5.time_mix_first.weightF32[64, 64]
-
blk.5.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.5.time_mix_key.weightQ8_0[4096, 4096]
-
blk.5.time_mix_lerp_g.weightF32[4096]
-
blk.5.time_mix_lerp_k.weightF32[4096]
-
blk.5.time_mix_lerp_r.weightF32[4096]
-
blk.5.time_mix_lerp_v.weightF32[4096]
-
blk.5.time_mix_lerp_w.weightF32[4096]
-
blk.5.time_mix_lerp_x.weightF32[4096]
-
blk.5.time_mix_ln.biasF32[4096]
-
blk.5.time_mix_ln.weightF32[4096]
-
blk.5.time_mix_output.weightQ8_0[4096, 4096]
-
blk.5.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.5.time_mix_value.weightQ8_0[4096, 4096]
-
blk.5.time_mix_w1.weightF32[4096, 320]
-
blk.5.time_mix_w2.weightF32[64, 4096, 5]
-
blk.6.attn_norm.biasF32[4096]
-
blk.6.attn_norm.weightF32[4096]
-
blk.6.attn_norm_2.biasF32[4096]
-
blk.6.attn_norm_2.weightF32[4096]
-
blk.6.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.6.channel_mix_lerp_k.weightF32[4096]
-
blk.6.channel_mix_lerp_r.weightF32[4096]
-
blk.6.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.6.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.6.time_mix_decay.weightF32[4096]
-
blk.6.time_mix_decay_w1.weightF32[4096, 128]
-
blk.6.time_mix_decay_w2.weightF32[128, 4096]
-
blk.6.time_mix_first.weightF32[64, 64]
-
blk.6.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.6.time_mix_key.weightQ8_0[4096, 4096]
-
blk.6.time_mix_lerp_g.weightF32[4096]
-
blk.6.time_mix_lerp_k.weightF32[4096]
-
blk.6.time_mix_lerp_r.weightF32[4096]
-
blk.6.time_mix_lerp_v.weightF32[4096]
-
blk.6.time_mix_lerp_w.weightF32[4096]
-
blk.6.time_mix_lerp_x.weightF32[4096]
-
blk.6.time_mix_ln.biasF32[4096]
-
blk.6.time_mix_ln.weightF32[4096]
-
blk.6.time_mix_output.weightQ8_0[4096, 4096]
-
blk.6.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.6.time_mix_value.weightQ8_0[4096, 4096]
-
blk.6.time_mix_w1.weightF32[4096, 320]
-
blk.6.time_mix_w2.weightF32[64, 4096, 5]
-
blk.7.attn_norm.biasF32[4096]
-
blk.7.attn_norm.weightF32[4096]
-
blk.7.attn_norm_2.biasF32[4096]
-
blk.7.attn_norm_2.weightF32[4096]
-
blk.7.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.7.channel_mix_lerp_k.weightF32[4096]
-
blk.7.channel_mix_lerp_r.weightF32[4096]
-
blk.7.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.7.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.7.time_mix_decay.weightF32[4096]
-
blk.7.time_mix_decay_w1.weightF32[4096, 128]
-
blk.7.time_mix_decay_w2.weightF32[128, 4096]
-
blk.7.time_mix_first.weightF32[64, 64]
-
blk.7.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.7.time_mix_key.weightQ8_0[4096, 4096]
-
blk.7.time_mix_lerp_g.weightF32[4096]
-
blk.7.time_mix_lerp_k.weightF32[4096]
-
blk.7.time_mix_lerp_r.weightF32[4096]
-
blk.7.time_mix_lerp_v.weightF32[4096]
-
blk.7.time_mix_lerp_w.weightF32[4096]
-
blk.7.time_mix_lerp_x.weightF32[4096]
-
blk.7.time_mix_ln.biasF32[4096]
-
blk.7.time_mix_ln.weightF32[4096]
-
blk.7.time_mix_output.weightQ8_0[4096, 4096]
-
blk.7.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.7.time_mix_value.weightQ8_0[4096, 4096]
-
blk.7.time_mix_w1.weightF32[4096, 320]
-
blk.7.time_mix_w2.weightF32[64, 4096, 5]
-
blk.8.attn_norm.biasF32[4096]
-
blk.8.attn_norm.weightF32[4096]
-
blk.8.attn_norm_2.biasF32[4096]
-
blk.8.attn_norm_2.weightF32[4096]
-
blk.8.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.8.channel_mix_lerp_k.weightF32[4096]
-
blk.8.channel_mix_lerp_r.weightF32[4096]
-
blk.8.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.8.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.8.time_mix_decay.weightF32[4096]
-
blk.8.time_mix_decay_w1.weightF32[4096, 128]
-
blk.8.time_mix_decay_w2.weightF32[128, 4096]
-
blk.8.time_mix_first.weightF32[64, 64]
-
blk.8.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.8.time_mix_key.weightQ8_0[4096, 4096]
-
blk.8.time_mix_lerp_g.weightF32[4096]
-
blk.8.time_mix_lerp_k.weightF32[4096]
-
blk.8.time_mix_lerp_r.weightF32[4096]
-
blk.8.time_mix_lerp_v.weightF32[4096]
-
blk.8.time_mix_lerp_w.weightF32[4096]
-
blk.8.time_mix_lerp_x.weightF32[4096]
-
blk.8.time_mix_ln.biasF32[4096]
-
blk.8.time_mix_ln.weightF32[4096]
-
blk.8.time_mix_output.weightQ8_0[4096, 4096]
-
blk.8.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.8.time_mix_value.weightQ8_0[4096, 4096]
-
blk.8.time_mix_w1.weightF32[4096, 320]
-
blk.8.time_mix_w2.weightF32[64, 4096, 5]
-
blk.9.attn_norm.biasF32[4096]
-
blk.9.attn_norm.weightF32[4096]
-
blk.9.attn_norm_2.biasF32[4096]
-
blk.9.attn_norm_2.weightF32[4096]
-
blk.9.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.9.channel_mix_lerp_k.weightF32[4096]
-
blk.9.channel_mix_lerp_r.weightF32[4096]
-
blk.9.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.9.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.9.time_mix_decay.weightF32[4096]
-
blk.9.time_mix_decay_w1.weightF32[4096, 128]
-
blk.9.time_mix_decay_w2.weightF32[128, 4096]
-
blk.9.time_mix_first.weightF32[64, 64]
-
blk.9.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.9.time_mix_key.weightQ8_0[4096, 4096]
-
blk.9.time_mix_lerp_g.weightF32[4096]
-
blk.9.time_mix_lerp_k.weightF32[4096]
-
blk.9.time_mix_lerp_r.weightF32[4096]
-
blk.9.time_mix_lerp_v.weightF32[4096]
-
blk.9.time_mix_lerp_w.weightF32[4096]
-
blk.9.time_mix_lerp_x.weightF32[4096]
-
blk.9.time_mix_ln.biasF32[4096]
-
blk.9.time_mix_ln.weightF32[4096]
-
blk.9.time_mix_output.weightQ8_0[4096, 4096]
-
blk.9.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.9.time_mix_value.weightQ8_0[4096, 4096]
-
blk.9.time_mix_w1.weightF32[4096, 320]
-
blk.9.time_mix_w2.weightF32[64, 4096, 5]
-
blk.10.attn_norm.biasF32[4096]
-
blk.10.attn_norm.weightF32[4096]
-
blk.10.attn_norm_2.biasF32[4096]
-
blk.10.attn_norm_2.weightF32[4096]
-
blk.10.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.10.channel_mix_lerp_k.weightF32[4096]
-
blk.10.channel_mix_lerp_r.weightF32[4096]
-
blk.10.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.10.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.10.time_mix_decay.weightF32[4096]
-
blk.10.time_mix_decay_w1.weightF32[4096, 128]
-
blk.10.time_mix_decay_w2.weightF32[128, 4096]
-
blk.10.time_mix_first.weightF32[64, 64]
-
blk.10.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.10.time_mix_key.weightQ8_0[4096, 4096]
-
blk.10.time_mix_lerp_g.weightF32[4096]
-
blk.10.time_mix_lerp_k.weightF32[4096]
-
blk.10.time_mix_lerp_r.weightF32[4096]
-
blk.10.time_mix_lerp_v.weightF32[4096]
-
blk.10.time_mix_lerp_w.weightF32[4096]
-
blk.10.time_mix_lerp_x.weightF32[4096]
-
blk.10.time_mix_ln.biasF32[4096]
-
blk.10.time_mix_ln.weightF32[4096]
-
blk.10.time_mix_output.weightQ8_0[4096, 4096]
-
blk.10.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.10.time_mix_value.weightQ8_0[4096, 4096]
-
blk.10.time_mix_w1.weightF32[4096, 320]
-
blk.10.time_mix_w2.weightF32[64, 4096, 5]
-
blk.11.attn_norm.biasF32[4096]
-
blk.11.attn_norm.weightF32[4096]
-
blk.11.attn_norm_2.biasF32[4096]
-
blk.11.attn_norm_2.weightF32[4096]
-
blk.11.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.11.channel_mix_lerp_k.weightF32[4096]
-
blk.11.channel_mix_lerp_r.weightF32[4096]
-
blk.11.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.11.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.11.time_mix_decay.weightF32[4096]
-
blk.11.time_mix_decay_w1.weightF32[4096, 128]
-
blk.11.time_mix_decay_w2.weightF32[128, 4096]
-
blk.11.time_mix_first.weightF32[64, 64]
-
blk.11.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.11.time_mix_key.weightQ8_0[4096, 4096]
-
blk.11.time_mix_lerp_g.weightF32[4096]
-
blk.11.time_mix_lerp_k.weightF32[4096]
-
blk.11.time_mix_lerp_r.weightF32[4096]
-
blk.11.time_mix_lerp_v.weightF32[4096]
-
blk.11.time_mix_lerp_w.weightF32[4096]
-
blk.11.time_mix_lerp_x.weightF32[4096]
-
blk.11.time_mix_ln.biasF32[4096]
-
blk.11.time_mix_ln.weightF32[4096]
-
blk.11.time_mix_output.weightQ8_0[4096, 4096]
-
blk.11.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.11.time_mix_value.weightQ8_0[4096, 4096]
-
blk.11.time_mix_w1.weightF32[4096, 320]
-
blk.11.time_mix_w2.weightF32[64, 4096, 5]
-
blk.12.attn_norm.biasF32[4096]
-
blk.12.attn_norm.weightF32[4096]
-
blk.12.attn_norm_2.biasF32[4096]
-
blk.12.attn_norm_2.weightF32[4096]
-
blk.12.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.12.channel_mix_lerp_k.weightF32[4096]
-
blk.12.channel_mix_lerp_r.weightF32[4096]
-
blk.12.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.12.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.12.time_mix_decay.weightF32[4096]
-
blk.12.time_mix_decay_w1.weightF32[4096, 128]
-
blk.12.time_mix_decay_w2.weightF32[128, 4096]
-
blk.12.time_mix_first.weightF32[64, 64]
-
blk.12.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.12.time_mix_key.weightQ8_0[4096, 4096]
-
blk.12.time_mix_lerp_g.weightF32[4096]
-
blk.12.time_mix_lerp_k.weightF32[4096]
-
blk.12.time_mix_lerp_r.weightF32[4096]
-
blk.12.time_mix_lerp_v.weightF32[4096]
-
blk.12.time_mix_lerp_w.weightF32[4096]
-
blk.12.time_mix_lerp_x.weightF32[4096]
-
blk.12.time_mix_ln.biasF32[4096]
-
blk.12.time_mix_ln.weightF32[4096]
-
blk.12.time_mix_output.weightQ8_0[4096, 4096]
-
blk.12.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.12.time_mix_value.weightQ8_0[4096, 4096]
-
blk.12.time_mix_w1.weightF32[4096, 320]
-
blk.12.time_mix_w2.weightF32[64, 4096, 5]
-
blk.13.attn_norm.biasF32[4096]
-
blk.13.attn_norm.weightF32[4096]
-
blk.13.attn_norm_2.biasF32[4096]
-
blk.13.attn_norm_2.weightF32[4096]
-
blk.13.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.13.channel_mix_lerp_k.weightF32[4096]
-
blk.13.channel_mix_lerp_r.weightF32[4096]
-
blk.13.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.13.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.13.time_mix_decay.weightF32[4096]
-
blk.13.time_mix_decay_w1.weightF32[4096, 128]
-
blk.13.time_mix_decay_w2.weightF32[128, 4096]
-
blk.13.time_mix_first.weightF32[64, 64]
-
blk.13.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.13.time_mix_key.weightQ8_0[4096, 4096]
-
blk.13.time_mix_lerp_g.weightF32[4096]
-
blk.13.time_mix_lerp_k.weightF32[4096]
-
blk.13.time_mix_lerp_r.weightF32[4096]
-
blk.13.time_mix_lerp_v.weightF32[4096]
-
blk.13.time_mix_lerp_w.weightF32[4096]
-
blk.13.time_mix_lerp_x.weightF32[4096]
-
blk.13.time_mix_ln.biasF32[4096]
-
blk.13.time_mix_ln.weightF32[4096]
-
blk.13.time_mix_output.weightQ8_0[4096, 4096]
-
blk.13.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.13.time_mix_value.weightQ8_0[4096, 4096]
-
blk.13.time_mix_w1.weightF32[4096, 320]
-
blk.13.time_mix_w2.weightF32[64, 4096, 5]
-
blk.14.attn_norm.biasF32[4096]
-
blk.14.attn_norm.weightF32[4096]
-
blk.14.attn_norm_2.biasF32[4096]
-
blk.14.attn_norm_2.weightF32[4096]
-
blk.14.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.14.channel_mix_lerp_k.weightF32[4096]
-
blk.14.channel_mix_lerp_r.weightF32[4096]
-
blk.14.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.14.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.14.time_mix_decay.weightF32[4096]
-
blk.14.time_mix_decay_w1.weightF32[4096, 128]
-
blk.14.time_mix_decay_w2.weightF32[128, 4096]
-
blk.14.time_mix_first.weightF32[64, 64]
-
blk.14.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.14.time_mix_key.weightQ8_0[4096, 4096]
-
blk.14.time_mix_lerp_g.weightF32[4096]
-
blk.14.time_mix_lerp_k.weightF32[4096]
-
blk.14.time_mix_lerp_r.weightF32[4096]
-
blk.14.time_mix_lerp_v.weightF32[4096]
-
blk.14.time_mix_lerp_w.weightF32[4096]
-
blk.14.time_mix_lerp_x.weightF32[4096]
-
blk.14.time_mix_ln.biasF32[4096]
-
blk.14.time_mix_ln.weightF32[4096]
-
blk.14.time_mix_output.weightQ8_0[4096, 4096]
-
blk.14.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.14.time_mix_value.weightQ8_0[4096, 4096]
-
blk.14.time_mix_w1.weightF32[4096, 320]
-
blk.14.time_mix_w2.weightF32[64, 4096, 5]
-
blk.15.attn_norm.biasF32[4096]
-
blk.15.attn_norm.weightF32[4096]
-
blk.15.attn_norm_2.biasF32[4096]
-
blk.15.attn_norm_2.weightF32[4096]
-
blk.15.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.15.channel_mix_lerp_k.weightF32[4096]
-
blk.15.channel_mix_lerp_r.weightF32[4096]
-
blk.15.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.15.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.15.time_mix_decay.weightF32[4096]
-
blk.15.time_mix_decay_w1.weightF32[4096, 128]
-
blk.15.time_mix_decay_w2.weightF32[128, 4096]
-
blk.15.time_mix_first.weightF32[64, 64]
-
blk.15.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.15.time_mix_key.weightQ8_0[4096, 4096]
-
blk.15.time_mix_lerp_g.weightF32[4096]
-
blk.15.time_mix_lerp_k.weightF32[4096]
-
blk.15.time_mix_lerp_r.weightF32[4096]
-
blk.15.time_mix_lerp_v.weightF32[4096]
-
blk.15.time_mix_lerp_w.weightF32[4096]
-
blk.15.time_mix_lerp_x.weightF32[4096]
-
blk.15.time_mix_ln.biasF32[4096]
-
blk.15.time_mix_ln.weightF32[4096]
-
blk.15.time_mix_output.weightQ8_0[4096, 4096]
-
blk.15.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.15.time_mix_value.weightQ8_0[4096, 4096]
-
blk.15.time_mix_w1.weightF32[4096, 320]
-
blk.15.time_mix_w2.weightF32[64, 4096, 5]
-
blk.16.attn_norm.biasF32[4096]
-
blk.16.attn_norm.weightF32[4096]
-
blk.16.attn_norm_2.biasF32[4096]
-
blk.16.attn_norm_2.weightF32[4096]
-
blk.16.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.16.channel_mix_lerp_k.weightF32[4096]
-
blk.16.channel_mix_lerp_r.weightF32[4096]
-
blk.16.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.16.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.16.time_mix_decay.weightF32[4096]
-
blk.16.time_mix_decay_w1.weightF32[4096, 128]
-
blk.16.time_mix_decay_w2.weightF32[128, 4096]
-
blk.16.time_mix_first.weightF32[64, 64]
-
blk.16.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.16.time_mix_key.weightQ8_0[4096, 4096]
-
blk.16.time_mix_lerp_g.weightF32[4096]
-
blk.16.time_mix_lerp_k.weightF32[4096]
-
blk.16.time_mix_lerp_r.weightF32[4096]
-
blk.16.time_mix_lerp_v.weightF32[4096]
-
blk.16.time_mix_lerp_w.weightF32[4096]
-
blk.16.time_mix_lerp_x.weightF32[4096]
-
blk.16.time_mix_ln.biasF32[4096]
-
blk.16.time_mix_ln.weightF32[4096]
-
blk.16.time_mix_output.weightQ8_0[4096, 4096]
-
blk.16.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.16.time_mix_value.weightQ8_0[4096, 4096]
-
blk.16.time_mix_w1.weightF32[4096, 320]
-
blk.16.time_mix_w2.weightF32[64, 4096, 5]
-
blk.17.attn_norm.biasF32[4096]
-
blk.17.attn_norm.weightF32[4096]
-
blk.17.attn_norm_2.biasF32[4096]
-
blk.17.attn_norm_2.weightF32[4096]
-
blk.17.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.17.channel_mix_lerp_k.weightF32[4096]
-
blk.17.channel_mix_lerp_r.weightF32[4096]
-
blk.17.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.17.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.17.time_mix_decay.weightF32[4096]
-
blk.17.time_mix_decay_w1.weightF32[4096, 128]
-
blk.17.time_mix_decay_w2.weightF32[128, 4096]
-
blk.17.time_mix_first.weightF32[64, 64]
-
blk.17.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.17.time_mix_key.weightQ8_0[4096, 4096]
-
blk.17.time_mix_lerp_g.weightF32[4096]
-
blk.17.time_mix_lerp_k.weightF32[4096]
-
blk.17.time_mix_lerp_r.weightF32[4096]
-
blk.17.time_mix_lerp_v.weightF32[4096]
-
blk.17.time_mix_lerp_w.weightF32[4096]
-
blk.17.time_mix_lerp_x.weightF32[4096]
-
blk.17.time_mix_ln.biasF32[4096]
-
blk.17.time_mix_ln.weightF32[4096]
-
blk.17.time_mix_output.weightQ8_0[4096, 4096]
-
blk.17.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.17.time_mix_value.weightQ8_0[4096, 4096]
-
blk.17.time_mix_w1.weightF32[4096, 320]
-
blk.17.time_mix_w2.weightF32[64, 4096, 5]
-
blk.18.attn_norm.biasF32[4096]
-
blk.18.attn_norm.weightF32[4096]
-
blk.18.attn_norm_2.biasF32[4096]
-
blk.18.attn_norm_2.weightF32[4096]
-
blk.18.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.18.channel_mix_lerp_k.weightF32[4096]
-
blk.18.channel_mix_lerp_r.weightF32[4096]
-
blk.18.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.18.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.18.time_mix_decay.weightF32[4096]
-
blk.18.time_mix_decay_w1.weightF32[4096, 128]
-
blk.18.time_mix_decay_w2.weightF32[128, 4096]
-
blk.18.time_mix_first.weightF32[64, 64]
-
blk.18.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.18.time_mix_key.weightQ8_0[4096, 4096]
-
blk.18.time_mix_lerp_g.weightF32[4096]
-
blk.18.time_mix_lerp_k.weightF32[4096]
-
blk.18.time_mix_lerp_r.weightF32[4096]
-
blk.18.time_mix_lerp_v.weightF32[4096]
-
blk.18.time_mix_lerp_w.weightF32[4096]
-
blk.18.time_mix_lerp_x.weightF32[4096]
-
blk.18.time_mix_ln.biasF32[4096]
-
blk.18.time_mix_ln.weightF32[4096]
-
blk.18.time_mix_output.weightQ8_0[4096, 4096]
-
blk.18.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.18.time_mix_value.weightQ8_0[4096, 4096]
-
blk.18.time_mix_w1.weightF32[4096, 320]
-
blk.18.time_mix_w2.weightF32[64, 4096, 5]
-
blk.19.attn_norm.biasF32[4096]
-
blk.19.attn_norm.weightF32[4096]
-
blk.19.attn_norm_2.biasF32[4096]
-
blk.19.attn_norm_2.weightF32[4096]
-
blk.19.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.19.channel_mix_lerp_k.weightF32[4096]
-
blk.19.channel_mix_lerp_r.weightF32[4096]
-
blk.19.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.19.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.19.time_mix_decay.weightF32[4096]
-
blk.19.time_mix_decay_w1.weightF32[4096, 128]
-
blk.19.time_mix_decay_w2.weightF32[128, 4096]
-
blk.19.time_mix_first.weightF32[64, 64]
-
blk.19.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.19.time_mix_key.weightQ8_0[4096, 4096]
-
blk.19.time_mix_lerp_g.weightF32[4096]
-
blk.19.time_mix_lerp_k.weightF32[4096]
-
blk.19.time_mix_lerp_r.weightF32[4096]
-
blk.19.time_mix_lerp_v.weightF32[4096]
-
blk.19.time_mix_lerp_w.weightF32[4096]
-
blk.19.time_mix_lerp_x.weightF32[4096]
-
blk.19.time_mix_ln.biasF32[4096]
-
blk.19.time_mix_ln.weightF32[4096]
-
blk.19.time_mix_output.weightQ8_0[4096, 4096]
-
blk.19.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.19.time_mix_value.weightQ8_0[4096, 4096]
-
blk.19.time_mix_w1.weightF32[4096, 320]
-
blk.19.time_mix_w2.weightF32[64, 4096, 5]
-
blk.20.attn_norm.biasF32[4096]
-
blk.20.attn_norm.weightF32[4096]
-
blk.20.attn_norm_2.biasF32[4096]
-
blk.20.attn_norm_2.weightF32[4096]
-
blk.20.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.20.channel_mix_lerp_k.weightF32[4096]
-
blk.20.channel_mix_lerp_r.weightF32[4096]
-
blk.20.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.20.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.20.time_mix_decay.weightF32[4096]
-
blk.20.time_mix_decay_w1.weightF32[4096, 128]
-
blk.20.time_mix_decay_w2.weightF32[128, 4096]
-
blk.20.time_mix_first.weightF32[64, 64]
-
blk.20.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.20.time_mix_key.weightQ8_0[4096, 4096]
-
blk.20.time_mix_lerp_g.weightF32[4096]
-
blk.20.time_mix_lerp_k.weightF32[4096]
-
blk.20.time_mix_lerp_r.weightF32[4096]
-
blk.20.time_mix_lerp_v.weightF32[4096]
-
blk.20.time_mix_lerp_w.weightF32[4096]
-
blk.20.time_mix_lerp_x.weightF32[4096]
-
blk.20.time_mix_ln.biasF32[4096]
-
blk.20.time_mix_ln.weightF32[4096]
-
blk.20.time_mix_output.weightQ8_0[4096, 4096]
-
blk.20.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.20.time_mix_value.weightQ8_0[4096, 4096]
-
blk.20.time_mix_w1.weightF32[4096, 320]
-
blk.20.time_mix_w2.weightF32[64, 4096, 5]
-
blk.21.attn_norm.biasF32[4096]
-
blk.21.attn_norm.weightF32[4096]
-
blk.21.attn_norm_2.biasF32[4096]
-
blk.21.attn_norm_2.weightF32[4096]
-
blk.21.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.21.channel_mix_lerp_k.weightF32[4096]
-
blk.21.channel_mix_lerp_r.weightF32[4096]
-
blk.21.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.21.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.21.time_mix_decay.weightF32[4096]
-
blk.21.time_mix_decay_w1.weightF32[4096, 128]
-
blk.21.time_mix_decay_w2.weightF32[128, 4096]
-
blk.21.time_mix_first.weightF32[64, 64]
-
blk.21.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.21.time_mix_key.weightQ8_0[4096, 4096]
-
blk.21.time_mix_lerp_g.weightF32[4096]
-
blk.21.time_mix_lerp_k.weightF32[4096]
-
blk.21.time_mix_lerp_r.weightF32[4096]
-
blk.21.time_mix_lerp_v.weightF32[4096]
-
blk.21.time_mix_lerp_w.weightF32[4096]
-
blk.21.time_mix_lerp_x.weightF32[4096]
-
blk.21.time_mix_ln.biasF32[4096]
-
blk.21.time_mix_ln.weightF32[4096]
-
blk.21.time_mix_output.weightQ8_0[4096, 4096]
-
blk.21.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.21.time_mix_value.weightQ8_0[4096, 4096]
-
blk.21.time_mix_w1.weightF32[4096, 320]
-
blk.21.time_mix_w2.weightF32[64, 4096, 5]
-
blk.22.attn_norm.biasF32[4096]
-
blk.22.attn_norm.weightF32[4096]
-
blk.22.attn_norm_2.biasF32[4096]
-
blk.22.attn_norm_2.weightF32[4096]
-
blk.22.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.22.channel_mix_lerp_k.weightF32[4096]
-
blk.22.channel_mix_lerp_r.weightF32[4096]
-
blk.22.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.22.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.22.time_mix_decay.weightF32[4096]
-
blk.22.time_mix_decay_w1.weightF32[4096, 128]
-
blk.22.time_mix_decay_w2.weightF32[128, 4096]
-
blk.22.time_mix_first.weightF32[64, 64]
-
blk.22.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.22.time_mix_key.weightQ8_0[4096, 4096]
-
blk.22.time_mix_lerp_g.weightF32[4096]
-
blk.22.time_mix_lerp_k.weightF32[4096]
-
blk.22.time_mix_lerp_r.weightF32[4096]
-
blk.22.time_mix_lerp_v.weightF32[4096]
-
blk.22.time_mix_lerp_w.weightF32[4096]
-
blk.22.time_mix_lerp_x.weightF32[4096]
-
blk.22.time_mix_ln.biasF32[4096]
-
blk.22.time_mix_ln.weightF32[4096]
-
blk.22.time_mix_output.weightQ8_0[4096, 4096]
-
blk.22.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.22.time_mix_value.weightQ8_0[4096, 4096]
-
blk.22.time_mix_w1.weightF32[4096, 320]
-
blk.22.time_mix_w2.weightF32[64, 4096, 5]
-
blk.23.attn_norm.biasF32[4096]
-
blk.23.attn_norm.weightF32[4096]
-
blk.23.attn_norm_2.biasF32[4096]
-
blk.23.attn_norm_2.weightF32[4096]
-
blk.23.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.23.channel_mix_lerp_k.weightF32[4096]
-
blk.23.channel_mix_lerp_r.weightF32[4096]
-
blk.23.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.23.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.23.time_mix_decay.weightF32[4096]
-
blk.23.time_mix_decay_w1.weightF32[4096, 128]
-
blk.23.time_mix_decay_w2.weightF32[128, 4096]
-
blk.23.time_mix_first.weightF32[64, 64]
-
blk.23.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.23.time_mix_key.weightQ8_0[4096, 4096]
-
blk.23.time_mix_lerp_g.weightF32[4096]
-
blk.23.time_mix_lerp_k.weightF32[4096]
-
blk.23.time_mix_lerp_r.weightF32[4096]
-
blk.23.time_mix_lerp_v.weightF32[4096]
-
blk.23.time_mix_lerp_w.weightF32[4096]
-
blk.23.time_mix_lerp_x.weightF32[4096]
-
blk.23.time_mix_ln.biasF32[4096]
-
blk.23.time_mix_ln.weightF32[4096]
-
blk.23.time_mix_output.weightQ8_0[4096, 4096]
-
blk.23.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.23.time_mix_value.weightQ8_0[4096, 4096]
-
blk.23.time_mix_w1.weightF32[4096, 320]
-
blk.23.time_mix_w2.weightF32[64, 4096, 5]
-
blk.24.attn_norm.biasF32[4096]
-
blk.24.attn_norm.weightF32[4096]
-
blk.24.attn_norm_2.biasF32[4096]
-
blk.24.attn_norm_2.weightF32[4096]
-
blk.24.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.24.channel_mix_lerp_k.weightF32[4096]
-
blk.24.channel_mix_lerp_r.weightF32[4096]
-
blk.24.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.24.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.24.time_mix_decay.weightF32[4096]
-
blk.24.time_mix_decay_w1.weightF32[4096, 128]
-
blk.24.time_mix_decay_w2.weightF32[128, 4096]
-
blk.24.time_mix_first.weightF32[64, 64]
-
blk.24.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.24.time_mix_key.weightQ8_0[4096, 4096]
-
blk.24.time_mix_lerp_g.weightF32[4096]
-
blk.24.time_mix_lerp_k.weightF32[4096]
-
blk.24.time_mix_lerp_r.weightF32[4096]
-
blk.24.time_mix_lerp_v.weightF32[4096]
-
blk.24.time_mix_lerp_w.weightF32[4096]
-
blk.24.time_mix_lerp_x.weightF32[4096]
-
blk.24.time_mix_ln.biasF32[4096]
-
blk.24.time_mix_ln.weightF32[4096]
-
blk.24.time_mix_output.weightQ8_0[4096, 4096]
-
blk.24.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.24.time_mix_value.weightQ8_0[4096, 4096]
-
blk.24.time_mix_w1.weightF32[4096, 320]
-
blk.24.time_mix_w2.weightF32[64, 4096, 5]
-
blk.25.attn_norm.biasF32[4096]
-
blk.25.attn_norm.weightF32[4096]
-
blk.25.attn_norm_2.biasF32[4096]
-
blk.25.attn_norm_2.weightF32[4096]
-
blk.25.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.25.channel_mix_lerp_k.weightF32[4096]
-
blk.25.channel_mix_lerp_r.weightF32[4096]
-
blk.25.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.25.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.25.time_mix_decay.weightF32[4096]
-
blk.25.time_mix_decay_w1.weightF32[4096, 128]
-
blk.25.time_mix_decay_w2.weightF32[128, 4096]
-
blk.25.time_mix_first.weightF32[64, 64]
-
blk.25.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.25.time_mix_key.weightQ8_0[4096, 4096]
-
blk.25.time_mix_lerp_g.weightF32[4096]
-
blk.25.time_mix_lerp_k.weightF32[4096]
-
blk.25.time_mix_lerp_r.weightF32[4096]
-
blk.25.time_mix_lerp_v.weightF32[4096]
-
blk.25.time_mix_lerp_w.weightF32[4096]
-
blk.25.time_mix_lerp_x.weightF32[4096]
-
blk.25.time_mix_ln.biasF32[4096]
-
blk.25.time_mix_ln.weightF32[4096]
-
blk.25.time_mix_output.weightQ8_0[4096, 4096]
-
blk.25.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.25.time_mix_value.weightQ8_0[4096, 4096]
-
blk.25.time_mix_w1.weightF32[4096, 320]
-
blk.25.time_mix_w2.weightF32[64, 4096, 5]
-
blk.26.attn_norm.biasF32[4096]
-
blk.26.attn_norm.weightF32[4096]
-
blk.26.attn_norm_2.biasF32[4096]
-
blk.26.attn_norm_2.weightF32[4096]
-
blk.26.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.26.channel_mix_lerp_k.weightF32[4096]
-
blk.26.channel_mix_lerp_r.weightF32[4096]
-
blk.26.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.26.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.26.time_mix_decay.weightF32[4096]
-
blk.26.time_mix_decay_w1.weightF32[4096, 128]
-
blk.26.time_mix_decay_w2.weightF32[128, 4096]
-
blk.26.time_mix_first.weightF32[64, 64]
-
blk.26.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.26.time_mix_key.weightQ8_0[4096, 4096]
-
blk.26.time_mix_lerp_g.weightF32[4096]
-
blk.26.time_mix_lerp_k.weightF32[4096]
-
blk.26.time_mix_lerp_r.weightF32[4096]
-
blk.26.time_mix_lerp_v.weightF32[4096]
-
blk.26.time_mix_lerp_w.weightF32[4096]
-
blk.26.time_mix_lerp_x.weightF32[4096]
-
blk.26.time_mix_ln.biasF32[4096]
-
blk.26.time_mix_ln.weightF32[4096]
-
blk.26.time_mix_output.weightQ8_0[4096, 4096]
-
blk.26.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.26.time_mix_value.weightQ8_0[4096, 4096]
-
blk.26.time_mix_w1.weightF32[4096, 320]
-
blk.26.time_mix_w2.weightF32[64, 4096, 5]
-
blk.27.attn_norm.biasF32[4096]
-
blk.27.attn_norm.weightF32[4096]
-
blk.27.attn_norm_2.biasF32[4096]
-
blk.27.attn_norm_2.weightF32[4096]
-
blk.27.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.27.channel_mix_lerp_k.weightF32[4096]
-
blk.27.channel_mix_lerp_r.weightF32[4096]
-
blk.27.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.27.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.27.time_mix_decay.weightF32[4096]
-
blk.27.time_mix_decay_w1.weightF32[4096, 128]
-
blk.27.time_mix_decay_w2.weightF32[128, 4096]
-
blk.27.time_mix_first.weightF32[64, 64]
-
blk.27.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.27.time_mix_key.weightQ8_0[4096, 4096]
-
blk.27.time_mix_lerp_g.weightF32[4096]
-
blk.27.time_mix_lerp_k.weightF32[4096]
-
blk.27.time_mix_lerp_r.weightF32[4096]
-
blk.27.time_mix_lerp_v.weightF32[4096]
-
blk.27.time_mix_lerp_w.weightF32[4096]
-
blk.27.time_mix_lerp_x.weightF32[4096]
-
blk.27.time_mix_ln.biasF32[4096]
-
blk.27.time_mix_ln.weightF32[4096]
-
blk.27.time_mix_output.weightQ8_0[4096, 4096]
-
blk.27.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.27.time_mix_value.weightQ8_0[4096, 4096]
-
blk.27.time_mix_w1.weightF32[4096, 320]
-
blk.27.time_mix_w2.weightF32[64, 4096, 5]
-
blk.28.attn_norm.biasF32[4096]
-
blk.28.attn_norm.weightF32[4096]
-
blk.28.attn_norm_2.biasF32[4096]
-
blk.28.attn_norm_2.weightF32[4096]
-
blk.28.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.28.channel_mix_lerp_k.weightF32[4096]
-
blk.28.channel_mix_lerp_r.weightF32[4096]
-
blk.28.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.28.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.28.time_mix_decay.weightF32[4096]
-
blk.28.time_mix_decay_w1.weightF32[4096, 128]
-
blk.28.time_mix_decay_w2.weightF32[128, 4096]
-
blk.28.time_mix_first.weightF32[64, 64]
-
blk.28.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.28.time_mix_key.weightQ8_0[4096, 4096]
-
blk.28.time_mix_lerp_g.weightF32[4096]
-
blk.28.time_mix_lerp_k.weightF32[4096]
-
blk.28.time_mix_lerp_r.weightF32[4096]
-
blk.28.time_mix_lerp_v.weightF32[4096]
-
blk.28.time_mix_lerp_w.weightF32[4096]
-
blk.28.time_mix_lerp_x.weightF32[4096]
-
blk.28.time_mix_ln.biasF32[4096]
-
blk.28.time_mix_ln.weightF32[4096]
-
blk.28.time_mix_output.weightQ8_0[4096, 4096]
-
blk.28.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.28.time_mix_value.weightQ8_0[4096, 4096]
-
blk.28.time_mix_w1.weightF32[4096, 320]
-
blk.28.time_mix_w2.weightF32[64, 4096, 5]
-
blk.29.attn_norm.biasF32[4096]
-
blk.29.attn_norm.weightF32[4096]
-
blk.29.attn_norm_2.biasF32[4096]
-
blk.29.attn_norm_2.weightF32[4096]
-
blk.29.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.29.channel_mix_lerp_k.weightF32[4096]
-
blk.29.channel_mix_lerp_r.weightF32[4096]
-
blk.29.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.29.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.29.time_mix_decay.weightF32[4096]
-
blk.29.time_mix_decay_w1.weightF32[4096, 128]
-
blk.29.time_mix_decay_w2.weightF32[128, 4096]
-
blk.29.time_mix_first.weightF32[64, 64]
-
blk.29.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.29.time_mix_key.weightQ8_0[4096, 4096]
-
blk.29.time_mix_lerp_g.weightF32[4096]
-
blk.29.time_mix_lerp_k.weightF32[4096]
-
blk.29.time_mix_lerp_r.weightF32[4096]
-
blk.29.time_mix_lerp_v.weightF32[4096]
-
blk.29.time_mix_lerp_w.weightF32[4096]
-
blk.29.time_mix_lerp_x.weightF32[4096]
-
blk.29.time_mix_ln.biasF32[4096]
-
blk.29.time_mix_ln.weightF32[4096]
-
blk.29.time_mix_output.weightQ8_0[4096, 4096]
-
blk.29.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.29.time_mix_value.weightQ8_0[4096, 4096]
-
blk.29.time_mix_w1.weightF32[4096, 320]
-
blk.29.time_mix_w2.weightF32[64, 4096, 5]
-
blk.30.attn_norm.biasF32[4096]
-
blk.30.attn_norm.weightF32[4096]
-
blk.30.attn_norm_2.biasF32[4096]
-
blk.30.attn_norm_2.weightF32[4096]
-
blk.30.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.30.channel_mix_lerp_k.weightF32[4096]
-
blk.30.channel_mix_lerp_r.weightF32[4096]
-
blk.30.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.30.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.30.time_mix_decay.weightF32[4096]
-
blk.30.time_mix_decay_w1.weightF32[4096, 128]
-
blk.30.time_mix_decay_w2.weightF32[128, 4096]
-
blk.30.time_mix_first.weightF32[64, 64]
-
blk.30.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.30.time_mix_key.weightQ8_0[4096, 4096]
-
blk.30.time_mix_lerp_g.weightF32[4096]
-
blk.30.time_mix_lerp_k.weightF32[4096]
-
blk.30.time_mix_lerp_r.weightF32[4096]
-
blk.30.time_mix_lerp_v.weightF32[4096]
-
blk.30.time_mix_lerp_w.weightF32[4096]
-
blk.30.time_mix_lerp_x.weightF32[4096]
-
blk.30.time_mix_ln.biasF32[4096]
-
blk.30.time_mix_ln.weightF32[4096]
-
blk.30.time_mix_output.weightQ8_0[4096, 4096]
-
blk.30.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.30.time_mix_value.weightQ8_0[4096, 4096]
-
blk.30.time_mix_w1.weightF32[4096, 320]
-
blk.30.time_mix_w2.weightF32[64, 4096, 5]
-
blk.31.attn_norm.biasF32[4096]
-
blk.31.attn_norm.weightF32[4096]
-
blk.31.attn_norm_2.biasF32[4096]
-
blk.31.attn_norm_2.weightF32[4096]
-
blk.31.channel_mix_key.weightQ8_0[4096, 14336]
-
blk.31.channel_mix_lerp_k.weightF32[4096]
-
blk.31.channel_mix_lerp_r.weightF32[4096]
-
blk.31.channel_mix_receptance.weightQ8_0[4096, 4096]
-
blk.31.channel_mix_value.weightQ8_0[14336, 4096]
-
blk.31.time_mix_decay.weightF32[4096]
-
blk.31.time_mix_decay_w1.weightF32[4096, 128]
-
blk.31.time_mix_decay_w2.weightF32[128, 4096]
-
blk.31.time_mix_first.weightF32[64, 64]
-
blk.31.time_mix_gate.weightQ8_0[4096, 4096]
-
blk.31.time_mix_key.weightQ8_0[4096, 4096]
-
blk.31.time_mix_lerp_g.weightF32[4096]
-
blk.31.time_mix_lerp_k.weightF32[4096]
-
blk.31.time_mix_lerp_r.weightF32[4096]
-
blk.31.time_mix_lerp_v.weightF32[4096]
-
blk.31.time_mix_lerp_w.weightF32[4096]
-
blk.31.time_mix_lerp_x.weightF32[4096]
-
blk.31.time_mix_ln.biasF32[4096]
-
blk.31.time_mix_ln.weightF32[4096]
-
blk.31.time_mix_output.weightQ8_0[4096, 4096]
-
blk.31.time_mix_receptance.weightQ8_0[4096, 4096]
-
blk.31.time_mix_value.weightQ8_0[4096, 4096]
-
blk.31.time_mix_w1.weightF32[4096, 320]
-
blk.31.time_mix_w2.weightF32[64, 4096, 5]
-
output.weightQ8_0[4096, 65536]
-
output_norm.biasF32[4096]
-
token_embd_norm.biasF32[4096]
-
token_embd_norm.weightF32[4096]
-
output_norm.weightF32[4096]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31