Models
Docs
Pricing
Sign in
Download
Models
Download
Docs
Pricing
Sign in
ignarobledo20300
/
cyrah-rwkv
:latest
Updated
yesterday
Cancel
cyrah-rwkv:latest
...
/
model
fccdf16e67d6 · 70MB
Metadata
general.architecture
rwkv7
rwkv7
general.file_type
BF16
BF16
rwkv7.attention.decay_lora_rank
64
64
rwkv7.attention.gate_lora_rank
128
128
rwkv7.attention.head_count
0
0
rwkv7.attention.iclr_lora_rank
64
64
rwkv7.attention.layer_norm_epsilon
1e-05
1e-05
rwkv7.attention.value_residual_mix_lora_rank
32
32
rwkv7.block_count
8
8
rwkv7.context_length
1048576
1048576
rwkv7.embedding_length
512
512
rwkv7.feed_forward_length
2048
2048
rwkv7.wkv.head_size
64
64
tokenizer.ggml.add_bos_token
false
false
tokenizer.ggml.add_eos_token
false
false
tokenizer.ggml.bos_token_id
1
1
tokenizer.ggml.eos_token_id
2
2
tokenizer.ggml.merges
[Ġ t, Ġ a, i n, h e, r e, ...]
[Ġ t, Ġ a, i n, h e, r e, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
0
0
tokenizer.ggml.pre
gpt-2
gpt-2
tokenizer.ggml.token_type
[3, 3, 3, 1, 1, ...]
[3, 3, 3, 1, 1, ...]
tokenizer.ggml.tokens
[<|endoftext|>, <|im_start|>, <|im_end|>, !, ", ...]
[<|endoftext|>, <|im_start|>, <|im_end|>, !, ", ...]
tokenizer.ggml.unknown_token_id
0
0
Tensor
Name
Type
Shape
token_embd.weight
BF16
BF16
[512, 6400]
blk.0
blk.0.attn_norm.bias
F32
F32
[512]
blk.0.attn_norm.weight
F32
F32
[512]
blk.0.attn_norm_2.bias
F32
F32
[512]
blk.0.attn_norm_2.weight
F32
F32
[512]
blk.0.channel_mix_key.weight
BF16
BF16
[512, 2048]
blk.0.channel_mix_lerp_k.weight
F32
F32
[512]
blk.0.channel_mix_value.weight
BF16
BF16
[2048, 512]
blk.0.time_mix_a0.weight
F32
F32
[512]
blk.0.time_mix_a1.weight
BF16
BF16
[512, 64]
blk.0.time_mix_a2.weight
BF16
BF16
[64, 512]
blk.0.time_mix_g1.weight
BF16
BF16
[512, 128]
blk.0.time_mix_g2.weight
BF16
BF16
[128, 512]
blk.0.time_mix_k_a.weight
F32
F32
[512]
blk.0.time_mix_k_k.weight
F32
F32
[512]
blk.0.time_mix_key.weight
BF16
BF16
[512, 512]
blk.0.time_mix_lerp_fused.weight
F32
F32
[512, 1, 1, 6]
blk.0.time_mix_ln.bias
F32
F32
[512]
blk.0.time_mix_ln.weight
F32
F32
[512]
blk.0.time_mix_output.weight
BF16
BF16
[512, 512]
blk.0.time_mix_r_k.weight
F32
F32
[512]
blk.0.time_mix_receptance.weight
BF16
BF16
[512, 512]
blk.0.time_mix_v0.weight
F32
F32
[512]
blk.0.time_mix_v1.weight
BF16
BF16
[512, 64]
blk.0.time_mix_v2.weight
BF16
BF16
[64, 512]
blk.0.time_mix_value.weight
BF16
BF16
[512, 512]
blk.0.time_mix_w0.weight
F32
F32
[512]
blk.0.time_mix_w1.weight
F32
F32
[512, 64]
blk.0.time_mix_w2.weight
F32
F32
[64, 512]
blk.1
blk.1.attn_norm.bias
F32
F32
[512]
blk.1.attn_norm.weight
F32
F32
[512]
blk.1.attn_norm_2.bias
F32
F32
[512]
blk.1.attn_norm_2.weight
F32
F32
[512]
blk.1.channel_mix_key.weight
BF16
BF16
[512, 2048]
blk.1.channel_mix_lerp_k.weight
F32
F32
[512]
blk.1.channel_mix_value.weight
BF16
BF16
[2048, 512]
blk.1.time_mix_a0.weight
F32
F32
[512]
blk.1.time_mix_a1.weight
BF16
BF16
[512, 64]
blk.1.time_mix_a2.weight
BF16
BF16
[64, 512]
blk.1.time_mix_g1.weight
BF16
BF16
[512, 128]
blk.1.time_mix_g2.weight
BF16
BF16
[128, 512]
blk.1.time_mix_k_a.weight
F32
F32
[512]
blk.1.time_mix_k_k.weight
F32
F32
[512]
blk.1.time_mix_key.weight
BF16
BF16
[512, 512]
blk.1.time_mix_lerp_fused.weight
F32
F32
[512, 1, 1, 6]
blk.1.time_mix_ln.bias
F32
F32
[512]
blk.1.time_mix_ln.weight
F32
F32
[512]
blk.1.time_mix_output.weight
BF16
BF16
[512, 512]
blk.1.time_mix_r_k.weight
F32
F32
[512]
blk.1.time_mix_receptance.weight
BF16
BF16
[512, 512]
blk.1.time_mix_v0.weight
F32
F32
[512]
blk.1.time_mix_v1.weight
BF16
BF16
[512, 32]
blk.1.time_mix_v2.weight
BF16
BF16
[32, 512]
blk.1.time_mix_value.weight
BF16
BF16
[512, 512]
blk.1.time_mix_w0.weight
F32
F32
[512]
blk.1.time_mix_w1.weight
F32
F32
[512, 64]
blk.1.time_mix_w2.weight
F32
F32
[64, 512]
blk.2
blk.2.attn_norm.bias
F32
F32
[512]
blk.2.attn_norm.weight
F32
F32
[512]
blk.2.attn_norm_2.bias
F32
F32
[512]
blk.2.attn_norm_2.weight
F32
F32
[512]
blk.2.channel_mix_key.weight
BF16
BF16
[512, 2048]
blk.2.channel_mix_lerp_k.weight
F32
F32
[512]
blk.2.channel_mix_value.weight
BF16
BF16
[2048, 512]
blk.2.time_mix_a0.weight
F32
F32
[512]
blk.2.time_mix_a1.weight
BF16
BF16
[512, 64]
blk.2.time_mix_a2.weight
BF16
BF16
[64, 512]
blk.2.time_mix_g1.weight
BF16
BF16
[512, 128]
blk.2.time_mix_g2.weight
BF16
BF16
[128, 512]
blk.2.time_mix_k_a.weight
F32
F32
[512]
blk.2.time_mix_k_k.weight
F32
F32
[512]
blk.2.time_mix_key.weight
BF16
BF16
[512, 512]
blk.2.time_mix_lerp_fused.weight
F32
F32
[512, 1, 1, 6]
blk.2.time_mix_ln.bias
F32
F32
[512]
blk.2.time_mix_ln.weight
F32
F32
[512]
blk.2.time_mix_output.weight
BF16
BF16
[512, 512]
blk.2.time_mix_r_k.weight
F32
F32
[512]
blk.2.time_mix_receptance.weight
BF16
BF16
[512, 512]
blk.2.time_mix_v0.weight
F32
F32
[512]
blk.2.time_mix_v1.weight
BF16
BF16
[512, 32]
blk.2.time_mix_v2.weight
BF16
BF16
[32, 512]
blk.2.time_mix_value.weight
BF16
BF16
[512, 512]
blk.2.time_mix_w0.weight
F32
F32
[512]
blk.2.time_mix_w1.weight
F32
F32
[512, 64]
blk.2.time_mix_w2.weight
F32
F32
[64, 512]
blk.3
blk.3.attn_norm.bias
F32
F32
[512]
blk.3.attn_norm.weight
F32
F32
[512]
blk.3.attn_norm_2.bias
F32
F32
[512]
blk.3.attn_norm_2.weight
F32
F32
[512]
blk.3.channel_mix_key.weight
BF16
BF16
[512, 2048]
blk.3.channel_mix_lerp_k.weight
F32
F32
[512]
blk.3.channel_mix_value.weight
BF16
BF16
[2048, 512]
blk.3.time_mix_a0.weight
F32
F32
[512]
blk.3.time_mix_a1.weight
BF16
BF16
[512, 64]
blk.3.time_mix_a2.weight
BF16
BF16
[64, 512]
blk.3.time_mix_g1.weight
BF16
BF16
[512, 128]
blk.3.time_mix_g2.weight
BF16
BF16
[128, 512]
blk.3.time_mix_k_a.weight
F32
F32
[512]
blk.3.time_mix_k_k.weight
F32
F32
[512]
blk.3.time_mix_key.weight
BF16
BF16
[512, 512]
blk.3.time_mix_lerp_fused.weight
F32
F32
[512, 1, 1, 6]
blk.3.time_mix_ln.bias
F32
F32
[512]
blk.3.time_mix_ln.weight
F32
F32
[512]
blk.3.time_mix_output.weight
BF16
BF16
[512, 512]
blk.3.time_mix_r_k.weight
F32
F32
[512]
blk.3.time_mix_receptance.weight
BF16
BF16
[512, 512]
blk.3.time_mix_v0.weight
F32
F32
[512]
blk.3.time_mix_v1.weight
BF16
BF16
[512, 32]
blk.3.time_mix_v2.weight
BF16
BF16
[32, 512]
blk.3.time_mix_value.weight
BF16
BF16
[512, 512]
blk.3.time_mix_w0.weight
F32
F32
[512]
blk.3.time_mix_w1.weight
F32
F32
[512, 64]
blk.3.time_mix_w2.weight
F32
F32
[64, 512]
blk.4
blk.4.attn_norm.bias
F32
F32
[512]
blk.4.attn_norm.weight
F32
F32
[512]
blk.4.attn_norm_2.bias
F32
F32
[512]
blk.4.attn_norm_2.weight
F32
F32
[512]
blk.4.channel_mix_key.weight
BF16
BF16
[512, 2048]
blk.4.channel_mix_lerp_k.weight
F32
F32
[512]
blk.4.channel_mix_value.weight
BF16
BF16
[2048, 512]
blk.4.time_mix_a0.weight
F32
F32
[512]
blk.4.time_mix_a1.weight
BF16
BF16
[512, 64]
blk.4.time_mix_a2.weight
BF16
BF16
[64, 512]
blk.4.time_mix_g1.weight
BF16
BF16
[512, 128]
blk.4.time_mix_g2.weight
BF16
BF16
[128, 512]
blk.4.time_mix_k_a.weight
F32
F32
[512]
blk.4.time_mix_k_k.weight
F32
F32
[512]
blk.4.time_mix_key.weight
BF16
BF16
[512, 512]
blk.4.time_mix_lerp_fused.weight
F32
F32
[512, 1, 1, 6]
blk.4.time_mix_ln.bias
F32
F32
[512]
blk.4.time_mix_ln.weight
F32
F32
[512]
blk.4.time_mix_output.weight
BF16
BF16
[512, 512]
blk.4.time_mix_r_k.weight
F32
F32
[512]
blk.4.time_mix_receptance.weight
BF16
BF16
[512, 512]
blk.4.time_mix_v0.weight
F32
F32
[512]
blk.4.time_mix_v1.weight
BF16
BF16
[512, 32]
blk.4.time_mix_v2.weight
BF16
BF16
[32, 512]
blk.4.time_mix_value.weight
BF16
BF16
[512, 512]
blk.4.time_mix_w0.weight
F32
F32
[512]
blk.4.time_mix_w1.weight
F32
F32
[512, 64]
blk.4.time_mix_w2.weight
F32
F32
[64, 512]
blk.5
blk.5.attn_norm.bias
F32
F32
[512]
blk.5.attn_norm.weight
F32
F32
[512]
blk.5.attn_norm_2.bias
F32
F32
[512]
blk.5.attn_norm_2.weight
F32
F32
[512]
blk.5.channel_mix_key.weight
BF16
BF16
[512, 2048]
blk.5.channel_mix_lerp_k.weight
F32
F32
[512]
blk.5.channel_mix_value.weight
BF16
BF16
[2048, 512]
blk.5.time_mix_a0.weight
F32
F32
[512]
blk.5.time_mix_a1.weight
BF16
BF16
[512, 64]
blk.5.time_mix_a2.weight
BF16
BF16
[64, 512]
blk.5.time_mix_g1.weight
BF16
BF16
[512, 128]
blk.5.time_mix_g2.weight
BF16
BF16
[128, 512]
blk.5.time_mix_k_a.weight
F32
F32
[512]
blk.5.time_mix_k_k.weight
F32
F32
[512]
blk.5.time_mix_key.weight
BF16
BF16
[512, 512]
blk.5.time_mix_lerp_fused.weight
F32
F32
[512, 1, 1, 6]
blk.5.time_mix_ln.bias
F32
F32
[512]
blk.5.time_mix_ln.weight
F32
F32
[512]
blk.5.time_mix_output.weight
BF16
BF16
[512, 512]
blk.5.time_mix_r_k.weight
F32
F32
[512]
blk.5.time_mix_receptance.weight
BF16
BF16
[512, 512]
blk.5.time_mix_v0.weight
F32
F32
[512]
blk.5.time_mix_v1.weight
BF16
BF16
[512, 32]
blk.5.time_mix_v2.weight
BF16
BF16
[32, 512]
blk.5.time_mix_value.weight
BF16
BF16
[512, 512]
blk.5.time_mix_w0.weight
F32
F32
[512]
blk.5.time_mix_w1.weight
F32
F32
[512, 64]
blk.5.time_mix_w2.weight
F32
F32
[64, 512]
blk.6
blk.6.attn_norm.bias
F32
F32
[512]
blk.6.attn_norm.weight
F32
F32
[512]
blk.6.attn_norm_2.bias
F32
F32
[512]
blk.6.attn_norm_2.weight
F32
F32
[512]
blk.6.channel_mix_key.weight
BF16
BF16
[512, 2048]
blk.6.channel_mix_lerp_k.weight
F32
F32
[512]
blk.6.channel_mix_value.weight
BF16
BF16
[2048, 512]
blk.6.time_mix_a0.weight
F32
F32
[512]
blk.6.time_mix_a1.weight
BF16
BF16
[512, 64]
blk.6.time_mix_a2.weight
BF16
BF16
[64, 512]
blk.6.time_mix_g1.weight
BF16
BF16
[512, 128]
blk.6.time_mix_g2.weight
BF16
BF16
[128, 512]
blk.6.time_mix_k_a.weight
F32
F32
[512]
blk.6.time_mix_k_k.weight
F32
F32
[512]
blk.6.time_mix_key.weight
BF16
BF16
[512, 512]
blk.6.time_mix_lerp_fused.weight
F32
F32
[512, 1, 1, 6]
blk.6.time_mix_ln.bias
F32
F32
[512]
blk.6.time_mix_ln.weight
F32
F32
[512]
blk.6.time_mix_output.weight
BF16
BF16
[512, 512]
blk.6.time_mix_r_k.weight
F32
F32
[512]
blk.6.time_mix_receptance.weight
BF16
BF16
[512, 512]
blk.6.time_mix_v0.weight
F32
F32
[512]
blk.6.time_mix_v1.weight
BF16
BF16
[512, 32]
blk.6.time_mix_v2.weight
BF16
BF16
[32, 512]
blk.6.time_mix_value.weight
BF16
BF16
[512, 512]
blk.6.time_mix_w0.weight
F32
F32
[512]
blk.6.time_mix_w1.weight
F32
F32
[512, 64]
blk.6.time_mix_w2.weight
F32
F32
[64, 512]
blk.7
blk.7.attn_norm.bias
F32
F32
[512]
blk.7.attn_norm.weight
F32
F32
[512]
blk.7.attn_norm_2.bias
F32
F32
[512]
blk.7.attn_norm_2.weight
F32
F32
[512]
blk.7.channel_mix_key.weight
BF16
BF16
[512, 2048]
blk.7.channel_mix_lerp_k.weight
F32
F32
[512]
blk.7.channel_mix_value.weight
BF16
BF16
[2048, 512]
blk.7.time_mix_a0.weight
F32
F32
[512]
blk.7.time_mix_a1.weight
BF16
BF16
[512, 64]
blk.7.time_mix_a2.weight
BF16
BF16
[64, 512]
blk.7.time_mix_g1.weight
BF16
BF16
[512, 128]
blk.7.time_mix_g2.weight
BF16
BF16
[128, 512]
blk.7.time_mix_k_a.weight
F32
F32
[512]
blk.7.time_mix_k_k.weight
F32
F32
[512]
blk.7.time_mix_key.weight
BF16
BF16
[512, 512]
blk.7.time_mix_lerp_fused.weight
F32
F32
[512, 1, 1, 6]
blk.7.time_mix_ln.bias
F32
F32
[512]
blk.7.time_mix_ln.weight
F32
F32
[512]
blk.7.time_mix_output.weight
BF16
BF16
[512, 512]
blk.7.time_mix_r_k.weight
F32
F32
[512]
blk.7.time_mix_receptance.weight
BF16
BF16
[512, 512]
blk.7.time_mix_v0.weight
F32
F32
[512]
blk.7.time_mix_v1.weight
BF16
BF16
[512, 32]
blk.7.time_mix_v2.weight
BF16
BF16
[32, 512]
blk.7.time_mix_value.weight
BF16
BF16
[512, 512]
blk.7.time_mix_w0.weight
F32
F32
[512]
blk.7.time_mix_w1.weight
F32
F32
[512, 64]
blk.7.time_mix_w2.weight
F32
F32
[64, 512]
output.weight
BF16
BF16
[512, 6400]
output_norm.bias
F32
F32
[512]
token_embd_norm.bias
F32
F32
[512]
token_embd_norm.weight
F32
F32
[512]
output_norm.weight
F32
F32
[512]