4 1 week ago

tools thinking
fccdf16e67d6 · 70MB
    Metadata
  • general.architecture
    rwkv7
  • general.file_type
    BF16
  • rwkv7.attention.decay_lora_rank
    64
  • rwkv7.attention.gate_lora_rank
    128
  • rwkv7.attention.head_count
    0
  • rwkv7.attention.iclr_lora_rank
    64
  • rwkv7.attention.layer_norm_epsilon
    1e-05
  • rwkv7.attention.value_residual_mix_lora_rank
    32
  • rwkv7.block_count
    8
  • rwkv7.context_length
    1048576
  • rwkv7.embedding_length
    512
  • rwkv7.feed_forward_length
    2048
  • rwkv7.wkv.head_size
    64
  • tokenizer.ggml.add_bos_token
    false
  • tokenizer.ggml.add_eos_token
    false
  • tokenizer.ggml.bos_token_id
    1
  • tokenizer.ggml.eos_token_id
    2
  • tokenizer.ggml.merges
    [Ġ t, Ġ a, i n, h e, r e, ...]
  • tokenizer.ggml.model
    gpt2
  • tokenizer.ggml.padding_token_id
    0
  • tokenizer.ggml.pre
    gpt-2
  • tokenizer.ggml.token_type
    [3, 3, 3, 1, 1, ...]
  • tokenizer.ggml.tokens
    [<|endoftext|>, <|im_start|>, <|im_end|>, !, ", ...]
  • tokenizer.ggml.unknown_token_id
    0
  • Tensor
  • token_embd.weight
    BF16
    [512, 6400]
  • blk.0
  • blk.0.attn_norm.bias
    F32
    [512]
  • blk.0.attn_norm.weight
    F32
    [512]
  • blk.0.attn_norm_2.bias
    F32
    [512]
  • blk.0.attn_norm_2.weight
    F32
    [512]
  • blk.0.channel_mix_key.weight
    BF16
    [512, 2048]
  • blk.0.channel_mix_lerp_k.weight
    F32
    [512]
  • blk.0.channel_mix_value.weight
    BF16
    [2048, 512]
  • blk.0.time_mix_a0.weight
    F32
    [512]
  • blk.0.time_mix_a1.weight
    BF16
    [512, 64]
  • blk.0.time_mix_a2.weight
    BF16
    [64, 512]
  • blk.0.time_mix_g1.weight
    BF16
    [512, 128]
  • blk.0.time_mix_g2.weight
    BF16
    [128, 512]
  • blk.0.time_mix_k_a.weight
    F32
    [512]
  • blk.0.time_mix_k_k.weight
    F32
    [512]
  • blk.0.time_mix_key.weight
    BF16
    [512, 512]
  • blk.0.time_mix_lerp_fused.weight
    F32
    [512, 1, 1, 6]
  • blk.0.time_mix_ln.bias
    F32
    [512]
  • blk.0.time_mix_ln.weight
    F32
    [512]
  • blk.0.time_mix_output.weight
    BF16
    [512, 512]
  • blk.0.time_mix_r_k.weight
    F32
    [512]
  • blk.0.time_mix_receptance.weight
    BF16
    [512, 512]
  • blk.0.time_mix_v0.weight
    F32
    [512]
  • blk.0.time_mix_v1.weight
    BF16
    [512, 64]
  • blk.0.time_mix_v2.weight
    BF16
    [64, 512]
  • blk.0.time_mix_value.weight
    BF16
    [512, 512]
  • blk.0.time_mix_w0.weight
    F32
    [512]
  • blk.0.time_mix_w1.weight
    F32
    [512, 64]
  • blk.0.time_mix_w2.weight
    F32
    [64, 512]
  • blk.1
  • blk.1.attn_norm.bias
    F32
    [512]
  • blk.1.attn_norm.weight
    F32
    [512]
  • blk.1.attn_norm_2.bias
    F32
    [512]
  • blk.1.attn_norm_2.weight
    F32
    [512]
  • blk.1.channel_mix_key.weight
    BF16
    [512, 2048]
  • blk.1.channel_mix_lerp_k.weight
    F32
    [512]
  • blk.1.channel_mix_value.weight
    BF16
    [2048, 512]
  • blk.1.time_mix_a0.weight
    F32
    [512]
  • blk.1.time_mix_a1.weight
    BF16
    [512, 64]
  • blk.1.time_mix_a2.weight
    BF16
    [64, 512]
  • blk.1.time_mix_g1.weight
    BF16
    [512, 128]
  • blk.1.time_mix_g2.weight
    BF16
    [128, 512]
  • blk.1.time_mix_k_a.weight
    F32
    [512]
  • blk.1.time_mix_k_k.weight
    F32
    [512]
  • blk.1.time_mix_key.weight
    BF16
    [512, 512]
  • blk.1.time_mix_lerp_fused.weight
    F32
    [512, 1, 1, 6]
  • blk.1.time_mix_ln.bias
    F32
    [512]
  • blk.1.time_mix_ln.weight
    F32
    [512]
  • blk.1.time_mix_output.weight
    BF16
    [512, 512]
  • blk.1.time_mix_r_k.weight
    F32
    [512]
  • blk.1.time_mix_receptance.weight
    BF16
    [512, 512]
  • blk.1.time_mix_v0.weight
    F32
    [512]
  • blk.1.time_mix_v1.weight
    BF16
    [512, 32]
  • blk.1.time_mix_v2.weight
    BF16
    [32, 512]
  • blk.1.time_mix_value.weight
    BF16
    [512, 512]
  • blk.1.time_mix_w0.weight
    F32
    [512]
  • blk.1.time_mix_w1.weight
    F32
    [512, 64]
  • blk.1.time_mix_w2.weight
    F32
    [64, 512]
  • blk.2
  • blk.2.attn_norm.bias
    F32
    [512]
  • blk.2.attn_norm.weight
    F32
    [512]
  • blk.2.attn_norm_2.bias
    F32
    [512]
  • blk.2.attn_norm_2.weight
    F32
    [512]
  • blk.2.channel_mix_key.weight
    BF16
    [512, 2048]
  • blk.2.channel_mix_lerp_k.weight
    F32
    [512]
  • blk.2.channel_mix_value.weight
    BF16
    [2048, 512]
  • blk.2.time_mix_a0.weight
    F32
    [512]
  • blk.2.time_mix_a1.weight
    BF16
    [512, 64]
  • blk.2.time_mix_a2.weight
    BF16
    [64, 512]
  • blk.2.time_mix_g1.weight
    BF16
    [512, 128]
  • blk.2.time_mix_g2.weight
    BF16
    [128, 512]
  • blk.2.time_mix_k_a.weight
    F32
    [512]
  • blk.2.time_mix_k_k.weight
    F32
    [512]
  • blk.2.time_mix_key.weight
    BF16
    [512, 512]
  • blk.2.time_mix_lerp_fused.weight
    F32
    [512, 1, 1, 6]
  • blk.2.time_mix_ln.bias
    F32
    [512]
  • blk.2.time_mix_ln.weight
    F32
    [512]
  • blk.2.time_mix_output.weight
    BF16
    [512, 512]
  • blk.2.time_mix_r_k.weight
    F32
    [512]
  • blk.2.time_mix_receptance.weight
    BF16
    [512, 512]
  • blk.2.time_mix_v0.weight
    F32
    [512]
  • blk.2.time_mix_v1.weight
    BF16
    [512, 32]
  • blk.2.time_mix_v2.weight
    BF16
    [32, 512]
  • blk.2.time_mix_value.weight
    BF16
    [512, 512]
  • blk.2.time_mix_w0.weight
    F32
    [512]
  • blk.2.time_mix_w1.weight
    F32
    [512, 64]
  • blk.2.time_mix_w2.weight
    F32
    [64, 512]
  • blk.3
  • blk.3.attn_norm.bias
    F32
    [512]
  • blk.3.attn_norm.weight
    F32
    [512]
  • blk.3.attn_norm_2.bias
    F32
    [512]
  • blk.3.attn_norm_2.weight
    F32
    [512]
  • blk.3.channel_mix_key.weight
    BF16
    [512, 2048]
  • blk.3.channel_mix_lerp_k.weight
    F32
    [512]
  • blk.3.channel_mix_value.weight
    BF16
    [2048, 512]
  • blk.3.time_mix_a0.weight
    F32
    [512]
  • blk.3.time_mix_a1.weight
    BF16
    [512, 64]
  • blk.3.time_mix_a2.weight
    BF16
    [64, 512]
  • blk.3.time_mix_g1.weight
    BF16
    [512, 128]
  • blk.3.time_mix_g2.weight
    BF16
    [128, 512]
  • blk.3.time_mix_k_a.weight
    F32
    [512]
  • blk.3.time_mix_k_k.weight
    F32
    [512]
  • blk.3.time_mix_key.weight
    BF16
    [512, 512]
  • blk.3.time_mix_lerp_fused.weight
    F32
    [512, 1, 1, 6]
  • blk.3.time_mix_ln.bias
    F32
    [512]
  • blk.3.time_mix_ln.weight
    F32
    [512]
  • blk.3.time_mix_output.weight
    BF16
    [512, 512]
  • blk.3.time_mix_r_k.weight
    F32
    [512]
  • blk.3.time_mix_receptance.weight
    BF16
    [512, 512]
  • blk.3.time_mix_v0.weight
    F32
    [512]
  • blk.3.time_mix_v1.weight
    BF16
    [512, 32]
  • blk.3.time_mix_v2.weight
    BF16
    [32, 512]
  • blk.3.time_mix_value.weight
    BF16
    [512, 512]
  • blk.3.time_mix_w0.weight
    F32
    [512]
  • blk.3.time_mix_w1.weight
    F32
    [512, 64]
  • blk.3.time_mix_w2.weight
    F32
    [64, 512]
  • blk.4
  • blk.4.attn_norm.bias
    F32
    [512]
  • blk.4.attn_norm.weight
    F32
    [512]
  • blk.4.attn_norm_2.bias
    F32
    [512]
  • blk.4.attn_norm_2.weight
    F32
    [512]
  • blk.4.channel_mix_key.weight
    BF16
    [512, 2048]
  • blk.4.channel_mix_lerp_k.weight
    F32
    [512]
  • blk.4.channel_mix_value.weight
    BF16
    [2048, 512]
  • blk.4.time_mix_a0.weight
    F32
    [512]
  • blk.4.time_mix_a1.weight
    BF16
    [512, 64]
  • blk.4.time_mix_a2.weight
    BF16
    [64, 512]
  • blk.4.time_mix_g1.weight
    BF16
    [512, 128]
  • blk.4.time_mix_g2.weight
    BF16
    [128, 512]
  • blk.4.time_mix_k_a.weight
    F32
    [512]
  • blk.4.time_mix_k_k.weight
    F32
    [512]
  • blk.4.time_mix_key.weight
    BF16
    [512, 512]
  • blk.4.time_mix_lerp_fused.weight
    F32
    [512, 1, 1, 6]
  • blk.4.time_mix_ln.bias
    F32
    [512]
  • blk.4.time_mix_ln.weight
    F32
    [512]
  • blk.4.time_mix_output.weight
    BF16
    [512, 512]
  • blk.4.time_mix_r_k.weight
    F32
    [512]
  • blk.4.time_mix_receptance.weight
    BF16
    [512, 512]
  • blk.4.time_mix_v0.weight
    F32
    [512]
  • blk.4.time_mix_v1.weight
    BF16
    [512, 32]
  • blk.4.time_mix_v2.weight
    BF16
    [32, 512]
  • blk.4.time_mix_value.weight
    BF16
    [512, 512]
  • blk.4.time_mix_w0.weight
    F32
    [512]
  • blk.4.time_mix_w1.weight
    F32
    [512, 64]
  • blk.4.time_mix_w2.weight
    F32
    [64, 512]
  • blk.5
  • blk.5.attn_norm.bias
    F32
    [512]
  • blk.5.attn_norm.weight
    F32
    [512]
  • blk.5.attn_norm_2.bias
    F32
    [512]
  • blk.5.attn_norm_2.weight
    F32
    [512]
  • blk.5.channel_mix_key.weight
    BF16
    [512, 2048]
  • blk.5.channel_mix_lerp_k.weight
    F32
    [512]
  • blk.5.channel_mix_value.weight
    BF16
    [2048, 512]
  • blk.5.time_mix_a0.weight
    F32
    [512]
  • blk.5.time_mix_a1.weight
    BF16
    [512, 64]
  • blk.5.time_mix_a2.weight
    BF16
    [64, 512]
  • blk.5.time_mix_g1.weight
    BF16
    [512, 128]
  • blk.5.time_mix_g2.weight
    BF16
    [128, 512]
  • blk.5.time_mix_k_a.weight
    F32
    [512]
  • blk.5.time_mix_k_k.weight
    F32
    [512]
  • blk.5.time_mix_key.weight
    BF16
    [512, 512]
  • blk.5.time_mix_lerp_fused.weight
    F32
    [512, 1, 1, 6]
  • blk.5.time_mix_ln.bias
    F32
    [512]
  • blk.5.time_mix_ln.weight
    F32
    [512]
  • blk.5.time_mix_output.weight
    BF16
    [512, 512]
  • blk.5.time_mix_r_k.weight
    F32
    [512]
  • blk.5.time_mix_receptance.weight
    BF16
    [512, 512]
  • blk.5.time_mix_v0.weight
    F32
    [512]
  • blk.5.time_mix_v1.weight
    BF16
    [512, 32]
  • blk.5.time_mix_v2.weight
    BF16
    [32, 512]
  • blk.5.time_mix_value.weight
    BF16
    [512, 512]
  • blk.5.time_mix_w0.weight
    F32
    [512]
  • blk.5.time_mix_w1.weight
    F32
    [512, 64]
  • blk.5.time_mix_w2.weight
    F32
    [64, 512]
  • blk.6
  • blk.6.attn_norm.bias
    F32
    [512]
  • blk.6.attn_norm.weight
    F32
    [512]
  • blk.6.attn_norm_2.bias
    F32
    [512]
  • blk.6.attn_norm_2.weight
    F32
    [512]
  • blk.6.channel_mix_key.weight
    BF16
    [512, 2048]
  • blk.6.channel_mix_lerp_k.weight
    F32
    [512]
  • blk.6.channel_mix_value.weight
    BF16
    [2048, 512]
  • blk.6.time_mix_a0.weight
    F32
    [512]
  • blk.6.time_mix_a1.weight
    BF16
    [512, 64]
  • blk.6.time_mix_a2.weight
    BF16
    [64, 512]
  • blk.6.time_mix_g1.weight
    BF16
    [512, 128]
  • blk.6.time_mix_g2.weight
    BF16
    [128, 512]
  • blk.6.time_mix_k_a.weight
    F32
    [512]
  • blk.6.time_mix_k_k.weight
    F32
    [512]
  • blk.6.time_mix_key.weight
    BF16
    [512, 512]
  • blk.6.time_mix_lerp_fused.weight
    F32
    [512, 1, 1, 6]
  • blk.6.time_mix_ln.bias
    F32
    [512]
  • blk.6.time_mix_ln.weight
    F32
    [512]
  • blk.6.time_mix_output.weight
    BF16
    [512, 512]
  • blk.6.time_mix_r_k.weight
    F32
    [512]
  • blk.6.time_mix_receptance.weight
    BF16
    [512, 512]
  • blk.6.time_mix_v0.weight
    F32
    [512]
  • blk.6.time_mix_v1.weight
    BF16
    [512, 32]
  • blk.6.time_mix_v2.weight
    BF16
    [32, 512]
  • blk.6.time_mix_value.weight
    BF16
    [512, 512]
  • blk.6.time_mix_w0.weight
    F32
    [512]
  • blk.6.time_mix_w1.weight
    F32
    [512, 64]
  • blk.6.time_mix_w2.weight
    F32
    [64, 512]
  • blk.7
  • blk.7.attn_norm.bias
    F32
    [512]
  • blk.7.attn_norm.weight
    F32
    [512]
  • blk.7.attn_norm_2.bias
    F32
    [512]
  • blk.7.attn_norm_2.weight
    F32
    [512]
  • blk.7.channel_mix_key.weight
    BF16
    [512, 2048]
  • blk.7.channel_mix_lerp_k.weight
    F32
    [512]
  • blk.7.channel_mix_value.weight
    BF16
    [2048, 512]
  • blk.7.time_mix_a0.weight
    F32
    [512]
  • blk.7.time_mix_a1.weight
    BF16
    [512, 64]
  • blk.7.time_mix_a2.weight
    BF16
    [64, 512]
  • blk.7.time_mix_g1.weight
    BF16
    [512, 128]
  • blk.7.time_mix_g2.weight
    BF16
    [128, 512]
  • blk.7.time_mix_k_a.weight
    F32
    [512]
  • blk.7.time_mix_k_k.weight
    F32
    [512]
  • blk.7.time_mix_key.weight
    BF16
    [512, 512]
  • blk.7.time_mix_lerp_fused.weight
    F32
    [512, 1, 1, 6]
  • blk.7.time_mix_ln.bias
    F32
    [512]
  • blk.7.time_mix_ln.weight
    F32
    [512]
  • blk.7.time_mix_output.weight
    BF16
    [512, 512]
  • blk.7.time_mix_r_k.weight
    F32
    [512]
  • blk.7.time_mix_receptance.weight
    BF16
    [512, 512]
  • blk.7.time_mix_v0.weight
    F32
    [512]
  • blk.7.time_mix_v1.weight
    BF16
    [512, 32]
  • blk.7.time_mix_v2.weight
    BF16
    [32, 512]
  • blk.7.time_mix_value.weight
    BF16
    [512, 512]
  • blk.7.time_mix_w0.weight
    F32
    [512]
  • blk.7.time_mix_w1.weight
    F32
    [512, 64]
  • blk.7.time_mix_w2.weight
    F32
    [64, 512]
  • output.weight
    BF16
    [512, 6400]
  • output_norm.bias
    F32
    [512]
  • token_embd_norm.bias
    F32
    [512]
  • token_embd_norm.weight
    F32
    [512]
  • output_norm.weight
    F32
    [512]