7 4 hours ago

To act as an autonomous coding assistant capable of interacting with the local file system, reading project structures, and writing executable code directly to the disk.

vision tools thinking audio
4c27e0f5b5ad · 9.6GB
    Metadata
  • general.architecture
    gemma4
  • general.file_type
    Q4_K_M
  • gemma4.attention.head_count
    8
  • gemma4.attention.head_count_kv
    2
  • gemma4.attention.key_length
    512
  • gemma4.attention.key_length_swa
    256
  • gemma4.attention.layer_norm_rms_epsilon
    1e-06
  • gemma4.attention.shared_kv_layers
    18
  • gemma4.attention.sliding_window
    512
  • gemma4.attention.sliding_window_pattern
    [true, true, true, true, true, ...]
  • gemma4.attention.value_length
    512
  • gemma4.attention.value_length_swa
    256
  • gemma4.audio.attention.head_count
    8
  • gemma4.audio.attention.layer_norm_epsilon
    1e-06
  • gemma4.audio.block_count
    12
  • gemma4.audio.conv_kernel_size
    5
  • gemma4.audio.embedding_length
    1024
  • gemma4.audio.feed_forward_length
    4096
  • gemma4.block_count
    42
  • gemma4.context_length
    131072
  • gemma4.embedding_length
    2560
  • gemma4.embedding_length_per_layer_input
    256
  • gemma4.feed_forward_length
    10240
  • gemma4.final_logit_softcapping
    30
  • gemma4.rope.dimension_count
    512
  • gemma4.rope.dimension_count_swa
    256
  • gemma4.rope.freq_base
    1e+06
  • gemma4.rope.freq_base_swa
    10000
  • gemma4.vision.attention.head_count
    12
  • gemma4.vision.attention.layer_norm_epsilon
    1e-06
  • gemma4.vision.block_count
    16
  • gemma4.vision.embedding_length
    768
  • gemma4.vision.feed_forward_length
    3072
  • gemma4.vision.num_channels
    3
  • gemma4.vision.patch_size
    16
  • gemma4.vision.projector.scale_factor
    3
  • tokenizer.ggml.add_bos_token
    false
  • tokenizer.ggml.add_eos_token
    false
  • tokenizer.ggml.add_mask_token
    false
  • tokenizer.ggml.add_padding_token
    false
  • tokenizer.ggml.add_unknown_token
    false
  • tokenizer.ggml.bos_token_id
    2
  • tokenizer.ggml.eos_token_id
    1
  • tokenizer.ggml.eos_token_ids
    [1, 106, 50]
  • tokenizer.ggml.mask_token_id
    4
  • tokenizer.ggml.merges
    [ , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁, , , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁▁, ...]
  • tokenizer.ggml.model
    llama
  • tokenizer.ggml.padding_token_id
    0
  • tokenizer.ggml.pre
    gemma4
  • tokenizer.ggml.scores
    [0, 1, 2, 3, 4, ...]
  • tokenizer.ggml.token_type
    [3, 3, 3, 3, 3, ...]
  • tokenizer.ggml.tokens
    [<pad>, <eos>, <bos>, <unk>, <mask>, ...]
  • tokenizer.ggml.unknown_token_id
    3
  • Tensor
  • token_embd.weight
    Q6_K
    [2560, 262144]
  • a.blk.0
  • a.blk.0.attn_k.input_max
    F32
    [1]
  • a.blk.0.attn_k.input_min
    F32
    [1]
  • a.blk.0.attn_k.output_max
    F32
    [1]
  • a.blk.0.attn_k.output_min
    F32
    [1]
  • a.blk.0.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.0.attn_out.input_max
    F32
    [1]
  • a.blk.0.attn_out.input_min
    F32
    [1]
  • a.blk.0.attn_out.output_max
    F32
    [1]
  • a.blk.0.attn_out.output_min
    F32
    [1]
  • a.blk.0.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.0.attn_q.input_max
    F32
    [1]
  • a.blk.0.attn_q.input_min
    F32
    [1]
  • a.blk.0.attn_q.output_max
    F32
    [1]
  • a.blk.0.attn_q.output_min
    F32
    [1]
  • a.blk.0.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.0.attn_v.input_max
    F32
    [1]
  • a.blk.0.attn_v.input_min
    F32
    [1]
  • a.blk.0.attn_v.output_max
    F32
    [1]
  • a.blk.0.attn_v.output_min
    F32
    [1]
  • a.blk.0.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.0.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.0.conv_norm.weight
    F32
    [1024]
  • a.blk.0.conv_pw1.input_max
    F32
    [1]
  • a.blk.0.conv_pw1.input_min
    F32
    [1]
  • a.blk.0.conv_pw1.output_max
    F32
    [1]
  • a.blk.0.conv_pw1.output_min
    F32
    [1]
  • a.blk.0.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.0.conv_pw2.input_max
    F32
    [1]
  • a.blk.0.conv_pw2.input_min
    F32
    [1]
  • a.blk.0.conv_pw2.output_max
    F32
    [1]
  • a.blk.0.conv_pw2.output_min
    F32
    [1]
  • a.blk.0.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.0.ffn_down.input_max
    F32
    [1]
  • a.blk.0.ffn_down.input_min
    F32
    [1]
  • a.blk.0.ffn_down.output_max
    F32
    [1]
  • a.blk.0.ffn_down.output_min
    F32
    [1]
  • a.blk.0.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.0.ffn_down_1.input_max
    F32
    [1]
  • a.blk.0.ffn_down_1.input_min
    F32
    [1]
  • a.blk.0.ffn_down_1.output_max
    F32
    [1]
  • a.blk.0.ffn_down_1.output_min
    F32
    [1]
  • a.blk.0.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.0.ffn_norm.weight
    F32
    [1024]
  • a.blk.0.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.0.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.0.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.0.ffn_up.input_max
    F32
    [1]
  • a.blk.0.ffn_up.input_min
    F32
    [1]
  • a.blk.0.ffn_up.output_max
    F32
    [1]
  • a.blk.0.ffn_up.output_min
    F32
    [1]
  • a.blk.0.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.0.ffn_up_1.input_max
    F32
    [1]
  • a.blk.0.ffn_up_1.input_min
    F32
    [1]
  • a.blk.0.ffn_up_1.output_max
    F32
    [1]
  • a.blk.0.ffn_up_1.output_min
    F32
    [1]
  • a.blk.0.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.0.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.0.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.0.ln1.weight
    F32
    [1024]
  • a.blk.0.ln2.weight
    F32
    [1024]
  • a.blk.0.norm_conv.weight
    F32
    [1024]
  • a.blk.0.per_dim_scale.weight
    F32
    [128]
  • a.blk.1
  • a.blk.1.attn_k.input_max
    F32
    [1]
  • a.blk.1.attn_k.input_min
    F32
    [1]
  • a.blk.1.attn_k.output_max
    F32
    [1]
  • a.blk.1.attn_k.output_min
    F32
    [1]
  • a.blk.1.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.1.attn_out.input_max
    F32
    [1]
  • a.blk.1.attn_out.input_min
    F32
    [1]
  • a.blk.1.attn_out.output_max
    F32
    [1]
  • a.blk.1.attn_out.output_min
    F32
    [1]
  • a.blk.1.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.1.attn_q.input_max
    F32
    [1]
  • a.blk.1.attn_q.input_min
    F32
    [1]
  • a.blk.1.attn_q.output_max
    F32
    [1]
  • a.blk.1.attn_q.output_min
    F32
    [1]
  • a.blk.1.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.1.attn_v.input_max
    F32
    [1]
  • a.blk.1.attn_v.input_min
    F32
    [1]
  • a.blk.1.attn_v.output_max
    F32
    [1]
  • a.blk.1.attn_v.output_min
    F32
    [1]
  • a.blk.1.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.1.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.1.conv_norm.weight
    F32
    [1024]
  • a.blk.1.conv_pw1.input_max
    F32
    [1]
  • a.blk.1.conv_pw1.input_min
    F32
    [1]
  • a.blk.1.conv_pw1.output_max
    F32
    [1]
  • a.blk.1.conv_pw1.output_min
    F32
    [1]
  • a.blk.1.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.1.conv_pw2.input_max
    F32
    [1]
  • a.blk.1.conv_pw2.input_min
    F32
    [1]
  • a.blk.1.conv_pw2.output_max
    F32
    [1]
  • a.blk.1.conv_pw2.output_min
    F32
    [1]
  • a.blk.1.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.1.ffn_down.input_max
    F32
    [1]
  • a.blk.1.ffn_down.input_min
    F32
    [1]
  • a.blk.1.ffn_down.output_max
    F32
    [1]
  • a.blk.1.ffn_down.output_min
    F32
    [1]
  • a.blk.1.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.1.ffn_down_1.input_max
    F32
    [1]
  • a.blk.1.ffn_down_1.input_min
    F32
    [1]
  • a.blk.1.ffn_down_1.output_max
    F32
    [1]
  • a.blk.1.ffn_down_1.output_min
    F32
    [1]
  • a.blk.1.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.1.ffn_norm.weight
    F32
    [1024]
  • a.blk.1.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.1.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.1.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.1.ffn_up.input_max
    F32
    [1]
  • a.blk.1.ffn_up.input_min
    F32
    [1]
  • a.blk.1.ffn_up.output_max
    F32
    [1]
  • a.blk.1.ffn_up.output_min
    F32
    [1]
  • a.blk.1.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.1.ffn_up_1.input_max
    F32
    [1]
  • a.blk.1.ffn_up_1.input_min
    F32
    [1]
  • a.blk.1.ffn_up_1.output_max
    F32
    [1]
  • a.blk.1.ffn_up_1.output_min
    F32
    [1]
  • a.blk.1.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.1.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.1.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.1.ln1.weight
    F32
    [1024]
  • a.blk.1.ln2.weight
    F32
    [1024]
  • a.blk.1.norm_conv.weight
    F32
    [1024]
  • a.blk.1.per_dim_scale.weight
    F32
    [128]
  • a.blk.2
  • a.blk.2.attn_k.input_max
    F32
    [1]
  • a.blk.2.attn_k.input_min
    F32
    [1]
  • a.blk.2.attn_k.output_max
    F32
    [1]
  • a.blk.2.attn_k.output_min
    F32
    [1]
  • a.blk.2.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.2.attn_out.input_max
    F32
    [1]
  • a.blk.2.attn_out.input_min
    F32
    [1]
  • a.blk.2.attn_out.output_max
    F32
    [1]
  • a.blk.2.attn_out.output_min
    F32
    [1]
  • a.blk.2.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.2.attn_q.input_max
    F32
    [1]
  • a.blk.2.attn_q.input_min
    F32
    [1]
  • a.blk.2.attn_q.output_max
    F32
    [1]
  • a.blk.2.attn_q.output_min
    F32
    [1]
  • a.blk.2.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.2.attn_v.input_max
    F32
    [1]
  • a.blk.2.attn_v.input_min
    F32
    [1]
  • a.blk.2.attn_v.output_max
    F32
    [1]
  • a.blk.2.attn_v.output_min
    F32
    [1]
  • a.blk.2.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.2.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.2.conv_norm.weight
    F32
    [1024]
  • a.blk.2.conv_pw1.input_max
    F32
    [1]
  • a.blk.2.conv_pw1.input_min
    F32
    [1]
  • a.blk.2.conv_pw1.output_max
    F32
    [1]
  • a.blk.2.conv_pw1.output_min
    F32
    [1]
  • a.blk.2.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.2.conv_pw2.input_max
    F32
    [1]
  • a.blk.2.conv_pw2.input_min
    F32
    [1]
  • a.blk.2.conv_pw2.output_max
    F32
    [1]
  • a.blk.2.conv_pw2.output_min
    F32
    [1]
  • a.blk.2.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.2.ffn_down.input_max
    F32
    [1]
  • a.blk.2.ffn_down.input_min
    F32
    [1]
  • a.blk.2.ffn_down.output_max
    F32
    [1]
  • a.blk.2.ffn_down.output_min
    F32
    [1]
  • a.blk.2.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.2.ffn_down_1.input_max
    F32
    [1]
  • a.blk.2.ffn_down_1.input_min
    F32
    [1]
  • a.blk.2.ffn_down_1.output_max
    F32
    [1]
  • a.blk.2.ffn_down_1.output_min
    F32
    [1]
  • a.blk.2.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.2.ffn_norm.weight
    F32
    [1024]
  • a.blk.2.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.2.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.2.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.2.ffn_up.input_max
    F32
    [1]
  • a.blk.2.ffn_up.input_min
    F32
    [1]
  • a.blk.2.ffn_up.output_max
    F32
    [1]
  • a.blk.2.ffn_up.output_min
    F32
    [1]
  • a.blk.2.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.2.ffn_up_1.input_max
    F32
    [1]
  • a.blk.2.ffn_up_1.input_min
    F32
    [1]
  • a.blk.2.ffn_up_1.output_max
    F32
    [1]
  • a.blk.2.ffn_up_1.output_min
    F32
    [1]
  • a.blk.2.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.2.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.2.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.2.ln1.weight
    F32
    [1024]
  • a.blk.2.ln2.weight
    F32
    [1024]
  • a.blk.2.norm_conv.weight
    F32
    [1024]
  • a.blk.2.per_dim_scale.weight
    F32
    [128]
  • a.blk.3
  • a.blk.3.attn_k.input_max
    F32
    [1]
  • a.blk.3.attn_k.input_min
    F32
    [1]
  • a.blk.3.attn_k.output_max
    F32
    [1]
  • a.blk.3.attn_k.output_min
    F32
    [1]
  • a.blk.3.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.3.attn_out.input_max
    F32
    [1]
  • a.blk.3.attn_out.input_min
    F32
    [1]
  • a.blk.3.attn_out.output_max
    F32
    [1]
  • a.blk.3.attn_out.output_min
    F32
    [1]
  • a.blk.3.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.3.attn_q.input_max
    F32
    [1]
  • a.blk.3.attn_q.input_min
    F32
    [1]
  • a.blk.3.attn_q.output_max
    F32
    [1]
  • a.blk.3.attn_q.output_min
    F32
    [1]
  • a.blk.3.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.3.attn_v.input_max
    F32
    [1]
  • a.blk.3.attn_v.input_min
    F32
    [1]
  • a.blk.3.attn_v.output_max
    F32
    [1]
  • a.blk.3.attn_v.output_min
    F32
    [1]
  • a.blk.3.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.3.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.3.conv_norm.weight
    F32
    [1024]
  • a.blk.3.conv_pw1.input_max
    F32
    [1]
  • a.blk.3.conv_pw1.input_min
    F32
    [1]
  • a.blk.3.conv_pw1.output_max
    F32
    [1]
  • a.blk.3.conv_pw1.output_min
    F32
    [1]
  • a.blk.3.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.3.conv_pw2.input_max
    F32
    [1]
  • a.blk.3.conv_pw2.input_min
    F32
    [1]
  • a.blk.3.conv_pw2.output_max
    F32
    [1]
  • a.blk.3.conv_pw2.output_min
    F32
    [1]
  • a.blk.3.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.3.ffn_down.input_max
    F32
    [1]
  • a.blk.3.ffn_down.input_min
    F32
    [1]
  • a.blk.3.ffn_down.output_max
    F32
    [1]
  • a.blk.3.ffn_down.output_min
    F32
    [1]
  • a.blk.3.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.3.ffn_down_1.input_max
    F32
    [1]
  • a.blk.3.ffn_down_1.input_min
    F32
    [1]
  • a.blk.3.ffn_down_1.output_max
    F32
    [1]
  • a.blk.3.ffn_down_1.output_min
    F32
    [1]
  • a.blk.3.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.3.ffn_norm.weight
    F32
    [1024]
  • a.blk.3.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.3.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.3.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.3.ffn_up.input_max
    F32
    [1]
  • a.blk.3.ffn_up.input_min
    F32
    [1]
  • a.blk.3.ffn_up.output_max
    F32
    [1]
  • a.blk.3.ffn_up.output_min
    F32
    [1]
  • a.blk.3.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.3.ffn_up_1.input_max
    F32
    [1]
  • a.blk.3.ffn_up_1.input_min
    F32
    [1]
  • a.blk.3.ffn_up_1.output_max
    F32
    [1]
  • a.blk.3.ffn_up_1.output_min
    F32
    [1]
  • a.blk.3.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.3.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.3.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.3.ln1.weight
    F32
    [1024]
  • a.blk.3.ln2.weight
    F32
    [1024]
  • a.blk.3.norm_conv.weight
    F32
    [1024]
  • a.blk.3.per_dim_scale.weight
    F32
    [128]
  • a.blk.4
  • a.blk.4.attn_k.input_max
    F32
    [1]
  • a.blk.4.attn_k.input_min
    F32
    [1]
  • a.blk.4.attn_k.output_max
    F32
    [1]
  • a.blk.4.attn_k.output_min
    F32
    [1]
  • a.blk.4.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.4.attn_out.input_max
    F32
    [1]
  • a.blk.4.attn_out.input_min
    F32
    [1]
  • a.blk.4.attn_out.output_max
    F32
    [1]
  • a.blk.4.attn_out.output_min
    F32
    [1]
  • a.blk.4.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.4.attn_q.input_max
    F32
    [1]
  • a.blk.4.attn_q.input_min
    F32
    [1]
  • a.blk.4.attn_q.output_max
    F32
    [1]
  • a.blk.4.attn_q.output_min
    F32
    [1]
  • a.blk.4.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.4.attn_v.input_max
    F32
    [1]
  • a.blk.4.attn_v.input_min
    F32
    [1]
  • a.blk.4.attn_v.output_max
    F32
    [1]
  • a.blk.4.attn_v.output_min
    F32
    [1]
  • a.blk.4.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.4.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.4.conv_norm.weight
    F32
    [1024]
  • a.blk.4.conv_pw1.input_max
    F32
    [1]
  • a.blk.4.conv_pw1.input_min
    F32
    [1]
  • a.blk.4.conv_pw1.output_max
    F32
    [1]
  • a.blk.4.conv_pw1.output_min
    F32
    [1]
  • a.blk.4.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.4.conv_pw2.input_max
    F32
    [1]
  • a.blk.4.conv_pw2.input_min
    F32
    [1]
  • a.blk.4.conv_pw2.output_max
    F32
    [1]
  • a.blk.4.conv_pw2.output_min
    F32
    [1]
  • a.blk.4.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.4.ffn_down.input_max
    F32
    [1]
  • a.blk.4.ffn_down.input_min
    F32
    [1]
  • a.blk.4.ffn_down.output_max
    F32
    [1]
  • a.blk.4.ffn_down.output_min
    F32
    [1]
  • a.blk.4.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.4.ffn_down_1.input_max
    F32
    [1]
  • a.blk.4.ffn_down_1.input_min
    F32
    [1]
  • a.blk.4.ffn_down_1.output_max
    F32
    [1]
  • a.blk.4.ffn_down_1.output_min
    F32
    [1]
  • a.blk.4.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.4.ffn_norm.weight
    F32
    [1024]
  • a.blk.4.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.4.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.4.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.4.ffn_up.input_max
    F32
    [1]
  • a.blk.4.ffn_up.input_min
    F32
    [1]
  • a.blk.4.ffn_up.output_max
    F32
    [1]
  • a.blk.4.ffn_up.output_min
    F32
    [1]
  • a.blk.4.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.4.ffn_up_1.input_max
    F32
    [1]
  • a.blk.4.ffn_up_1.input_min
    F32
    [1]
  • a.blk.4.ffn_up_1.output_max
    F32
    [1]
  • a.blk.4.ffn_up_1.output_min
    F32
    [1]
  • a.blk.4.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.4.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.4.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.4.ln1.weight
    F32
    [1024]
  • a.blk.4.ln2.weight
    F32
    [1024]
  • a.blk.4.norm_conv.weight
    F32
    [1024]
  • a.blk.4.per_dim_scale.weight
    F32
    [128]
  • a.blk.5
  • a.blk.5.attn_k.input_max
    F32
    [1]
  • a.blk.5.attn_k.input_min
    F32
    [1]
  • a.blk.5.attn_k.output_max
    F32
    [1]
  • a.blk.5.attn_k.output_min
    F32
    [1]
  • a.blk.5.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.5.attn_out.input_max
    F32
    [1]
  • a.blk.5.attn_out.input_min
    F32
    [1]
  • a.blk.5.attn_out.output_max
    F32
    [1]
  • a.blk.5.attn_out.output_min
    F32
    [1]
  • a.blk.5.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.5.attn_q.input_max
    F32
    [1]
  • a.blk.5.attn_q.input_min
    F32
    [1]
  • a.blk.5.attn_q.output_max
    F32
    [1]
  • a.blk.5.attn_q.output_min
    F32
    [1]
  • a.blk.5.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.5.attn_v.input_max
    F32
    [1]
  • a.blk.5.attn_v.input_min
    F32
    [1]
  • a.blk.5.attn_v.output_max
    F32
    [1]
  • a.blk.5.attn_v.output_min
    F32
    [1]
  • a.blk.5.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.5.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.5.conv_norm.weight
    F32
    [1024]
  • a.blk.5.conv_pw1.input_max
    F32
    [1]
  • a.blk.5.conv_pw1.input_min
    F32
    [1]
  • a.blk.5.conv_pw1.output_max
    F32
    [1]
  • a.blk.5.conv_pw1.output_min
    F32
    [1]
  • a.blk.5.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.5.conv_pw2.input_max
    F32
    [1]
  • a.blk.5.conv_pw2.input_min
    F32
    [1]
  • a.blk.5.conv_pw2.output_max
    F32
    [1]
  • a.blk.5.conv_pw2.output_min
    F32
    [1]
  • a.blk.5.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.5.ffn_down.input_max
    F32
    [1]
  • a.blk.5.ffn_down.input_min
    F32
    [1]
  • a.blk.5.ffn_down.output_max
    F32
    [1]
  • a.blk.5.ffn_down.output_min
    F32
    [1]
  • a.blk.5.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.5.ffn_down_1.input_max
    F32
    [1]
  • a.blk.5.ffn_down_1.input_min
    F32
    [1]
  • a.blk.5.ffn_down_1.output_max
    F32
    [1]
  • a.blk.5.ffn_down_1.output_min
    F32
    [1]
  • a.blk.5.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.5.ffn_norm.weight
    F32
    [1024]
  • a.blk.5.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.5.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.5.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.5.ffn_up.input_max
    F32
    [1]
  • a.blk.5.ffn_up.input_min
    F32
    [1]
  • a.blk.5.ffn_up.output_max
    F32
    [1]
  • a.blk.5.ffn_up.output_min
    F32
    [1]
  • a.blk.5.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.5.ffn_up_1.input_max
    F32
    [1]
  • a.blk.5.ffn_up_1.input_min
    F32
    [1]
  • a.blk.5.ffn_up_1.output_max
    F32
    [1]
  • a.blk.5.ffn_up_1.output_min
    F32
    [1]
  • a.blk.5.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.5.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.5.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.5.ln1.weight
    F32
    [1024]
  • a.blk.5.ln2.weight
    F32
    [1024]
  • a.blk.5.norm_conv.weight
    F32
    [1024]
  • a.blk.5.per_dim_scale.weight
    F32
    [128]
  • a.blk.6
  • a.blk.6.attn_k.input_max
    F32
    [1]
  • a.blk.6.attn_k.input_min
    F32
    [1]
  • a.blk.6.attn_k.output_max
    F32
    [1]
  • a.blk.6.attn_k.output_min
    F32
    [1]
  • a.blk.6.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.6.attn_out.input_max
    F32
    [1]
  • a.blk.6.attn_out.input_min
    F32
    [1]
  • a.blk.6.attn_out.output_max
    F32
    [1]
  • a.blk.6.attn_out.output_min
    F32
    [1]
  • a.blk.6.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.6.attn_q.input_max
    F32
    [1]
  • a.blk.6.attn_q.input_min
    F32
    [1]
  • a.blk.6.attn_q.output_max
    F32
    [1]
  • a.blk.6.attn_q.output_min
    F32
    [1]
  • a.blk.6.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.6.attn_v.input_max
    F32
    [1]
  • a.blk.6.attn_v.input_min
    F32
    [1]
  • a.blk.6.attn_v.output_max
    F32
    [1]
  • a.blk.6.attn_v.output_min
    F32
    [1]
  • a.blk.6.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.6.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.6.conv_norm.weight
    F32
    [1024]
  • a.blk.6.conv_pw1.input_max
    F32
    [1]
  • a.blk.6.conv_pw1.input_min
    F32
    [1]
  • a.blk.6.conv_pw1.output_max
    F32
    [1]
  • a.blk.6.conv_pw1.output_min
    F32
    [1]
  • a.blk.6.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.6.conv_pw2.input_max
    F32
    [1]
  • a.blk.6.conv_pw2.input_min
    F32
    [1]
  • a.blk.6.conv_pw2.output_max
    F32
    [1]
  • a.blk.6.conv_pw2.output_min
    F32
    [1]
  • a.blk.6.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.6.ffn_down.input_max
    F32
    [1]
  • a.blk.6.ffn_down.input_min
    F32
    [1]
  • a.blk.6.ffn_down.output_max
    F32
    [1]
  • a.blk.6.ffn_down.output_min
    F32
    [1]
  • a.blk.6.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.6.ffn_down_1.input_max
    F32
    [1]
  • a.blk.6.ffn_down_1.input_min
    F32
    [1]
  • a.blk.6.ffn_down_1.output_max
    F32
    [1]
  • a.blk.6.ffn_down_1.output_min
    F32
    [1]
  • a.blk.6.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.6.ffn_norm.weight
    F32
    [1024]
  • a.blk.6.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.6.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.6.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.6.ffn_up.input_max
    F32
    [1]
  • a.blk.6.ffn_up.input_min
    F32
    [1]
  • a.blk.6.ffn_up.output_max
    F32
    [1]
  • a.blk.6.ffn_up.output_min
    F32
    [1]
  • a.blk.6.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.6.ffn_up_1.input_max
    F32
    [1]
  • a.blk.6.ffn_up_1.input_min
    F32
    [1]
  • a.blk.6.ffn_up_1.output_max
    F32
    [1]
  • a.blk.6.ffn_up_1.output_min
    F32
    [1]
  • a.blk.6.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.6.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.6.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.6.ln1.weight
    F32
    [1024]
  • a.blk.6.ln2.weight
    F32
    [1024]
  • a.blk.6.norm_conv.weight
    F32
    [1024]
  • a.blk.6.per_dim_scale.weight
    F32
    [128]
  • a.blk.7
  • a.blk.7.attn_k.input_max
    F32
    [1]
  • a.blk.7.attn_k.input_min
    F32
    [1]
  • a.blk.7.attn_k.output_max
    F32
    [1]
  • a.blk.7.attn_k.output_min
    F32
    [1]
  • a.blk.7.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.7.attn_out.input_max
    F32
    [1]
  • a.blk.7.attn_out.input_min
    F32
    [1]
  • a.blk.7.attn_out.output_max
    F32
    [1]
  • a.blk.7.attn_out.output_min
    F32
    [1]
  • a.blk.7.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.7.attn_q.input_max
    F32
    [1]
  • a.blk.7.attn_q.input_min
    F32
    [1]
  • a.blk.7.attn_q.output_max
    F32
    [1]
  • a.blk.7.attn_q.output_min
    F32
    [1]
  • a.blk.7.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.7.attn_v.input_max
    F32
    [1]
  • a.blk.7.attn_v.input_min
    F32
    [1]
  • a.blk.7.attn_v.output_max
    F32
    [1]
  • a.blk.7.attn_v.output_min
    F32
    [1]
  • a.blk.7.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.7.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.7.conv_norm.weight
    F32
    [1024]
  • a.blk.7.conv_pw1.input_max
    F32
    [1]
  • a.blk.7.conv_pw1.input_min
    F32
    [1]
  • a.blk.7.conv_pw1.output_max
    F32
    [1]
  • a.blk.7.conv_pw1.output_min
    F32
    [1]
  • a.blk.7.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.7.conv_pw2.input_max
    F32
    [1]
  • a.blk.7.conv_pw2.input_min
    F32
    [1]
  • a.blk.7.conv_pw2.output_max
    F32
    [1]
  • a.blk.7.conv_pw2.output_min
    F32
    [1]
  • a.blk.7.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.7.ffn_down.input_max
    F32
    [1]
  • a.blk.7.ffn_down.input_min
    F32
    [1]
  • a.blk.7.ffn_down.output_max
    F32
    [1]
  • a.blk.7.ffn_down.output_min
    F32
    [1]
  • a.blk.7.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.7.ffn_down_1.input_max
    F32
    [1]
  • a.blk.7.ffn_down_1.input_min
    F32
    [1]
  • a.blk.7.ffn_down_1.output_max
    F32
    [1]
  • a.blk.7.ffn_down_1.output_min
    F32
    [1]
  • a.blk.7.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.7.ffn_norm.weight
    F32
    [1024]
  • a.blk.7.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.7.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.7.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.7.ffn_up.input_max
    F32
    [1]
  • a.blk.7.ffn_up.input_min
    F32
    [1]
  • a.blk.7.ffn_up.output_max
    F32
    [1]
  • a.blk.7.ffn_up.output_min
    F32
    [1]
  • a.blk.7.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.7.ffn_up_1.input_max
    F32
    [1]
  • a.blk.7.ffn_up_1.input_min
    F32
    [1]
  • a.blk.7.ffn_up_1.output_max
    F32
    [1]
  • a.blk.7.ffn_up_1.output_min
    F32
    [1]
  • a.blk.7.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.7.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.7.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.7.ln1.weight
    F32
    [1024]
  • a.blk.7.ln2.weight
    F32
    [1024]
  • a.blk.7.norm_conv.weight
    F32
    [1024]
  • a.blk.7.per_dim_scale.weight
    F32
    [128]
  • a.blk.8
  • a.blk.8.attn_k.input_max
    F32
    [1]
  • a.blk.8.attn_k.input_min
    F32
    [1]
  • a.blk.8.attn_k.output_max
    F32
    [1]
  • a.blk.8.attn_k.output_min
    F32
    [1]
  • a.blk.8.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.8.attn_out.input_max
    F32
    [1]
  • a.blk.8.attn_out.input_min
    F32
    [1]
  • a.blk.8.attn_out.output_max
    F32
    [1]
  • a.blk.8.attn_out.output_min
    F32
    [1]
  • a.blk.8.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.8.attn_q.input_max
    F32
    [1]
  • a.blk.8.attn_q.input_min
    F32
    [1]
  • a.blk.8.attn_q.output_max
    F32
    [1]
  • a.blk.8.attn_q.output_min
    F32
    [1]
  • a.blk.8.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.8.attn_v.input_max
    F32
    [1]
  • a.blk.8.attn_v.input_min
    F32
    [1]
  • a.blk.8.attn_v.output_max
    F32
    [1]
  • a.blk.8.attn_v.output_min
    F32
    [1]
  • a.blk.8.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.8.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.8.conv_norm.weight
    F32
    [1024]
  • a.blk.8.conv_pw1.input_max
    F32
    [1]
  • a.blk.8.conv_pw1.input_min
    F32
    [1]
  • a.blk.8.conv_pw1.output_max
    F32
    [1]
  • a.blk.8.conv_pw1.output_min
    F32
    [1]
  • a.blk.8.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.8.conv_pw2.input_max
    F32
    [1]
  • a.blk.8.conv_pw2.input_min
    F32
    [1]
  • a.blk.8.conv_pw2.output_max
    F32
    [1]
  • a.blk.8.conv_pw2.output_min
    F32
    [1]
  • a.blk.8.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.8.ffn_down.input_max
    F32
    [1]
  • a.blk.8.ffn_down.input_min
    F32
    [1]
  • a.blk.8.ffn_down.output_max
    F32
    [1]
  • a.blk.8.ffn_down.output_min
    F32
    [1]
  • a.blk.8.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.8.ffn_down_1.input_max
    F32
    [1]
  • a.blk.8.ffn_down_1.input_min
    F32
    [1]
  • a.blk.8.ffn_down_1.output_max
    F32
    [1]
  • a.blk.8.ffn_down_1.output_min
    F32
    [1]
  • a.blk.8.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.8.ffn_norm.weight
    F32
    [1024]
  • a.blk.8.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.8.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.8.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.8.ffn_up.input_max
    F32
    [1]
  • a.blk.8.ffn_up.input_min
    F32
    [1]
  • a.blk.8.ffn_up.output_max
    F32
    [1]
  • a.blk.8.ffn_up.output_min
    F32
    [1]
  • a.blk.8.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.8.ffn_up_1.input_max
    F32
    [1]
  • a.blk.8.ffn_up_1.input_min
    F32
    [1]
  • a.blk.8.ffn_up_1.output_max
    F32
    [1]
  • a.blk.8.ffn_up_1.output_min
    F32
    [1]
  • a.blk.8.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.8.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.8.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.8.ln1.weight
    F32
    [1024]
  • a.blk.8.ln2.weight
    F32
    [1024]
  • a.blk.8.norm_conv.weight
    F32
    [1024]
  • a.blk.8.per_dim_scale.weight
    F32
    [128]
  • a.blk.9
  • a.blk.9.attn_k.input_max
    F32
    [1]
  • a.blk.9.attn_k.input_min
    F32
    [1]
  • a.blk.9.attn_k.output_max
    F32
    [1]
  • a.blk.9.attn_k.output_min
    F32
    [1]
  • a.blk.9.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.9.attn_out.input_max
    F32
    [1]
  • a.blk.9.attn_out.input_min
    F32
    [1]
  • a.blk.9.attn_out.output_max
    F32
    [1]
  • a.blk.9.attn_out.output_min
    F32
    [1]
  • a.blk.9.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.9.attn_q.input_max
    F32
    [1]
  • a.blk.9.attn_q.input_min
    F32
    [1]
  • a.blk.9.attn_q.output_max
    F32
    [1]
  • a.blk.9.attn_q.output_min
    F32
    [1]
  • a.blk.9.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.9.attn_v.input_max
    F32
    [1]
  • a.blk.9.attn_v.input_min
    F32
    [1]
  • a.blk.9.attn_v.output_max
    F32
    [1]
  • a.blk.9.attn_v.output_min
    F32
    [1]
  • a.blk.9.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.9.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.9.conv_norm.weight
    F32
    [1024]
  • a.blk.9.conv_pw1.input_max
    F32
    [1]
  • a.blk.9.conv_pw1.input_min
    F32
    [1]
  • a.blk.9.conv_pw1.output_max
    F32
    [1]
  • a.blk.9.conv_pw1.output_min
    F32
    [1]
  • a.blk.9.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.9.conv_pw2.input_max
    F32
    [1]
  • a.blk.9.conv_pw2.input_min
    F32
    [1]
  • a.blk.9.conv_pw2.output_max
    F32
    [1]
  • a.blk.9.conv_pw2.output_min
    F32
    [1]
  • a.blk.9.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.9.ffn_down.input_max
    F32
    [1]
  • a.blk.9.ffn_down.input_min
    F32
    [1]
  • a.blk.9.ffn_down.output_max
    F32
    [1]
  • a.blk.9.ffn_down.output_min
    F32
    [1]
  • a.blk.9.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.9.ffn_down_1.input_max
    F32
    [1]
  • a.blk.9.ffn_down_1.input_min
    F32
    [1]
  • a.blk.9.ffn_down_1.output_max
    F32
    [1]
  • a.blk.9.ffn_down_1.output_min
    F32
    [1]
  • a.blk.9.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.9.ffn_norm.weight
    F32
    [1024]
  • a.blk.9.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.9.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.9.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.9.ffn_up.input_max
    F32
    [1]
  • a.blk.9.ffn_up.input_min
    F32
    [1]
  • a.blk.9.ffn_up.output_max
    F32
    [1]
  • a.blk.9.ffn_up.output_min
    F32
    [1]
  • a.blk.9.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.9.ffn_up_1.input_max
    F32
    [1]
  • a.blk.9.ffn_up_1.input_min
    F32
    [1]
  • a.blk.9.ffn_up_1.output_max
    F32
    [1]
  • a.blk.9.ffn_up_1.output_min
    F32
    [1]
  • a.blk.9.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.9.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.9.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.9.ln1.weight
    F32
    [1024]
  • a.blk.9.ln2.weight
    F32
    [1024]
  • a.blk.9.norm_conv.weight
    F32
    [1024]
  • a.blk.9.per_dim_scale.weight
    F32
    [128]
  • a.blk.10
  • a.blk.10.attn_k.input_max
    F32
    [1]
  • a.blk.10.attn_k.input_min
    F32
    [1]
  • a.blk.10.attn_k.output_max
    F32
    [1]
  • a.blk.10.attn_k.output_min
    F32
    [1]
  • a.blk.10.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.10.attn_out.input_max
    F32
    [1]
  • a.blk.10.attn_out.input_min
    F32
    [1]
  • a.blk.10.attn_out.output_max
    F32
    [1]
  • a.blk.10.attn_out.output_min
    F32
    [1]
  • a.blk.10.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.10.attn_q.input_max
    F32
    [1]
  • a.blk.10.attn_q.input_min
    F32
    [1]
  • a.blk.10.attn_q.output_max
    F32
    [1]
  • a.blk.10.attn_q.output_min
    F32
    [1]
  • a.blk.10.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.10.attn_v.input_max
    F32
    [1]
  • a.blk.10.attn_v.input_min
    F32
    [1]
  • a.blk.10.attn_v.output_max
    F32
    [1]
  • a.blk.10.attn_v.output_min
    F32
    [1]
  • a.blk.10.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.10.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.10.conv_norm.weight
    F32
    [1024]
  • a.blk.10.conv_pw1.input_max
    F32
    [1]
  • a.blk.10.conv_pw1.input_min
    F32
    [1]
  • a.blk.10.conv_pw1.output_max
    F32
    [1]
  • a.blk.10.conv_pw1.output_min
    F32
    [1]
  • a.blk.10.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.10.conv_pw2.input_max
    F32
    [1]
  • a.blk.10.conv_pw2.input_min
    F32
    [1]
  • a.blk.10.conv_pw2.output_max
    F32
    [1]
  • a.blk.10.conv_pw2.output_min
    F32
    [1]
  • a.blk.10.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.10.ffn_down.input_max
    F32
    [1]
  • a.blk.10.ffn_down.input_min
    F32
    [1]
  • a.blk.10.ffn_down.output_max
    F32
    [1]
  • a.blk.10.ffn_down.output_min
    F32
    [1]
  • a.blk.10.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.10.ffn_down_1.input_max
    F32
    [1]
  • a.blk.10.ffn_down_1.input_min
    F32
    [1]
  • a.blk.10.ffn_down_1.output_max
    F32
    [1]
  • a.blk.10.ffn_down_1.output_min
    F32
    [1]
  • a.blk.10.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.10.ffn_norm.weight
    F32
    [1024]
  • a.blk.10.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.10.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.10.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.10.ffn_up.input_max
    F32
    [1]
  • a.blk.10.ffn_up.input_min
    F32
    [1]
  • a.blk.10.ffn_up.output_max
    F32
    [1]
  • a.blk.10.ffn_up.output_min
    F32
    [1]
  • a.blk.10.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.10.ffn_up_1.input_max
    F32
    [1]
  • a.blk.10.ffn_up_1.input_min
    F32
    [1]
  • a.blk.10.ffn_up_1.output_max
    F32
    [1]
  • a.blk.10.ffn_up_1.output_min
    F32
    [1]
  • a.blk.10.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.10.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.10.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.10.ln1.weight
    F32
    [1024]
  • a.blk.10.ln2.weight
    F32
    [1024]
  • a.blk.10.norm_conv.weight
    F32
    [1024]
  • a.blk.10.per_dim_scale.weight
    F32
    [128]
  • a.blk.11
  • a.blk.11.attn_k.input_max
    F32
    [1]
  • a.blk.11.attn_k.input_min
    F32
    [1]
  • a.blk.11.attn_k.output_max
    F32
    [1]
  • a.blk.11.attn_k.output_min
    F32
    [1]
  • a.blk.11.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.11.attn_out.input_max
    F32
    [1]
  • a.blk.11.attn_out.input_min
    F32
    [1]
  • a.blk.11.attn_out.output_max
    F32
    [1]
  • a.blk.11.attn_out.output_min
    F32
    [1]
  • a.blk.11.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.11.attn_q.input_max
    F32
    [1]
  • a.blk.11.attn_q.input_min
    F32
    [1]
  • a.blk.11.attn_q.output_max
    F32
    [1]
  • a.blk.11.attn_q.output_min
    F32
    [1]
  • a.blk.11.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.11.attn_v.input_max
    F32
    [1]
  • a.blk.11.attn_v.input_min
    F32
    [1]
  • a.blk.11.attn_v.output_max
    F32
    [1]
  • a.blk.11.attn_v.output_min
    F32
    [1]
  • a.blk.11.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.11.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.11.conv_norm.weight
    F32
    [1024]
  • a.blk.11.conv_pw1.input_max
    F32
    [1]
  • a.blk.11.conv_pw1.input_min
    F32
    [1]
  • a.blk.11.conv_pw1.output_max
    F32
    [1]
  • a.blk.11.conv_pw1.output_min
    F32
    [1]
  • a.blk.11.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.11.conv_pw2.input_max
    F32
    [1]
  • a.blk.11.conv_pw2.input_min
    F32
    [1]
  • a.blk.11.conv_pw2.output_max
    F32
    [1]
  • a.blk.11.conv_pw2.output_min
    F32
    [1]
  • a.blk.11.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.11.ffn_down.input_max
    F32
    [1]
  • a.blk.11.ffn_down.input_min
    F32
    [1]
  • a.blk.11.ffn_down.output_max
    F32
    [1]
  • a.blk.11.ffn_down.output_min
    F32
    [1]
  • a.blk.11.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.11.ffn_down_1.input_max
    F32
    [1]
  • a.blk.11.ffn_down_1.input_min
    F32
    [1]
  • a.blk.11.ffn_down_1.output_max
    F32
    [1]
  • a.blk.11.ffn_down_1.output_min
    F32
    [1]
  • a.blk.11.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.11.ffn_norm.weight
    F32
    [1024]
  • a.blk.11.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.11.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.11.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.11.ffn_up.input_max
    F32
    [1]
  • a.blk.11.ffn_up.input_min
    F32
    [1]
  • a.blk.11.ffn_up.output_max
    F32
    [1]
  • a.blk.11.ffn_up.output_min
    F32
    [1]
  • a.blk.11.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.11.ffn_up_1.input_max
    F32
    [1]
  • a.blk.11.ffn_up_1.input_min
    F32
    [1]
  • a.blk.11.ffn_up_1.output_max
    F32
    [1]
  • a.blk.11.ffn_up_1.output_min
    F32
    [1]
  • a.blk.11.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.11.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.11.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.11.ln1.weight
    F32
    [1024]
  • a.blk.11.ln2.weight
    F32
    [1024]
  • a.blk.11.norm_conv.weight
    F32
    [1024]
  • a.blk.11.per_dim_scale.weight
    F32
    [128]
  • a.conv1d.0.norm.weight
    F32
    [128]
  • a.conv1d.0.weight
    F32
    [3, 3, 1, 128]
  • a.conv1d.1.norm.weight
    F32
    [32]
  • a.conv1d.1.weight
    F32
    [3, 3, 128, 32]
  • a.pre_encode.out.weight
    BF16
    [1024, 1024]
  • blk.0
  • blk.0.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.0.attn_k_norm.weight
    F32
    [256]
  • blk.0.attn_norm.weight
    F32
    [2560]
  • blk.0.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.0.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.0.attn_q_norm.weight
    F32
    [256]
  • blk.0.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.0.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.0.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.0.ffn_norm.weight
    F32
    [2560]
  • blk.0.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.0.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.0.layer_output_scale.weight
    F32
    [1]
  • blk.0.post_attention_norm.weight
    F32
    [2560]
  • blk.0.post_ffw_norm.weight
    F32
    [2560]
  • blk.0.post_norm.weight
    F32
    [2560]
  • blk.0.proj.weight
    Q4_K
    [256, 2560]
  • blk.1
  • blk.1.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.1.attn_k_norm.weight
    F32
    [256]
  • blk.1.attn_norm.weight
    F32
    [2560]
  • blk.1.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.1.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.1.attn_q_norm.weight
    F32
    [256]
  • blk.1.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.1.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.1.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.1.ffn_norm.weight
    F32
    [2560]
  • blk.1.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.1.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.1.layer_output_scale.weight
    F32
    [1]
  • blk.1.post_attention_norm.weight
    F32
    [2560]
  • blk.1.post_ffw_norm.weight
    F32
    [2560]
  • blk.1.post_norm.weight
    F32
    [2560]
  • blk.1.proj.weight
    Q4_K
    [256, 2560]
  • blk.2
  • blk.2.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.2.attn_k_norm.weight
    F32
    [256]
  • blk.2.attn_norm.weight
    F32
    [2560]
  • blk.2.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.2.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.2.attn_q_norm.weight
    F32
    [256]
  • blk.2.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.2.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.2.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.2.ffn_norm.weight
    F32
    [2560]
  • blk.2.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.2.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.2.layer_output_scale.weight
    F32
    [1]
  • blk.2.post_attention_norm.weight
    F32
    [2560]
  • blk.2.post_ffw_norm.weight
    F32
    [2560]
  • blk.2.post_norm.weight
    F32
    [2560]
  • blk.2.proj.weight
    Q4_K
    [256, 2560]
  • blk.3
  • blk.3.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.3.attn_k_norm.weight
    F32
    [256]
  • blk.3.attn_norm.weight
    F32
    [2560]
  • blk.3.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.3.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.3.attn_q_norm.weight
    F32
    [256]
  • blk.3.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.3.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.3.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.3.ffn_norm.weight
    F32
    [2560]
  • blk.3.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.3.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.3.layer_output_scale.weight
    F32
    [1]
  • blk.3.post_attention_norm.weight
    F32
    [2560]
  • blk.3.post_ffw_norm.weight
    F32
    [2560]
  • blk.3.post_norm.weight
    F32
    [2560]
  • blk.3.proj.weight
    Q4_K
    [256, 2560]
  • blk.4
  • blk.4.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.4.attn_k_norm.weight
    F32
    [256]
  • blk.4.attn_norm.weight
    F32
    [2560]
  • blk.4.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.4.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.4.attn_q_norm.weight
    F32
    [256]
  • blk.4.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.4.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.4.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.4.ffn_norm.weight
    F32
    [2560]
  • blk.4.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.4.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.4.layer_output_scale.weight
    F32
    [1]
  • blk.4.post_attention_norm.weight
    F32
    [2560]
  • blk.4.post_ffw_norm.weight
    F32
    [2560]
  • blk.4.post_norm.weight
    F32
    [2560]
  • blk.4.proj.weight
    Q4_K
    [256, 2560]
  • blk.5
  • blk.5.attn_k.weight
    Q4_K
    [2560, 1024]
  • blk.5.attn_k_norm.weight
    F32
    [512]
  • blk.5.attn_norm.weight
    F32
    [2560]
  • blk.5.attn_output.weight
    Q4_K
    [4096, 2560]
  • blk.5.attn_q.weight
    Q4_K
    [2560, 4096]
  • blk.5.attn_q_norm.weight
    F32
    [512]
  • blk.5.attn_v.weight
    Q6_K
    [2560, 1024]
  • blk.5.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.5.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.5.ffn_norm.weight
    F32
    [2560]
  • blk.5.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.5.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.5.layer_output_scale.weight
    F32
    [1]
  • blk.5.post_attention_norm.weight
    F32
    [2560]
  • blk.5.post_ffw_norm.weight
    F32
    [2560]
  • blk.5.post_norm.weight
    F32
    [2560]
  • blk.5.proj.weight
    Q4_K
    [256, 2560]
  • blk.6
  • blk.6.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.6.attn_k_norm.weight
    F32
    [256]
  • blk.6.attn_norm.weight
    F32
    [2560]
  • blk.6.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.6.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.6.attn_q_norm.weight
    F32
    [256]
  • blk.6.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.6.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.6.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.6.ffn_norm.weight
    F32
    [2560]
  • blk.6.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.6.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.6.layer_output_scale.weight
    F32
    [1]
  • blk.6.post_attention_norm.weight
    F32
    [2560]
  • blk.6.post_ffw_norm.weight
    F32
    [2560]
  • blk.6.post_norm.weight
    F32
    [2560]
  • blk.6.proj.weight
    Q4_K
    [256, 2560]
  • blk.7
  • blk.7.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.7.attn_k_norm.weight
    F32
    [256]
  • blk.7.attn_norm.weight
    F32
    [2560]
  • blk.7.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.7.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.7.attn_q_norm.weight
    F32
    [256]
  • blk.7.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.7.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.7.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.7.ffn_norm.weight
    F32
    [2560]
  • blk.7.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.7.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.7.layer_output_scale.weight
    F32
    [1]
  • blk.7.post_attention_norm.weight
    F32
    [2560]
  • blk.7.post_ffw_norm.weight
    F32
    [2560]
  • blk.7.post_norm.weight
    F32
    [2560]
  • blk.7.proj.weight
    Q4_K
    [256, 2560]
  • blk.8
  • blk.8.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.8.attn_k_norm.weight
    F32
    [256]
  • blk.8.attn_norm.weight
    F32
    [2560]
  • blk.8.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.8.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.8.attn_q_norm.weight
    F32
    [256]
  • blk.8.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.8.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.8.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.8.ffn_norm.weight
    F32
    [2560]
  • blk.8.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.8.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.8.layer_output_scale.weight
    F32
    [1]
  • blk.8.post_attention_norm.weight
    F32
    [2560]
  • blk.8.post_ffw_norm.weight
    F32
    [2560]
  • blk.8.post_norm.weight
    F32
    [2560]
  • blk.8.proj.weight
    Q4_K
    [256, 2560]
  • blk.9
  • blk.9.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.9.attn_k_norm.weight
    F32
    [256]
  • blk.9.attn_norm.weight
    F32
    [2560]
  • blk.9.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.9.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.9.attn_q_norm.weight
    F32
    [256]
  • blk.9.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.9.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.9.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.9.ffn_norm.weight
    F32
    [2560]
  • blk.9.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.9.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.9.layer_output_scale.weight
    F32
    [1]
  • blk.9.post_attention_norm.weight
    F32
    [2560]
  • blk.9.post_ffw_norm.weight
    F32
    [2560]
  • blk.9.post_norm.weight
    F32
    [2560]
  • blk.9.proj.weight
    Q4_K
    [256, 2560]
  • blk.10
  • blk.10.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.10.attn_k_norm.weight
    F32
    [256]
  • blk.10.attn_norm.weight
    F32
    [2560]
  • blk.10.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.10.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.10.attn_q_norm.weight
    F32
    [256]
  • blk.10.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.10.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.10.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.10.ffn_norm.weight
    F32
    [2560]
  • blk.10.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.10.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.10.layer_output_scale.weight
    F32
    [1]
  • blk.10.post_attention_norm.weight
    F32
    [2560]
  • blk.10.post_ffw_norm.weight
    F32
    [2560]
  • blk.10.post_norm.weight
    F32
    [2560]
  • blk.10.proj.weight
    Q4_K
    [256, 2560]
  • blk.11
  • blk.11.attn_k.weight
    Q4_K
    [2560, 1024]
  • blk.11.attn_k_norm.weight
    F32
    [512]
  • blk.11.attn_norm.weight
    F32
    [2560]
  • blk.11.attn_output.weight
    Q4_K
    [4096, 2560]
  • blk.11.attn_q.weight
    Q4_K
    [2560, 4096]
  • blk.11.attn_q_norm.weight
    F32
    [512]
  • blk.11.attn_v.weight
    Q4_K
    [2560, 1024]
  • blk.11.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.11.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.11.ffn_norm.weight
    F32
    [2560]
  • blk.11.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.11.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.11.layer_output_scale.weight
    F32
    [1]
  • blk.11.post_attention_norm.weight
    F32
    [2560]
  • blk.11.post_ffw_norm.weight
    F32
    [2560]
  • blk.11.post_norm.weight
    F32
    [2560]
  • blk.11.proj.weight
    Q4_K
    [256, 2560]
  • blk.12
  • blk.12.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.12.attn_k_norm.weight
    F32
    [256]
  • blk.12.attn_norm.weight
    F32
    [2560]
  • blk.12.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.12.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.12.attn_q_norm.weight
    F32
    [256]
  • blk.12.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.12.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.12.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.12.ffn_norm.weight
    F32
    [2560]
  • blk.12.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.12.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.12.layer_output_scale.weight
    F32
    [1]
  • blk.12.post_attention_norm.weight
    F32
    [2560]
  • blk.12.post_ffw_norm.weight
    F32
    [2560]
  • blk.12.post_norm.weight
    F32
    [2560]
  • blk.12.proj.weight
    Q4_K
    [256, 2560]
  • blk.13
  • blk.13.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.13.attn_k_norm.weight
    F32
    [256]
  • blk.13.attn_norm.weight
    F32
    [2560]
  • blk.13.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.13.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.13.attn_q_norm.weight
    F32
    [256]
  • blk.13.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.13.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.13.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.13.ffn_norm.weight
    F32
    [2560]
  • blk.13.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.13.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.13.layer_output_scale.weight
    F32
    [1]
  • blk.13.post_attention_norm.weight
    F32
    [2560]
  • blk.13.post_ffw_norm.weight
    F32
    [2560]
  • blk.13.post_norm.weight
    F32
    [2560]
  • blk.13.proj.weight
    Q4_K
    [256, 2560]
  • blk.14
  • blk.14.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.14.attn_k_norm.weight
    F32
    [256]
  • blk.14.attn_norm.weight
    F32
    [2560]
  • blk.14.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.14.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.14.attn_q_norm.weight
    F32
    [256]
  • blk.14.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.14.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.14.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.14.ffn_norm.weight
    F32
    [2560]
  • blk.14.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.14.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.14.layer_output_scale.weight
    F32
    [1]
  • blk.14.post_attention_norm.weight
    F32
    [2560]
  • blk.14.post_ffw_norm.weight
    F32
    [2560]
  • blk.14.post_norm.weight
    F32
    [2560]
  • blk.14.proj.weight
    Q4_K
    [256, 2560]
  • blk.15
  • blk.15.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.15.attn_k_norm.weight
    F32
    [256]
  • blk.15.attn_norm.weight
    F32
    [2560]
  • blk.15.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.15.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.15.attn_q_norm.weight
    F32
    [256]
  • blk.15.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.15.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.15.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.15.ffn_norm.weight
    F32
    [2560]
  • blk.15.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.15.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.15.layer_output_scale.weight
    F32
    [1]
  • blk.15.post_attention_norm.weight
    F32
    [2560]
  • blk.15.post_ffw_norm.weight
    F32
    [2560]
  • blk.15.post_norm.weight
    F32
    [2560]
  • blk.15.proj.weight
    Q4_K
    [256, 2560]
  • blk.16
  • blk.16.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.16.attn_k_norm.weight
    F32
    [256]
  • blk.16.attn_norm.weight
    F32
    [2560]
  • blk.16.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.16.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.16.attn_q_norm.weight
    F32
    [256]
  • blk.16.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.16.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.16.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.16.ffn_norm.weight
    F32
    [2560]
  • blk.16.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.16.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.16.layer_output_scale.weight
    F32
    [1]
  • blk.16.post_attention_norm.weight
    F32
    [2560]
  • blk.16.post_ffw_norm.weight
    F32
    [2560]
  • blk.16.post_norm.weight
    F32
    [2560]
  • blk.16.proj.weight
    Q4_K
    [256, 2560]
  • blk.17
  • blk.17.attn_k.weight
    Q4_K
    [2560, 1024]
  • blk.17.attn_k_norm.weight
    F32
    [512]
  • blk.17.attn_norm.weight
    F32
    [2560]
  • blk.17.attn_output.weight
    Q4_K
    [4096, 2560]
  • blk.17.attn_q.weight
    Q4_K
    [2560, 4096]
  • blk.17.attn_q_norm.weight
    F32
    [512]
  • blk.17.attn_v.weight
    Q4_K
    [2560, 1024]
  • blk.17.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.17.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.17.ffn_norm.weight
    F32
    [2560]
  • blk.17.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.17.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.17.layer_output_scale.weight
    F32
    [1]
  • blk.17.post_attention_norm.weight
    F32
    [2560]
  • blk.17.post_ffw_norm.weight
    F32
    [2560]
  • blk.17.post_norm.weight
    F32
    [2560]
  • blk.17.proj.weight
    Q4_K
    [256, 2560]
  • blk.18
  • blk.18.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.18.attn_k_norm.weight
    F32
    [256]
  • blk.18.attn_norm.weight
    F32
    [2560]
  • blk.18.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.18.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.18.attn_q_norm.weight
    F32
    [256]
  • blk.18.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.18.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.18.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.18.ffn_norm.weight
    F32
    [2560]
  • blk.18.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.18.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.18.layer_output_scale.weight
    F32
    [1]
  • blk.18.post_attention_norm.weight
    F32
    [2560]
  • blk.18.post_ffw_norm.weight
    F32
    [2560]
  • blk.18.post_norm.weight
    F32
    [2560]
  • blk.18.proj.weight
    Q4_K
    [256, 2560]
  • blk.19
  • blk.19.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.19.attn_k_norm.weight
    F32
    [256]
  • blk.19.attn_norm.weight
    F32
    [2560]
  • blk.19.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.19.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.19.attn_q_norm.weight
    F32
    [256]
  • blk.19.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.19.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.19.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.19.ffn_norm.weight
    F32
    [2560]
  • blk.19.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.19.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.19.layer_output_scale.weight
    F32
    [1]
  • blk.19.post_attention_norm.weight
    F32
    [2560]
  • blk.19.post_ffw_norm.weight
    F32
    [2560]
  • blk.19.post_norm.weight
    F32
    [2560]
  • blk.19.proj.weight
    Q4_K
    [256, 2560]
  • blk.20
  • blk.20.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.20.attn_k_norm.weight
    F32
    [256]
  • blk.20.attn_norm.weight
    F32
    [2560]
  • blk.20.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.20.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.20.attn_q_norm.weight
    F32
    [256]
  • blk.20.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.20.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.20.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.20.ffn_norm.weight
    F32
    [2560]
  • blk.20.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.20.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.20.layer_output_scale.weight
    F32
    [1]
  • blk.20.post_attention_norm.weight
    F32
    [2560]
  • blk.20.post_ffw_norm.weight
    F32
    [2560]
  • blk.20.post_norm.weight
    F32
    [2560]
  • blk.20.proj.weight
    Q4_K
    [256, 2560]
  • blk.21
  • blk.21.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.21.attn_k_norm.weight
    F32
    [256]
  • blk.21.attn_norm.weight
    F32
    [2560]
  • blk.21.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.21.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.21.attn_q_norm.weight
    F32
    [256]
  • blk.21.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.21.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.21.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.21.ffn_norm.weight
    F32
    [2560]
  • blk.21.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.21.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.21.layer_output_scale.weight
    F32
    [1]
  • blk.21.post_attention_norm.weight
    F32
    [2560]
  • blk.21.post_ffw_norm.weight
    F32
    [2560]
  • blk.21.post_norm.weight
    F32
    [2560]
  • blk.21.proj.weight
    Q4_K
    [256, 2560]
  • blk.22
  • blk.22.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.22.attn_k_norm.weight
    F32
    [256]
  • blk.22.attn_norm.weight
    F32
    [2560]
  • blk.22.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.22.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.22.attn_q_norm.weight
    F32
    [256]
  • blk.22.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.22.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.22.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.22.ffn_norm.weight
    F32
    [2560]
  • blk.22.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.22.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.22.layer_output_scale.weight
    F32
    [1]
  • blk.22.post_attention_norm.weight
    F32
    [2560]
  • blk.22.post_ffw_norm.weight
    F32
    [2560]
  • blk.22.post_norm.weight
    F32
    [2560]
  • blk.22.proj.weight
    Q4_K
    [256, 2560]
  • blk.23
  • blk.23.attn_k.weight
    Q4_K
    [2560, 1024]
  • blk.23.attn_k_norm.weight
    F32
    [512]
  • blk.23.attn_norm.weight
    F32
    [2560]
  • blk.23.attn_output.weight
    Q4_K
    [4096, 2560]
  • blk.23.attn_q.weight
    Q4_K
    [2560, 4096]
  • blk.23.attn_q_norm.weight
    F32
    [512]
  • blk.23.attn_v.weight
    Q4_K
    [2560, 1024]
  • blk.23.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.23.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.23.ffn_norm.weight
    F32
    [2560]
  • blk.23.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.23.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.23.layer_output_scale.weight
    F32
    [1]
  • blk.23.post_attention_norm.weight
    F32
    [2560]
  • blk.23.post_ffw_norm.weight
    F32
    [2560]
  • blk.23.post_norm.weight
    F32
    [2560]
  • blk.23.proj.weight
    Q4_K
    [256, 2560]
  • blk.24
  • blk.24.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.24.attn_k_norm.weight
    F32
    [256]
  • blk.24.attn_norm.weight
    F32
    [2560]
  • blk.24.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.24.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.24.attn_q_norm.weight
    F32
    [256]
  • blk.24.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.24.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.24.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.24.ffn_norm.weight
    F32
    [2560]
  • blk.24.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.24.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.24.layer_output_scale.weight
    F32
    [1]
  • blk.24.post_attention_norm.weight
    F32
    [2560]
  • blk.24.post_ffw_norm.weight
    F32
    [2560]
  • blk.24.post_norm.weight
    F32
    [2560]
  • blk.24.proj.weight
    Q4_K
    [256, 2560]
  • blk.25
  • blk.25.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.25.attn_k_norm.weight
    F32
    [256]
  • blk.25.attn_norm.weight
    F32
    [2560]
  • blk.25.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.25.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.25.attn_q_norm.weight
    F32
    [256]
  • blk.25.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.25.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.25.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.25.ffn_norm.weight
    F32
    [2560]
  • blk.25.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.25.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.25.layer_output_scale.weight
    F32
    [1]
  • blk.25.post_attention_norm.weight
    F32
    [2560]
  • blk.25.post_ffw_norm.weight
    F32
    [2560]
  • blk.25.post_norm.weight
    F32
    [2560]
  • blk.25.proj.weight
    Q4_K
    [256, 2560]
  • blk.26
  • blk.26.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.26.attn_k_norm.weight
    F32
    [256]
  • blk.26.attn_norm.weight
    F32
    [2560]
  • blk.26.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.26.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.26.attn_q_norm.weight
    F32
    [256]
  • blk.26.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.26.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.26.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.26.ffn_norm.weight
    F32
    [2560]
  • blk.26.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.26.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.26.layer_output_scale.weight
    F32
    [1]
  • blk.26.post_attention_norm.weight
    F32
    [2560]
  • blk.26.post_ffw_norm.weight
    F32
    [2560]
  • blk.26.post_norm.weight
    F32
    [2560]
  • blk.26.proj.weight
    Q4_K
    [256, 2560]
  • blk.27
  • blk.27.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.27.attn_k_norm.weight
    F32
    [256]
  • blk.27.attn_norm.weight
    F32
    [2560]
  • blk.27.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.27.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.27.attn_q_norm.weight
    F32
    [256]
  • blk.27.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.27.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.27.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.27.ffn_norm.weight
    F32
    [2560]
  • blk.27.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.27.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.27.layer_output_scale.weight
    F32
    [1]
  • blk.27.post_attention_norm.weight
    F32
    [2560]
  • blk.27.post_ffw_norm.weight
    F32
    [2560]
  • blk.27.post_norm.weight
    F32
    [2560]
  • blk.27.proj.weight
    Q4_K
    [256, 2560]
  • blk.28
  • blk.28.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.28.attn_k_norm.weight
    F32
    [256]
  • blk.28.attn_norm.weight
    F32
    [2560]
  • blk.28.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.28.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.28.attn_q_norm.weight
    F32
    [256]
  • blk.28.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.28.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.28.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.28.ffn_norm.weight
    F32
    [2560]
  • blk.28.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.28.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.28.layer_output_scale.weight
    F32
    [1]
  • blk.28.post_attention_norm.weight
    F32
    [2560]
  • blk.28.post_ffw_norm.weight
    F32
    [2560]
  • blk.28.post_norm.weight
    F32
    [2560]
  • blk.28.proj.weight
    Q4_K
    [256, 2560]
  • blk.29
  • blk.29.attn_k.weight
    Q4_K
    [2560, 1024]
  • blk.29.attn_k_norm.weight
    F32
    [512]
  • blk.29.attn_norm.weight
    F32
    [2560]
  • blk.29.attn_output.weight
    Q4_K
    [4096, 2560]
  • blk.29.attn_q.weight
    Q4_K
    [2560, 4096]
  • blk.29.attn_q_norm.weight
    F32
    [512]
  • blk.29.attn_v.weight
    Q4_K
    [2560, 1024]
  • blk.29.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.29.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.29.ffn_norm.weight
    F32
    [2560]
  • blk.29.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.29.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.29.layer_output_scale.weight
    F32
    [1]
  • blk.29.post_attention_norm.weight
    F32
    [2560]
  • blk.29.post_ffw_norm.weight
    F32
    [2560]
  • blk.29.post_norm.weight
    F32
    [2560]
  • blk.29.proj.weight
    Q4_K
    [256, 2560]
  • blk.30
  • blk.30.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.30.attn_k_norm.weight
    F32
    [256]
  • blk.30.attn_norm.weight
    F32
    [2560]
  • blk.30.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.30.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.30.attn_q_norm.weight
    F32
    [256]
  • blk.30.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.30.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.30.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.30.ffn_norm.weight
    F32
    [2560]
  • blk.30.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.30.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.30.layer_output_scale.weight
    F32
    [1]
  • blk.30.post_attention_norm.weight
    F32
    [2560]
  • blk.30.post_ffw_norm.weight
    F32
    [2560]
  • blk.30.post_norm.weight
    F32
    [2560]
  • blk.30.proj.weight
    Q4_K
    [256, 2560]
  • blk.31
  • blk.31.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.31.attn_k_norm.weight
    F32
    [256]
  • blk.31.attn_norm.weight
    F32
    [2560]
  • blk.31.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.31.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.31.attn_q_norm.weight
    F32
    [256]
  • blk.31.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.31.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.31.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.31.ffn_norm.weight
    F32
    [2560]
  • blk.31.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.31.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.31.layer_output_scale.weight
    F32
    [1]
  • blk.31.post_attention_norm.weight
    F32
    [2560]
  • blk.31.post_ffw_norm.weight
    F32
    [2560]
  • blk.31.post_norm.weight
    F32
    [2560]
  • blk.31.proj.weight
    Q4_K
    [256, 2560]
  • blk.32
  • blk.32.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.32.attn_k_norm.weight
    F32
    [256]
  • blk.32.attn_norm.weight
    F32
    [2560]
  • blk.32.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.32.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.32.attn_q_norm.weight
    F32
    [256]
  • blk.32.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.32.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.32.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.32.ffn_norm.weight
    F32
    [2560]
  • blk.32.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.32.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.32.layer_output_scale.weight
    F32
    [1]
  • blk.32.post_attention_norm.weight
    F32
    [2560]
  • blk.32.post_ffw_norm.weight
    F32
    [2560]
  • blk.32.post_norm.weight
    F32
    [2560]
  • blk.32.proj.weight
    Q4_K
    [256, 2560]
  • blk.33
  • blk.33.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.33.attn_k_norm.weight
    F32
    [256]
  • blk.33.attn_norm.weight
    F32
    [2560]
  • blk.33.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.33.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.33.attn_q_norm.weight
    F32
    [256]
  • blk.33.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.33.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.33.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.33.ffn_norm.weight
    F32
    [2560]
  • blk.33.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.33.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.33.layer_output_scale.weight
    F32
    [1]
  • blk.33.post_attention_norm.weight
    F32
    [2560]
  • blk.33.post_ffw_norm.weight
    F32
    [2560]
  • blk.33.post_norm.weight
    F32
    [2560]
  • blk.33.proj.weight
    Q4_K
    [256, 2560]
  • blk.34
  • blk.34.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.34.attn_k_norm.weight
    F32
    [256]
  • blk.34.attn_norm.weight
    F32
    [2560]
  • blk.34.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.34.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.34.attn_q_norm.weight
    F32
    [256]
  • blk.34.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.34.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.34.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.34.ffn_norm.weight
    F32
    [2560]
  • blk.34.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.34.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.34.layer_output_scale.weight
    F32
    [1]
  • blk.34.post_attention_norm.weight
    F32
    [2560]
  • blk.34.post_ffw_norm.weight
    F32
    [2560]
  • blk.34.post_norm.weight
    F32
    [2560]
  • blk.34.proj.weight
    Q4_K
    [256, 2560]
  • blk.35
  • blk.35.attn_k.weight
    Q4_K
    [2560, 1024]
  • blk.35.attn_k_norm.weight
    F32
    [512]
  • blk.35.attn_norm.weight
    F32
    [2560]
  • blk.35.attn_output.weight
    Q4_K
    [4096, 2560]
  • blk.35.attn_q.weight
    Q4_K
    [2560, 4096]
  • blk.35.attn_q_norm.weight
    F32
    [512]
  • blk.35.attn_v.weight
    Q4_K
    [2560, 1024]
  • blk.35.ffn_down.weight
    Q4_K
    [10240, 2560]
  • blk.35.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.35.ffn_norm.weight
    F32
    [2560]
  • blk.35.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.35.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.35.layer_output_scale.weight
    F32
    [1]
  • blk.35.post_attention_norm.weight
    F32
    [2560]
  • blk.35.post_ffw_norm.weight
    F32
    [2560]
  • blk.35.post_norm.weight
    F32
    [2560]
  • blk.35.proj.weight
    Q4_K
    [256, 2560]
  • blk.36
  • blk.36.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.36.attn_k_norm.weight
    F32
    [256]
  • blk.36.attn_norm.weight
    F32
    [2560]
  • blk.36.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.36.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.36.attn_q_norm.weight
    F32
    [256]
  • blk.36.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.36.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.36.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.36.ffn_norm.weight
    F32
    [2560]
  • blk.36.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.36.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.36.layer_output_scale.weight
    F32
    [1]
  • blk.36.post_attention_norm.weight
    F32
    [2560]
  • blk.36.post_ffw_norm.weight
    F32
    [2560]
  • blk.36.post_norm.weight
    F32
    [2560]
  • blk.36.proj.weight
    Q4_K
    [256, 2560]
  • blk.37
  • blk.37.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.37.attn_k_norm.weight
    F32
    [256]
  • blk.37.attn_norm.weight
    F32
    [2560]
  • blk.37.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.37.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.37.attn_q_norm.weight
    F32
    [256]
  • blk.37.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.37.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.37.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.37.ffn_norm.weight
    F32
    [2560]
  • blk.37.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.37.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.37.layer_output_scale.weight
    F32
    [1]
  • blk.37.post_attention_norm.weight
    F32
    [2560]
  • blk.37.post_ffw_norm.weight
    F32
    [2560]
  • blk.37.post_norm.weight
    F32
    [2560]
  • blk.37.proj.weight
    Q4_K
    [256, 2560]
  • blk.38
  • blk.38.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.38.attn_k_norm.weight
    F32
    [256]
  • blk.38.attn_norm.weight
    F32
    [2560]
  • blk.38.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.38.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.38.attn_q_norm.weight
    F32
    [256]
  • blk.38.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.38.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.38.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.38.ffn_norm.weight
    F32
    [2560]
  • blk.38.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.38.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.38.layer_output_scale.weight
    F32
    [1]
  • blk.38.post_attention_norm.weight
    F32
    [2560]
  • blk.38.post_ffw_norm.weight
    F32
    [2560]
  • blk.38.post_norm.weight
    F32
    [2560]
  • blk.38.proj.weight
    Q4_K
    [256, 2560]
  • blk.39
  • blk.39.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.39.attn_k_norm.weight
    F32
    [256]
  • blk.39.attn_norm.weight
    F32
    [2560]
  • blk.39.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.39.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.39.attn_q_norm.weight
    F32
    [256]
  • blk.39.attn_v.weight
    Q4_K
    [2560, 512]
  • blk.39.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.39.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.39.ffn_norm.weight
    F32
    [2560]
  • blk.39.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.39.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.39.layer_output_scale.weight
    F32
    [1]
  • blk.39.post_attention_norm.weight
    F32
    [2560]
  • blk.39.post_ffw_norm.weight
    F32
    [2560]
  • blk.39.post_norm.weight
    F32
    [2560]
  • blk.39.proj.weight
    Q4_K
    [256, 2560]
  • blk.40
  • blk.40.attn_k.weight
    Q4_K
    [2560, 512]
  • blk.40.attn_k_norm.weight
    F32
    [256]
  • blk.40.attn_norm.weight
    F32
    [2560]
  • blk.40.attn_output.weight
    Q4_K
    [2048, 2560]
  • blk.40.attn_q.weight
    Q4_K
    [2560, 2048]
  • blk.40.attn_q_norm.weight
    F32
    [256]
  • blk.40.attn_v.weight
    Q6_K
    [2560, 512]
  • blk.40.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.40.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.40.ffn_norm.weight
    F32
    [2560]
  • blk.40.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.40.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.40.layer_output_scale.weight
    F32
    [1]
  • blk.40.post_attention_norm.weight
    F32
    [2560]
  • blk.40.post_ffw_norm.weight
    F32
    [2560]
  • blk.40.post_norm.weight
    F32
    [2560]
  • blk.40.proj.weight
    Q4_K
    [256, 2560]
  • blk.41
  • blk.41.attn_k.weight
    Q4_K
    [2560, 1024]
  • blk.41.attn_k_norm.weight
    F32
    [512]
  • blk.41.attn_norm.weight
    F32
    [2560]
  • blk.41.attn_output.weight
    Q4_K
    [4096, 2560]
  • blk.41.attn_q.weight
    Q4_K
    [2560, 4096]
  • blk.41.attn_q_norm.weight
    F32
    [512]
  • blk.41.attn_v.weight
    Q4_K
    [2560, 1024]
  • blk.41.ffn_down.weight
    Q6_K
    [10240, 2560]
  • blk.41.ffn_gate.weight
    Q4_K
    [2560, 10240]
  • blk.41.ffn_norm.weight
    F32
    [2560]
  • blk.41.ffn_up.weight
    Q4_K
    [2560, 10240]
  • blk.41.inp_gate.weight
    Q4_K
    [2560, 256]
  • blk.41.layer_output_scale.weight
    F32
    [1]
  • blk.41.post_attention_norm.weight
    F32
    [2560]
  • blk.41.post_ffw_norm.weight
    F32
    [2560]
  • blk.41.post_norm.weight
    F32
    [2560]
  • blk.41.proj.weight
    Q4_K
    [256, 2560]
  • mm.a.fc.bias
    F32
    [1536]
  • mm.a.fc.weight
    F16
    [1024, 1536]
  • mm.a.input_projection.weight
    F16
    [1536, 2560]
  • mm.input_projection.weight
    F16
    [768, 2560]
  • per_layer_model_proj.weight
    Q4_K
    [2560, 10752]
  • per_layer_proj_norm.weight
    F32
    [256]
  • per_layer_token_embd.weight
    BF16
    [10752, 262144]
  • rope_freqs.weight
    F32
    [256]
  • v.blk.0
  • v.blk.0.attn_k.input_max
    F32
    [1]
  • v.blk.0.attn_k.input_min
    F32
    [1]
  • v.blk.0.attn_k.output_max
    F32
    [1]
  • v.blk.0.attn_k.output_min
    F32
    [1]
  • v.blk.0.attn_k.weight
    F16
    [768, 768]
  • v.blk.0.attn_k_norm.weight
    F32
    [64]
  • v.blk.0.attn_out.input_max
    F32
    [1]
  • v.blk.0.attn_out.input_min
    F32
    [1]
  • v.blk.0.attn_out.output_max
    F32
    [1]
  • v.blk.0.attn_out.output_min
    F32
    [1]
  • v.blk.0.attn_out.weight
    F16
    [768, 768]
  • v.blk.0.attn_post_norm.weight
    F32
    [768]
  • v.blk.0.attn_q.input_max
    F32
    [1]
  • v.blk.0.attn_q.input_min
    F32
    [1]
  • v.blk.0.attn_q.output_max
    F32
    [1]
  • v.blk.0.attn_q.output_min
    F32
    [1]
  • v.blk.0.attn_q.weight
    F16
    [768, 768]
  • v.blk.0.attn_q_norm.weight
    F32
    [64]
  • v.blk.0.attn_v.input_max
    F32
    [1]
  • v.blk.0.attn_v.input_min
    F32
    [1]
  • v.blk.0.attn_v.output_max
    F32
    [1]
  • v.blk.0.attn_v.output_min
    F32
    [1]
  • v.blk.0.attn_v.weight
    F16
    [768, 768]
  • v.blk.0.ffn_down.input_max
    F32
    [1]
  • v.blk.0.ffn_down.input_min
    F32
    [1]
  • v.blk.0.ffn_down.output_max
    F32
    [1]
  • v.blk.0.ffn_down.output_min
    F32
    [1]
  • v.blk.0.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.0.ffn_gate.input_max
    F32
    [1]
  • v.blk.0.ffn_gate.input_min
    F32
    [1]
  • v.blk.0.ffn_gate.output_max
    F32
    [1]
  • v.blk.0.ffn_gate.output_min
    F32
    [1]
  • v.blk.0.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.0.ffn_post_norm.weight
    F32
    [768]
  • v.blk.0.ffn_up.input_max
    F32
    [1]
  • v.blk.0.ffn_up.input_min
    F32
    [1]
  • v.blk.0.ffn_up.output_max
    F32
    [1]
  • v.blk.0.ffn_up.output_min
    F32
    [1]
  • v.blk.0.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.0.ln1.weight
    F32
    [768]
  • v.blk.0.ln2.weight
    F32
    [768]
  • v.blk.1
  • v.blk.1.attn_k.input_max
    F32
    [1]
  • v.blk.1.attn_k.input_min
    F32
    [1]
  • v.blk.1.attn_k.output_max
    F32
    [1]
  • v.blk.1.attn_k.output_min
    F32
    [1]
  • v.blk.1.attn_k.weight
    F16
    [768, 768]
  • v.blk.1.attn_k_norm.weight
    F32
    [64]
  • v.blk.1.attn_out.input_max
    F32
    [1]
  • v.blk.1.attn_out.input_min
    F32
    [1]
  • v.blk.1.attn_out.output_max
    F32
    [1]
  • v.blk.1.attn_out.output_min
    F32
    [1]
  • v.blk.1.attn_out.weight
    F16
    [768, 768]
  • v.blk.1.attn_post_norm.weight
    F32
    [768]
  • v.blk.1.attn_q.input_max
    F32
    [1]
  • v.blk.1.attn_q.input_min
    F32
    [1]
  • v.blk.1.attn_q.output_max
    F32
    [1]
  • v.blk.1.attn_q.output_min
    F32
    [1]
  • v.blk.1.attn_q.weight
    F16
    [768, 768]
  • v.blk.1.attn_q_norm.weight
    F32
    [64]
  • v.blk.1.attn_v.input_max
    F32
    [1]
  • v.blk.1.attn_v.input_min
    F32
    [1]
  • v.blk.1.attn_v.output_max
    F32
    [1]
  • v.blk.1.attn_v.output_min
    F32
    [1]
  • v.blk.1.attn_v.weight
    F16
    [768, 768]
  • v.blk.1.ffn_down.input_max
    F32
    [1]
  • v.blk.1.ffn_down.input_min
    F32
    [1]
  • v.blk.1.ffn_down.output_max
    F32
    [1]
  • v.blk.1.ffn_down.output_min
    F32
    [1]
  • v.blk.1.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.1.ffn_gate.input_max
    F32
    [1]
  • v.blk.1.ffn_gate.input_min
    F32
    [1]
  • v.blk.1.ffn_gate.output_max
    F32
    [1]
  • v.blk.1.ffn_gate.output_min
    F32
    [1]
  • v.blk.1.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.1.ffn_post_norm.weight
    F32
    [768]
  • v.blk.1.ffn_up.input_max
    F32
    [1]
  • v.blk.1.ffn_up.input_min
    F32
    [1]
  • v.blk.1.ffn_up.output_max
    F32
    [1]
  • v.blk.1.ffn_up.output_min
    F32
    [1]
  • v.blk.1.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.1.ln1.weight
    F32
    [768]
  • v.blk.1.ln2.weight
    F32
    [768]
  • v.blk.2
  • v.blk.2.attn_k.input_max
    F32
    [1]
  • v.blk.2.attn_k.input_min
    F32
    [1]
  • v.blk.2.attn_k.output_max
    F32
    [1]
  • v.blk.2.attn_k.output_min
    F32
    [1]
  • v.blk.2.attn_k.weight
    F16
    [768, 768]
  • v.blk.2.attn_k_norm.weight
    F32
    [64]
  • v.blk.2.attn_out.input_max
    F32
    [1]
  • v.blk.2.attn_out.input_min
    F32
    [1]
  • v.blk.2.attn_out.output_max
    F32
    [1]
  • v.blk.2.attn_out.output_min
    F32
    [1]
  • v.blk.2.attn_out.weight
    F16
    [768, 768]
  • v.blk.2.attn_post_norm.weight
    F32
    [768]
  • v.blk.2.attn_q.input_max
    F32
    [1]
  • v.blk.2.attn_q.input_min
    F32
    [1]
  • v.blk.2.attn_q.output_max
    F32
    [1]
  • v.blk.2.attn_q.output_min
    F32
    [1]
  • v.blk.2.attn_q.weight
    F16
    [768, 768]
  • v.blk.2.attn_q_norm.weight
    F32
    [64]
  • v.blk.2.attn_v.input_max
    F32
    [1]
  • v.blk.2.attn_v.input_min
    F32
    [1]
  • v.blk.2.attn_v.output_max
    F32
    [1]
  • v.blk.2.attn_v.output_min
    F32
    [1]
  • v.blk.2.attn_v.weight
    F16
    [768, 768]
  • v.blk.2.ffn_down.input_max
    F32
    [1]
  • v.blk.2.ffn_down.input_min
    F32
    [1]
  • v.blk.2.ffn_down.output_max
    F32
    [1]
  • v.blk.2.ffn_down.output_min
    F32
    [1]
  • v.blk.2.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.2.ffn_gate.input_max
    F32
    [1]
  • v.blk.2.ffn_gate.input_min
    F32
    [1]
  • v.blk.2.ffn_gate.output_max
    F32
    [1]
  • v.blk.2.ffn_gate.output_min
    F32
    [1]
  • v.blk.2.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.2.ffn_post_norm.weight
    F32
    [768]
  • v.blk.2.ffn_up.input_max
    F32
    [1]
  • v.blk.2.ffn_up.input_min
    F32
    [1]
  • v.blk.2.ffn_up.output_max
    F32
    [1]
  • v.blk.2.ffn_up.output_min
    F32
    [1]
  • v.blk.2.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.2.ln1.weight
    F32
    [768]
  • v.blk.2.ln2.weight
    F32
    [768]
  • v.blk.3
  • v.blk.3.attn_k.input_max
    F32
    [1]
  • v.blk.3.attn_k.input_min
    F32
    [1]
  • v.blk.3.attn_k.output_max
    F32
    [1]
  • v.blk.3.attn_k.output_min
    F32
    [1]
  • v.blk.3.attn_k.weight
    F16
    [768, 768]
  • v.blk.3.attn_k_norm.weight
    F32
    [64]
  • v.blk.3.attn_out.input_max
    F32
    [1]
  • v.blk.3.attn_out.input_min
    F32
    [1]
  • v.blk.3.attn_out.output_max
    F32
    [1]
  • v.blk.3.attn_out.output_min
    F32
    [1]
  • v.blk.3.attn_out.weight
    F16
    [768, 768]
  • v.blk.3.attn_post_norm.weight
    F32
    [768]
  • v.blk.3.attn_q.input_max
    F32
    [1]
  • v.blk.3.attn_q.input_min
    F32
    [1]
  • v.blk.3.attn_q.output_max
    F32
    [1]
  • v.blk.3.attn_q.output_min
    F32
    [1]
  • v.blk.3.attn_q.weight
    F16
    [768, 768]
  • v.blk.3.attn_q_norm.weight
    F32
    [64]
  • v.blk.3.attn_v.input_max
    F32
    [1]
  • v.blk.3.attn_v.input_min
    F32
    [1]
  • v.blk.3.attn_v.output_max
    F32
    [1]
  • v.blk.3.attn_v.output_min
    F32
    [1]
  • v.blk.3.attn_v.weight
    F16
    [768, 768]
  • v.blk.3.ffn_down.input_max
    F32
    [1]
  • v.blk.3.ffn_down.input_min
    F32
    [1]
  • v.blk.3.ffn_down.output_max
    F32
    [1]
  • v.blk.3.ffn_down.output_min
    F32
    [1]
  • v.blk.3.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.3.ffn_gate.input_max
    F32
    [1]
  • v.blk.3.ffn_gate.input_min
    F32
    [1]
  • v.blk.3.ffn_gate.output_max
    F32
    [1]
  • v.blk.3.ffn_gate.output_min
    F32
    [1]
  • v.blk.3.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.3.ffn_post_norm.weight
    F32
    [768]
  • v.blk.3.ffn_up.input_max
    F32
    [1]
  • v.blk.3.ffn_up.input_min
    F32
    [1]
  • v.blk.3.ffn_up.output_max
    F32
    [1]
  • v.blk.3.ffn_up.output_min
    F32
    [1]
  • v.blk.3.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.3.ln1.weight
    F32
    [768]
  • v.blk.3.ln2.weight
    F32
    [768]
  • v.blk.4
  • v.blk.4.attn_k.input_max
    F32
    [1]
  • v.blk.4.attn_k.input_min
    F32
    [1]
  • v.blk.4.attn_k.output_max
    F32
    [1]
  • v.blk.4.attn_k.output_min
    F32
    [1]
  • v.blk.4.attn_k.weight
    F16
    [768, 768]
  • v.blk.4.attn_k_norm.weight
    F32
    [64]
  • v.blk.4.attn_out.input_max
    F32
    [1]
  • v.blk.4.attn_out.input_min
    F32
    [1]
  • v.blk.4.attn_out.output_max
    F32
    [1]
  • v.blk.4.attn_out.output_min
    F32
    [1]
  • v.blk.4.attn_out.weight
    F16
    [768, 768]
  • v.blk.4.attn_post_norm.weight
    F32
    [768]
  • v.blk.4.attn_q.input_max
    F32
    [1]
  • v.blk.4.attn_q.input_min
    F32
    [1]
  • v.blk.4.attn_q.output_max
    F32
    [1]
  • v.blk.4.attn_q.output_min
    F32
    [1]
  • v.blk.4.attn_q.weight
    F16
    [768, 768]
  • v.blk.4.attn_q_norm.weight
    F32
    [64]
  • v.blk.4.attn_v.input_max
    F32
    [1]
  • v.blk.4.attn_v.input_min
    F32
    [1]
  • v.blk.4.attn_v.output_max
    F32
    [1]
  • v.blk.4.attn_v.output_min
    F32
    [1]
  • v.blk.4.attn_v.weight
    F16
    [768, 768]
  • v.blk.4.ffn_down.input_max
    F32
    [1]
  • v.blk.4.ffn_down.input_min
    F32
    [1]
  • v.blk.4.ffn_down.output_max
    F32
    [1]
  • v.blk.4.ffn_down.output_min
    F32
    [1]
  • v.blk.4.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.4.ffn_gate.input_max
    F32
    [1]
  • v.blk.4.ffn_gate.input_min
    F32
    [1]
  • v.blk.4.ffn_gate.output_max
    F32
    [1]
  • v.blk.4.ffn_gate.output_min
    F32
    [1]
  • v.blk.4.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.4.ffn_post_norm.weight
    F32
    [768]
  • v.blk.4.ffn_up.input_max
    F32
    [1]
  • v.blk.4.ffn_up.input_min
    F32
    [1]
  • v.blk.4.ffn_up.output_max
    F32
    [1]
  • v.blk.4.ffn_up.output_min
    F32
    [1]
  • v.blk.4.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.4.ln1.weight
    F32
    [768]
  • v.blk.4.ln2.weight
    F32
    [768]
  • v.blk.5
  • v.blk.5.attn_k.input_max
    F32
    [1]
  • v.blk.5.attn_k.input_min
    F32
    [1]
  • v.blk.5.attn_k.output_max
    F32
    [1]
  • v.blk.5.attn_k.output_min
    F32
    [1]
  • v.blk.5.attn_k.weight
    F16
    [768, 768]
  • v.blk.5.attn_k_norm.weight
    F32
    [64]
  • v.blk.5.attn_out.input_max
    F32
    [1]
  • v.blk.5.attn_out.input_min
    F32
    [1]
  • v.blk.5.attn_out.output_max
    F32
    [1]
  • v.blk.5.attn_out.output_min
    F32
    [1]
  • v.blk.5.attn_out.weight
    F16
    [768, 768]
  • v.blk.5.attn_post_norm.weight
    F32
    [768]
  • v.blk.5.attn_q.input_max
    F32
    [1]
  • v.blk.5.attn_q.input_min
    F32
    [1]
  • v.blk.5.attn_q.output_max
    F32
    [1]
  • v.blk.5.attn_q.output_min
    F32
    [1]
  • v.blk.5.attn_q.weight
    F16
    [768, 768]
  • v.blk.5.attn_q_norm.weight
    F32
    [64]
  • v.blk.5.attn_v.input_max
    F32
    [1]
  • v.blk.5.attn_v.input_min
    F32
    [1]
  • v.blk.5.attn_v.output_max
    F32
    [1]
  • v.blk.5.attn_v.output_min
    F32
    [1]
  • v.blk.5.attn_v.weight
    F16
    [768, 768]
  • v.blk.5.ffn_down.input_max
    F32
    [1]
  • v.blk.5.ffn_down.input_min
    F32
    [1]
  • v.blk.5.ffn_down.output_max
    F32
    [1]
  • v.blk.5.ffn_down.output_min
    F32
    [1]
  • v.blk.5.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.5.ffn_gate.input_max
    F32
    [1]
  • v.blk.5.ffn_gate.input_min
    F32
    [1]
  • v.blk.5.ffn_gate.output_max
    F32
    [1]
  • v.blk.5.ffn_gate.output_min
    F32
    [1]
  • v.blk.5.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.5.ffn_post_norm.weight
    F32
    [768]
  • v.blk.5.ffn_up.input_max
    F32
    [1]
  • v.blk.5.ffn_up.input_min
    F32
    [1]
  • v.blk.5.ffn_up.output_max
    F32
    [1]
  • v.blk.5.ffn_up.output_min
    F32
    [1]
  • v.blk.5.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.5.ln1.weight
    F32
    [768]
  • v.blk.5.ln2.weight
    F32
    [768]
  • v.blk.6
  • v.blk.6.attn_k.input_max
    F32
    [1]
  • v.blk.6.attn_k.input_min
    F32
    [1]
  • v.blk.6.attn_k.output_max
    F32
    [1]
  • v.blk.6.attn_k.output_min
    F32
    [1]
  • v.blk.6.attn_k.weight
    F16
    [768, 768]
  • v.blk.6.attn_k_norm.weight
    F32
    [64]
  • v.blk.6.attn_out.input_max
    F32
    [1]
  • v.blk.6.attn_out.input_min
    F32
    [1]
  • v.blk.6.attn_out.output_max
    F32
    [1]
  • v.blk.6.attn_out.output_min
    F32
    [1]
  • v.blk.6.attn_out.weight
    F16
    [768, 768]
  • v.blk.6.attn_post_norm.weight
    F32
    [768]
  • v.blk.6.attn_q.input_max
    F32
    [1]
  • v.blk.6.attn_q.input_min
    F32
    [1]
  • v.blk.6.attn_q.output_max
    F32
    [1]
  • v.blk.6.attn_q.output_min
    F32
    [1]
  • v.blk.6.attn_q.weight
    F16
    [768, 768]
  • v.blk.6.attn_q_norm.weight
    F32
    [64]
  • v.blk.6.attn_v.input_max
    F32
    [1]
  • v.blk.6.attn_v.input_min
    F32
    [1]
  • v.blk.6.attn_v.output_max
    F32
    [1]
  • v.blk.6.attn_v.output_min
    F32
    [1]
  • v.blk.6.attn_v.weight
    F16
    [768, 768]
  • v.blk.6.ffn_down.input_max
    F32
    [1]
  • v.blk.6.ffn_down.input_min
    F32
    [1]
  • v.blk.6.ffn_down.output_max
    F32
    [1]
  • v.blk.6.ffn_down.output_min
    F32
    [1]
  • v.blk.6.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.6.ffn_gate.input_max
    F32
    [1]
  • v.blk.6.ffn_gate.input_min
    F32
    [1]
  • v.blk.6.ffn_gate.output_max
    F32
    [1]
  • v.blk.6.ffn_gate.output_min
    F32
    [1]
  • v.blk.6.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.6.ffn_post_norm.weight
    F32
    [768]
  • v.blk.6.ffn_up.input_max
    F32
    [1]
  • v.blk.6.ffn_up.input_min
    F32
    [1]
  • v.blk.6.ffn_up.output_max
    F32
    [1]
  • v.blk.6.ffn_up.output_min
    F32
    [1]
  • v.blk.6.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.6.ln1.weight
    F32
    [768]
  • v.blk.6.ln2.weight
    F32
    [768]
  • v.blk.7
  • v.blk.7.attn_k.input_max
    F32
    [1]
  • v.blk.7.attn_k.input_min
    F32
    [1]
  • v.blk.7.attn_k.output_max
    F32
    [1]
  • v.blk.7.attn_k.output_min
    F32
    [1]
  • v.blk.7.attn_k.weight
    F16
    [768, 768]
  • v.blk.7.attn_k_norm.weight
    F32
    [64]
  • v.blk.7.attn_out.input_max
    F32
    [1]
  • v.blk.7.attn_out.input_min
    F32
    [1]
  • v.blk.7.attn_out.output_max
    F32
    [1]
  • v.blk.7.attn_out.output_min
    F32
    [1]
  • v.blk.7.attn_out.weight
    F16
    [768, 768]
  • v.blk.7.attn_post_norm.weight
    F32
    [768]
  • v.blk.7.attn_q.input_max
    F32
    [1]
  • v.blk.7.attn_q.input_min
    F32
    [1]
  • v.blk.7.attn_q.output_max
    F32
    [1]
  • v.blk.7.attn_q.output_min
    F32
    [1]
  • v.blk.7.attn_q.weight
    F16
    [768, 768]
  • v.blk.7.attn_q_norm.weight
    F32
    [64]
  • v.blk.7.attn_v.input_max
    F32
    [1]
  • v.blk.7.attn_v.input_min
    F32
    [1]
  • v.blk.7.attn_v.output_max
    F32
    [1]
  • v.blk.7.attn_v.output_min
    F32
    [1]
  • v.blk.7.attn_v.weight
    F16
    [768, 768]
  • v.blk.7.ffn_down.input_max
    F32
    [1]
  • v.blk.7.ffn_down.input_min
    F32
    [1]
  • v.blk.7.ffn_down.output_max
    F32
    [1]
  • v.blk.7.ffn_down.output_min
    F32
    [1]
  • v.blk.7.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.7.ffn_gate.input_max
    F32
    [1]
  • v.blk.7.ffn_gate.input_min
    F32
    [1]
  • v.blk.7.ffn_gate.output_max
    F32
    [1]
  • v.blk.7.ffn_gate.output_min
    F32
    [1]
  • v.blk.7.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.7.ffn_post_norm.weight
    F32
    [768]
  • v.blk.7.ffn_up.input_max
    F32
    [1]
  • v.blk.7.ffn_up.input_min
    F32
    [1]
  • v.blk.7.ffn_up.output_max
    F32
    [1]
  • v.blk.7.ffn_up.output_min
    F32
    [1]
  • v.blk.7.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.7.ln1.weight
    F32
    [768]
  • v.blk.7.ln2.weight
    F32
    [768]
  • v.blk.8
  • v.blk.8.attn_k.input_max
    F32
    [1]
  • v.blk.8.attn_k.input_min
    F32
    [1]
  • v.blk.8.attn_k.output_max
    F32
    [1]
  • v.blk.8.attn_k.output_min
    F32
    [1]
  • v.blk.8.attn_k.weight
    F16
    [768, 768]
  • v.blk.8.attn_k_norm.weight
    F32
    [64]
  • v.blk.8.attn_out.input_max
    F32
    [1]
  • v.blk.8.attn_out.input_min
    F32
    [1]
  • v.blk.8.attn_out.output_max
    F32
    [1]
  • v.blk.8.attn_out.output_min
    F32
    [1]
  • v.blk.8.attn_out.weight
    F16
    [768, 768]
  • v.blk.8.attn_post_norm.weight
    F32
    [768]
  • v.blk.8.attn_q.input_max
    F32
    [1]
  • v.blk.8.attn_q.input_min
    F32
    [1]
  • v.blk.8.attn_q.output_max
    F32
    [1]
  • v.blk.8.attn_q.output_min
    F32
    [1]
  • v.blk.8.attn_q.weight
    F16
    [768, 768]
  • v.blk.8.attn_q_norm.weight
    F32
    [64]
  • v.blk.8.attn_v.input_max
    F32
    [1]
  • v.blk.8.attn_v.input_min
    F32
    [1]
  • v.blk.8.attn_v.output_max
    F32
    [1]
  • v.blk.8.attn_v.output_min
    F32
    [1]
  • v.blk.8.attn_v.weight
    F16
    [768, 768]
  • v.blk.8.ffn_down.input_max
    F32
    [1]
  • v.blk.8.ffn_down.input_min
    F32
    [1]
  • v.blk.8.ffn_down.output_max
    F32
    [1]
  • v.blk.8.ffn_down.output_min
    F32
    [1]
  • v.blk.8.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.8.ffn_gate.input_max
    F32
    [1]
  • v.blk.8.ffn_gate.input_min
    F32
    [1]
  • v.blk.8.ffn_gate.output_max
    F32
    [1]
  • v.blk.8.ffn_gate.output_min
    F32
    [1]
  • v.blk.8.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.8.ffn_post_norm.weight
    F32
    [768]
  • v.blk.8.ffn_up.input_max
    F32
    [1]
  • v.blk.8.ffn_up.input_min
    F32
    [1]
  • v.blk.8.ffn_up.output_max
    F32
    [1]
  • v.blk.8.ffn_up.output_min
    F32
    [1]
  • v.blk.8.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.8.ln1.weight
    F32
    [768]
  • v.blk.8.ln2.weight
    F32
    [768]
  • v.blk.9
  • v.blk.9.attn_k.input_max
    F32
    [1]
  • v.blk.9.attn_k.input_min
    F32
    [1]
  • v.blk.9.attn_k.output_max
    F32
    [1]
  • v.blk.9.attn_k.output_min
    F32
    [1]
  • v.blk.9.attn_k.weight
    F16
    [768, 768]
  • v.blk.9.attn_k_norm.weight
    F32
    [64]
  • v.blk.9.attn_out.input_max
    F32
    [1]
  • v.blk.9.attn_out.input_min
    F32
    [1]
  • v.blk.9.attn_out.output_max
    F32
    [1]
  • v.blk.9.attn_out.output_min
    F32
    [1]
  • v.blk.9.attn_out.weight
    F16
    [768, 768]
  • v.blk.9.attn_post_norm.weight
    F32
    [768]
  • v.blk.9.attn_q.input_max
    F32
    [1]
  • v.blk.9.attn_q.input_min
    F32
    [1]
  • v.blk.9.attn_q.output_max
    F32
    [1]
  • v.blk.9.attn_q.output_min
    F32
    [1]
  • v.blk.9.attn_q.weight
    F16
    [768, 768]
  • v.blk.9.attn_q_norm.weight
    F32
    [64]
  • v.blk.9.attn_v.input_max
    F32
    [1]
  • v.blk.9.attn_v.input_min
    F32
    [1]
  • v.blk.9.attn_v.output_max
    F32
    [1]
  • v.blk.9.attn_v.output_min
    F32
    [1]
  • v.blk.9.attn_v.weight
    F16
    [768, 768]
  • v.blk.9.ffn_down.input_max
    F32
    [1]
  • v.blk.9.ffn_down.input_min
    F32
    [1]
  • v.blk.9.ffn_down.output_max
    F32
    [1]
  • v.blk.9.ffn_down.output_min
    F32
    [1]
  • v.blk.9.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.9.ffn_gate.input_max
    F32
    [1]
  • v.blk.9.ffn_gate.input_min
    F32
    [1]
  • v.blk.9.ffn_gate.output_max
    F32
    [1]
  • v.blk.9.ffn_gate.output_min
    F32
    [1]
  • v.blk.9.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.9.ffn_post_norm.weight
    F32
    [768]
  • v.blk.9.ffn_up.input_max
    F32
    [1]
  • v.blk.9.ffn_up.input_min
    F32
    [1]
  • v.blk.9.ffn_up.output_max
    F32
    [1]
  • v.blk.9.ffn_up.output_min
    F32
    [1]
  • v.blk.9.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.9.ln1.weight
    F32
    [768]
  • v.blk.9.ln2.weight
    F32
    [768]
  • v.blk.10
  • v.blk.10.attn_k.input_max
    F32
    [1]
  • v.blk.10.attn_k.input_min
    F32
    [1]
  • v.blk.10.attn_k.output_max
    F32
    [1]
  • v.blk.10.attn_k.output_min
    F32
    [1]
  • v.blk.10.attn_k.weight
    F16
    [768, 768]
  • v.blk.10.attn_k_norm.weight
    F32
    [64]
  • v.blk.10.attn_out.input_max
    F32
    [1]
  • v.blk.10.attn_out.input_min
    F32
    [1]
  • v.blk.10.attn_out.output_max
    F32
    [1]
  • v.blk.10.attn_out.output_min
    F32
    [1]
  • v.blk.10.attn_out.weight
    F16
    [768, 768]
  • v.blk.10.attn_post_norm.weight
    F32
    [768]
  • v.blk.10.attn_q.input_max
    F32
    [1]
  • v.blk.10.attn_q.input_min
    F32
    [1]
  • v.blk.10.attn_q.output_max
    F32
    [1]
  • v.blk.10.attn_q.output_min
    F32
    [1]
  • v.blk.10.attn_q.weight
    F16
    [768, 768]
  • v.blk.10.attn_q_norm.weight
    F32
    [64]
  • v.blk.10.attn_v.input_max
    F32
    [1]
  • v.blk.10.attn_v.input_min
    F32
    [1]
  • v.blk.10.attn_v.output_max
    F32
    [1]
  • v.blk.10.attn_v.output_min
    F32
    [1]
  • v.blk.10.attn_v.weight
    F16
    [768, 768]
  • v.blk.10.ffn_down.input_max
    F32
    [1]
  • v.blk.10.ffn_down.input_min
    F32
    [1]
  • v.blk.10.ffn_down.output_max
    F32
    [1]
  • v.blk.10.ffn_down.output_min
    F32
    [1]
  • v.blk.10.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.10.ffn_gate.input_max
    F32
    [1]
  • v.blk.10.ffn_gate.input_min
    F32
    [1]
  • v.blk.10.ffn_gate.output_max
    F32
    [1]
  • v.blk.10.ffn_gate.output_min
    F32
    [1]
  • v.blk.10.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.10.ffn_post_norm.weight
    F32
    [768]
  • v.blk.10.ffn_up.input_max
    F32
    [1]
  • v.blk.10.ffn_up.input_min
    F32
    [1]
  • v.blk.10.ffn_up.output_max
    F32
    [1]
  • v.blk.10.ffn_up.output_min
    F32
    [1]
  • v.blk.10.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.10.ln1.weight
    F32
    [768]
  • v.blk.10.ln2.weight
    F32
    [768]
  • v.blk.11
  • v.blk.11.attn_k.input_max
    F32
    [1]
  • v.blk.11.attn_k.input_min
    F32
    [1]
  • v.blk.11.attn_k.output_max
    F32
    [1]
  • v.blk.11.attn_k.output_min
    F32
    [1]
  • v.blk.11.attn_k.weight
    F16
    [768, 768]
  • v.blk.11.attn_k_norm.weight
    F32
    [64]
  • v.blk.11.attn_out.input_max
    F32
    [1]
  • v.blk.11.attn_out.input_min
    F32
    [1]
  • v.blk.11.attn_out.output_max
    F32
    [1]
  • v.blk.11.attn_out.output_min
    F32
    [1]
  • v.blk.11.attn_out.weight
    F16
    [768, 768]
  • v.blk.11.attn_post_norm.weight
    F32
    [768]
  • v.blk.11.attn_q.input_max
    F32
    [1]
  • v.blk.11.attn_q.input_min
    F32
    [1]
  • v.blk.11.attn_q.output_max
    F32
    [1]
  • v.blk.11.attn_q.output_min
    F32
    [1]
  • v.blk.11.attn_q.weight
    F16
    [768, 768]
  • v.blk.11.attn_q_norm.weight
    F32
    [64]
  • v.blk.11.attn_v.input_max
    F32
    [1]
  • v.blk.11.attn_v.input_min
    F32
    [1]
  • v.blk.11.attn_v.output_max
    F32
    [1]
  • v.blk.11.attn_v.output_min
    F32
    [1]
  • v.blk.11.attn_v.weight
    F16
    [768, 768]
  • v.blk.11.ffn_down.input_max
    F32
    [1]
  • v.blk.11.ffn_down.input_min
    F32
    [1]
  • v.blk.11.ffn_down.output_max
    F32
    [1]
  • v.blk.11.ffn_down.output_min
    F32
    [1]
  • v.blk.11.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.11.ffn_gate.input_max
    F32
    [1]
  • v.blk.11.ffn_gate.input_min
    F32
    [1]
  • v.blk.11.ffn_gate.output_max
    F32
    [1]
  • v.blk.11.ffn_gate.output_min
    F32
    [1]
  • v.blk.11.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.11.ffn_post_norm.weight
    F32
    [768]
  • v.blk.11.ffn_up.input_max
    F32
    [1]
  • v.blk.11.ffn_up.input_min
    F32
    [1]
  • v.blk.11.ffn_up.output_max
    F32
    [1]
  • v.blk.11.ffn_up.output_min
    F32
    [1]
  • v.blk.11.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.11.ln1.weight
    F32
    [768]
  • v.blk.11.ln2.weight
    F32
    [768]
  • v.blk.12
  • v.blk.12.attn_k.input_max
    F32
    [1]
  • v.blk.12.attn_k.input_min
    F32
    [1]
  • v.blk.12.attn_k.output_max
    F32
    [1]
  • v.blk.12.attn_k.output_min
    F32
    [1]
  • v.blk.12.attn_k.weight
    F16
    [768, 768]
  • v.blk.12.attn_k_norm.weight
    F32
    [64]
  • v.blk.12.attn_out.input_max
    F32
    [1]
  • v.blk.12.attn_out.input_min
    F32
    [1]
  • v.blk.12.attn_out.output_max
    F32
    [1]
  • v.blk.12.attn_out.output_min
    F32
    [1]
  • v.blk.12.attn_out.weight
    F16
    [768, 768]
  • v.blk.12.attn_post_norm.weight
    F32
    [768]
  • v.blk.12.attn_q.input_max
    F32
    [1]
  • v.blk.12.attn_q.input_min
    F32
    [1]
  • v.blk.12.attn_q.output_max
    F32
    [1]
  • v.blk.12.attn_q.output_min
    F32
    [1]
  • v.blk.12.attn_q.weight
    F16
    [768, 768]
  • v.blk.12.attn_q_norm.weight
    F32
    [64]
  • v.blk.12.attn_v.input_max
    F32
    [1]
  • v.blk.12.attn_v.input_min
    F32
    [1]
  • v.blk.12.attn_v.output_max
    F32
    [1]
  • v.blk.12.attn_v.output_min
    F32
    [1]
  • v.blk.12.attn_v.weight
    F16
    [768, 768]
  • v.blk.12.ffn_down.input_max
    F32
    [1]
  • v.blk.12.ffn_down.input_min
    F32
    [1]
  • v.blk.12.ffn_down.output_max
    F32
    [1]
  • v.blk.12.ffn_down.output_min
    F32
    [1]
  • v.blk.12.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.12.ffn_gate.input_max
    F32
    [1]
  • v.blk.12.ffn_gate.input_min
    F32
    [1]
  • v.blk.12.ffn_gate.output_max
    F32
    [1]
  • v.blk.12.ffn_gate.output_min
    F32
    [1]
  • v.blk.12.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.12.ffn_post_norm.weight
    F32
    [768]
  • v.blk.12.ffn_up.input_max
    F32
    [1]
  • v.blk.12.ffn_up.input_min
    F32
    [1]
  • v.blk.12.ffn_up.output_max
    F32
    [1]
  • v.blk.12.ffn_up.output_min
    F32
    [1]
  • v.blk.12.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.12.ln1.weight
    F32
    [768]
  • v.blk.12.ln2.weight
    F32
    [768]
  • v.blk.13
  • v.blk.13.attn_k.input_max
    F32
    [1]
  • v.blk.13.attn_k.input_min
    F32
    [1]
  • v.blk.13.attn_k.output_max
    F32
    [1]
  • v.blk.13.attn_k.output_min
    F32
    [1]
  • v.blk.13.attn_k.weight
    F16
    [768, 768]
  • v.blk.13.attn_k_norm.weight
    F32
    [64]
  • v.blk.13.attn_out.input_max
    F32
    [1]
  • v.blk.13.attn_out.input_min
    F32
    [1]
  • v.blk.13.attn_out.output_max
    F32
    [1]
  • v.blk.13.attn_out.output_min
    F32
    [1]
  • v.blk.13.attn_out.weight
    F16
    [768, 768]
  • v.blk.13.attn_post_norm.weight
    F32
    [768]
  • v.blk.13.attn_q.input_max
    F32
    [1]
  • v.blk.13.attn_q.input_min
    F32
    [1]
  • v.blk.13.attn_q.output_max
    F32
    [1]
  • v.blk.13.attn_q.output_min
    F32
    [1]
  • v.blk.13.attn_q.weight
    F16
    [768, 768]
  • v.blk.13.attn_q_norm.weight
    F32
    [64]
  • v.blk.13.attn_v.input_max
    F32
    [1]
  • v.blk.13.attn_v.input_min
    F32
    [1]
  • v.blk.13.attn_v.output_max
    F32
    [1]
  • v.blk.13.attn_v.output_min
    F32
    [1]
  • v.blk.13.attn_v.weight
    F16
    [768, 768]
  • v.blk.13.ffn_down.input_max
    F32
    [1]
  • v.blk.13.ffn_down.input_min
    F32
    [1]
  • v.blk.13.ffn_down.output_max
    F32
    [1]
  • v.blk.13.ffn_down.output_min
    F32
    [1]
  • v.blk.13.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.13.ffn_gate.input_max
    F32
    [1]
  • v.blk.13.ffn_gate.input_min
    F32
    [1]
  • v.blk.13.ffn_gate.output_max
    F32
    [1]
  • v.blk.13.ffn_gate.output_min
    F32
    [1]
  • v.blk.13.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.13.ffn_post_norm.weight
    F32
    [768]
  • v.blk.13.ffn_up.input_max
    F32
    [1]
  • v.blk.13.ffn_up.input_min
    F32
    [1]
  • v.blk.13.ffn_up.output_max
    F32
    [1]
  • v.blk.13.ffn_up.output_min
    F32
    [1]
  • v.blk.13.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.13.ln1.weight
    F32
    [768]
  • v.blk.13.ln2.weight
    F32
    [768]
  • v.blk.14
  • v.blk.14.attn_k.input_max
    F32
    [1]
  • v.blk.14.attn_k.input_min
    F32
    [1]
  • v.blk.14.attn_k.output_max
    F32
    [1]
  • v.blk.14.attn_k.output_min
    F32
    [1]
  • v.blk.14.attn_k.weight
    F16
    [768, 768]
  • v.blk.14.attn_k_norm.weight
    F32
    [64]
  • v.blk.14.attn_out.input_max
    F32
    [1]
  • v.blk.14.attn_out.input_min
    F32
    [1]
  • v.blk.14.attn_out.output_max
    F32
    [1]
  • v.blk.14.attn_out.output_min
    F32
    [1]
  • v.blk.14.attn_out.weight
    F16
    [768, 768]
  • v.blk.14.attn_post_norm.weight
    F32
    [768]
  • v.blk.14.attn_q.input_max
    F32
    [1]
  • v.blk.14.attn_q.input_min
    F32
    [1]
  • v.blk.14.attn_q.output_max
    F32
    [1]
  • v.blk.14.attn_q.output_min
    F32
    [1]
  • v.blk.14.attn_q.weight
    F16
    [768, 768]
  • v.blk.14.attn_q_norm.weight
    F32
    [64]
  • v.blk.14.attn_v.input_max
    F32
    [1]
  • v.blk.14.attn_v.input_min
    F32
    [1]
  • v.blk.14.attn_v.output_max
    F32
    [1]
  • v.blk.14.attn_v.output_min
    F32
    [1]
  • v.blk.14.attn_v.weight
    F16
    [768, 768]
  • v.blk.14.ffn_down.input_max
    F32
    [1]
  • v.blk.14.ffn_down.input_min
    F32
    [1]
  • v.blk.14.ffn_down.output_max
    F32
    [1]
  • v.blk.14.ffn_down.output_min
    F32
    [1]
  • v.blk.14.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.14.ffn_gate.input_max
    F32
    [1]
  • v.blk.14.ffn_gate.input_min
    F32
    [1]
  • v.blk.14.ffn_gate.output_max
    F32
    [1]
  • v.blk.14.ffn_gate.output_min
    F32
    [1]
  • v.blk.14.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.14.ffn_post_norm.weight
    F32
    [768]
  • v.blk.14.ffn_up.input_max
    F32
    [1]
  • v.blk.14.ffn_up.input_min
    F32
    [1]
  • v.blk.14.ffn_up.output_max
    F32
    [1]
  • v.blk.14.ffn_up.output_min
    F32
    [1]
  • v.blk.14.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.14.ln1.weight
    F32
    [768]
  • v.blk.14.ln2.weight
    F32
    [768]
  • v.blk.15
  • v.blk.15.attn_k.input_max
    F32
    [1]
  • v.blk.15.attn_k.input_min
    F32
    [1]
  • v.blk.15.attn_k.output_max
    F32
    [1]
  • v.blk.15.attn_k.output_min
    F32
    [1]
  • v.blk.15.attn_k.weight
    F16
    [768, 768]
  • v.blk.15.attn_k_norm.weight
    F32
    [64]
  • v.blk.15.attn_out.input_max
    F32
    [1]
  • v.blk.15.attn_out.input_min
    F32
    [1]
  • v.blk.15.attn_out.output_max
    F32
    [1]
  • v.blk.15.attn_out.output_min
    F32
    [1]
  • v.blk.15.attn_out.weight
    F16
    [768, 768]
  • v.blk.15.attn_post_norm.weight
    F32
    [768]
  • v.blk.15.attn_q.input_max
    F32
    [1]
  • v.blk.15.attn_q.input_min
    F32
    [1]
  • v.blk.15.attn_q.output_max
    F32
    [1]
  • v.blk.15.attn_q.output_min
    F32
    [1]
  • v.blk.15.attn_q.weight
    F16
    [768, 768]
  • v.blk.15.attn_q_norm.weight
    F32
    [64]
  • v.blk.15.attn_v.input_max
    F32
    [1]
  • v.blk.15.attn_v.input_min
    F32
    [1]
  • v.blk.15.attn_v.output_max
    F32
    [1]
  • v.blk.15.attn_v.output_min
    F32
    [1]
  • v.blk.15.attn_v.weight
    F16
    [768, 768]
  • v.blk.15.ffn_down.input_max
    F32
    [1]
  • v.blk.15.ffn_down.input_min
    F32
    [1]
  • v.blk.15.ffn_down.output_max
    F32
    [1]
  • v.blk.15.ffn_down.output_min
    F32
    [1]
  • v.blk.15.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.15.ffn_gate.input_max
    F32
    [1]
  • v.blk.15.ffn_gate.input_min
    F32
    [1]
  • v.blk.15.ffn_gate.output_max
    F32
    [1]
  • v.blk.15.ffn_gate.output_min
    F32
    [1]
  • v.blk.15.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.15.ffn_post_norm.weight
    F32
    [768]
  • v.blk.15.ffn_up.input_max
    F32
    [1]
  • v.blk.15.ffn_up.input_min
    F32
    [1]
  • v.blk.15.ffn_up.output_max
    F32
    [1]
  • v.blk.15.ffn_up.output_min
    F32
    [1]
  • v.blk.15.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.15.ln1.weight
    F32
    [768]
  • v.blk.15.ln2.weight
    F32
    [768]
  • v.patch_embd.weight
    F16
    [16, 16, 3, 768]
  • v.position_embd.weight
    F32
    [768, 10240, 2]
  • output_norm.weight
    F32
    [2560]