16.3K 21 minutes ago

Gemma 4 Turbo is an optimized version of Google's Gemma 4 (9B) model, achieving 51% faster CPU inference through int4 quantization and performance tuning. It is ideal for local AI assistants, tool calling, and chat applications on Windows systems without a GPU.

vision tools thinking audio e2b e4b 26b 31b
587f7c53d95e · 4.3GB
    Metadata
  • general.architecture
    gemma4
  • general.file_type
    IQ4_XS
  • gemma4.attention.head_count
    8
  • gemma4.attention.head_count_kv
    1
  • gemma4.attention.key_length
    512
  • gemma4.attention.key_length_swa
    256
  • gemma4.attention.layer_norm_rms_epsilon
    1e-06
  • gemma4.attention.shared_kv_layers
    20
  • gemma4.attention.sliding_window
    512
  • gemma4.attention.sliding_window_pattern
    [true, true, true, true, false, ...]
  • gemma4.attention.value_length
    512
  • gemma4.attention.value_length_swa
    256
  • gemma4.audio.attention.head_count
    8
  • gemma4.audio.attention.layer_norm_epsilon
    1e-06
  • gemma4.audio.block_count
    12
  • gemma4.audio.conv_kernel_size
    5
  • gemma4.audio.embedding_length
    1024
  • gemma4.audio.feed_forward_length
    4096
  • gemma4.block_count
    35
  • gemma4.context_length
    131072
  • gemma4.embedding_length
    1536
  • gemma4.embedding_length_per_layer_input
    256
  • gemma4.feed_forward_length
    [6144, 6144, 6144, 6144, 6144, ...]
  • gemma4.final_logit_softcapping
    30
  • gemma4.rope.dimension_count
    512
  • gemma4.rope.dimension_count_swa
    256
  • gemma4.rope.freq_base
    1e+06
  • gemma4.rope.freq_base_swa
    10000
  • gemma4.vision.attention.head_count
    12
  • gemma4.vision.attention.layer_norm_epsilon
    1e-06
  • gemma4.vision.block_count
    16
  • gemma4.vision.embedding_length
    768
  • gemma4.vision.feed_forward_length
    3072
  • gemma4.vision.num_channels
    3
  • gemma4.vision.patch_size
    16
  • gemma4.vision.projector.scale_factor
    3
  • tokenizer.ggml.add_bos_token
    true
  • tokenizer.ggml.add_space_prefix
    false
  • tokenizer.ggml.bos_token_id
    2
  • tokenizer.ggml.eos_token_id
    1
  • tokenizer.ggml.mask_token_id
    4
  • tokenizer.ggml.merges
    [ , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁, , , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁▁, ...]
  • tokenizer.ggml.model
    gemma4
  • tokenizer.ggml.padding_token_id
    0
  • tokenizer.ggml.scores
    [-1000, -1000, -1000, -1000, -1000, ...]
  • tokenizer.ggml.token_type
    [3, 3, 3, 3, 3, ...]
  • tokenizer.ggml.tokens
    [<pad>, <eos>, <bos>, <unk>, <mask>, ...]
  • tokenizer.ggml.unknown_token_id
    3
  • Tensor
  • token_embd.weight
    Q6_K
    [1536, 262144]
  • a.blk.0
  • a.blk.0.attn_k.input_max
    F32
    [1]
  • a.blk.0.attn_k.input_min
    F32
    [1]
  • a.blk.0.attn_k.output_max
    F32
    [1]
  • a.blk.0.attn_k.output_min
    F32
    [1]
  • a.blk.0.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.0.attn_out.input_max
    F32
    [1]
  • a.blk.0.attn_out.input_min
    F32
    [1]
  • a.blk.0.attn_out.output_max
    F32
    [1]
  • a.blk.0.attn_out.output_min
    F32
    [1]
  • a.blk.0.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.0.attn_q.input_max
    F32
    [1]
  • a.blk.0.attn_q.input_min
    F32
    [1]
  • a.blk.0.attn_q.output_max
    F32
    [1]
  • a.blk.0.attn_q.output_min
    F32
    [1]
  • a.blk.0.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.0.attn_v.input_max
    F32
    [1]
  • a.blk.0.attn_v.input_min
    F32
    [1]
  • a.blk.0.attn_v.output_max
    F32
    [1]
  • a.blk.0.attn_v.output_min
    F32
    [1]
  • a.blk.0.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.0.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.0.conv_norm.weight
    F32
    [1024]
  • a.blk.0.conv_pw1.input_max
    F32
    [1]
  • a.blk.0.conv_pw1.input_min
    F32
    [1]
  • a.blk.0.conv_pw1.output_max
    F32
    [1]
  • a.blk.0.conv_pw1.output_min
    F32
    [1]
  • a.blk.0.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.0.conv_pw2.input_max
    F32
    [1]
  • a.blk.0.conv_pw2.input_min
    F32
    [1]
  • a.blk.0.conv_pw2.output_max
    F32
    [1]
  • a.blk.0.conv_pw2.output_min
    F32
    [1]
  • a.blk.0.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.0.ffn_down.input_max
    F32
    [1]
  • a.blk.0.ffn_down.input_min
    F32
    [1]
  • a.blk.0.ffn_down.output_max
    F32
    [1]
  • a.blk.0.ffn_down.output_min
    F32
    [1]
  • a.blk.0.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.0.ffn_down_1.input_max
    F32
    [1]
  • a.blk.0.ffn_down_1.input_min
    F32
    [1]
  • a.blk.0.ffn_down_1.output_max
    F32
    [1]
  • a.blk.0.ffn_down_1.output_min
    F32
    [1]
  • a.blk.0.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.0.ffn_norm.weight
    F32
    [1024]
  • a.blk.0.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.0.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.0.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.0.ffn_up.input_max
    F32
    [1]
  • a.blk.0.ffn_up.input_min
    F32
    [1]
  • a.blk.0.ffn_up.output_max
    F32
    [1]
  • a.blk.0.ffn_up.output_min
    F32
    [1]
  • a.blk.0.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.0.ffn_up_1.input_max
    F32
    [1]
  • a.blk.0.ffn_up_1.input_min
    F32
    [1]
  • a.blk.0.ffn_up_1.output_max
    F32
    [1]
  • a.blk.0.ffn_up_1.output_min
    F32
    [1]
  • a.blk.0.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.0.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.0.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.0.ln1.weight
    F32
    [1024]
  • a.blk.0.ln2.weight
    F32
    [1024]
  • a.blk.0.norm_conv.weight
    F32
    [1024]
  • a.blk.0.per_dim_scale.weight
    F32
    [128]
  • a.blk.1
  • a.blk.1.attn_k.input_max
    F32
    [1]
  • a.blk.1.attn_k.input_min
    F32
    [1]
  • a.blk.1.attn_k.output_max
    F32
    [1]
  • a.blk.1.attn_k.output_min
    F32
    [1]
  • a.blk.1.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.1.attn_out.input_max
    F32
    [1]
  • a.blk.1.attn_out.input_min
    F32
    [1]
  • a.blk.1.attn_out.output_max
    F32
    [1]
  • a.blk.1.attn_out.output_min
    F32
    [1]
  • a.blk.1.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.1.attn_q.input_max
    F32
    [1]
  • a.blk.1.attn_q.input_min
    F32
    [1]
  • a.blk.1.attn_q.output_max
    F32
    [1]
  • a.blk.1.attn_q.output_min
    F32
    [1]
  • a.blk.1.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.1.attn_v.input_max
    F32
    [1]
  • a.blk.1.attn_v.input_min
    F32
    [1]
  • a.blk.1.attn_v.output_max
    F32
    [1]
  • a.blk.1.attn_v.output_min
    F32
    [1]
  • a.blk.1.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.1.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.1.conv_norm.weight
    F32
    [1024]
  • a.blk.1.conv_pw1.input_max
    F32
    [1]
  • a.blk.1.conv_pw1.input_min
    F32
    [1]
  • a.blk.1.conv_pw1.output_max
    F32
    [1]
  • a.blk.1.conv_pw1.output_min
    F32
    [1]
  • a.blk.1.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.1.conv_pw2.input_max
    F32
    [1]
  • a.blk.1.conv_pw2.input_min
    F32
    [1]
  • a.blk.1.conv_pw2.output_max
    F32
    [1]
  • a.blk.1.conv_pw2.output_min
    F32
    [1]
  • a.blk.1.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.1.ffn_down.input_max
    F32
    [1]
  • a.blk.1.ffn_down.input_min
    F32
    [1]
  • a.blk.1.ffn_down.output_max
    F32
    [1]
  • a.blk.1.ffn_down.output_min
    F32
    [1]
  • a.blk.1.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.1.ffn_down_1.input_max
    F32
    [1]
  • a.blk.1.ffn_down_1.input_min
    F32
    [1]
  • a.blk.1.ffn_down_1.output_max
    F32
    [1]
  • a.blk.1.ffn_down_1.output_min
    F32
    [1]
  • a.blk.1.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.1.ffn_norm.weight
    F32
    [1024]
  • a.blk.1.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.1.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.1.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.1.ffn_up.input_max
    F32
    [1]
  • a.blk.1.ffn_up.input_min
    F32
    [1]
  • a.blk.1.ffn_up.output_max
    F32
    [1]
  • a.blk.1.ffn_up.output_min
    F32
    [1]
  • a.blk.1.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.1.ffn_up_1.input_max
    F32
    [1]
  • a.blk.1.ffn_up_1.input_min
    F32
    [1]
  • a.blk.1.ffn_up_1.output_max
    F32
    [1]
  • a.blk.1.ffn_up_1.output_min
    F32
    [1]
  • a.blk.1.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.1.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.1.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.1.ln1.weight
    F32
    [1024]
  • a.blk.1.ln2.weight
    F32
    [1024]
  • a.blk.1.norm_conv.weight
    F32
    [1024]
  • a.blk.1.per_dim_scale.weight
    F32
    [128]
  • a.blk.2
  • a.blk.2.attn_k.input_max
    F32
    [1]
  • a.blk.2.attn_k.input_min
    F32
    [1]
  • a.blk.2.attn_k.output_max
    F32
    [1]
  • a.blk.2.attn_k.output_min
    F32
    [1]
  • a.blk.2.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.2.attn_out.input_max
    F32
    [1]
  • a.blk.2.attn_out.input_min
    F32
    [1]
  • a.blk.2.attn_out.output_max
    F32
    [1]
  • a.blk.2.attn_out.output_min
    F32
    [1]
  • a.blk.2.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.2.attn_q.input_max
    F32
    [1]
  • a.blk.2.attn_q.input_min
    F32
    [1]
  • a.blk.2.attn_q.output_max
    F32
    [1]
  • a.blk.2.attn_q.output_min
    F32
    [1]
  • a.blk.2.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.2.attn_v.input_max
    F32
    [1]
  • a.blk.2.attn_v.input_min
    F32
    [1]
  • a.blk.2.attn_v.output_max
    F32
    [1]
  • a.blk.2.attn_v.output_min
    F32
    [1]
  • a.blk.2.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.2.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.2.conv_norm.weight
    F32
    [1024]
  • a.blk.2.conv_pw1.input_max
    F32
    [1]
  • a.blk.2.conv_pw1.input_min
    F32
    [1]
  • a.blk.2.conv_pw1.output_max
    F32
    [1]
  • a.blk.2.conv_pw1.output_min
    F32
    [1]
  • a.blk.2.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.2.conv_pw2.input_max
    F32
    [1]
  • a.blk.2.conv_pw2.input_min
    F32
    [1]
  • a.blk.2.conv_pw2.output_max
    F32
    [1]
  • a.blk.2.conv_pw2.output_min
    F32
    [1]
  • a.blk.2.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.2.ffn_down.input_max
    F32
    [1]
  • a.blk.2.ffn_down.input_min
    F32
    [1]
  • a.blk.2.ffn_down.output_max
    F32
    [1]
  • a.blk.2.ffn_down.output_min
    F32
    [1]
  • a.blk.2.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.2.ffn_down_1.input_max
    F32
    [1]
  • a.blk.2.ffn_down_1.input_min
    F32
    [1]
  • a.blk.2.ffn_down_1.output_max
    F32
    [1]
  • a.blk.2.ffn_down_1.output_min
    F32
    [1]
  • a.blk.2.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.2.ffn_norm.weight
    F32
    [1024]
  • a.blk.2.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.2.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.2.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.2.ffn_up.input_max
    F32
    [1]
  • a.blk.2.ffn_up.input_min
    F32
    [1]
  • a.blk.2.ffn_up.output_max
    F32
    [1]
  • a.blk.2.ffn_up.output_min
    F32
    [1]
  • a.blk.2.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.2.ffn_up_1.input_max
    F32
    [1]
  • a.blk.2.ffn_up_1.input_min
    F32
    [1]
  • a.blk.2.ffn_up_1.output_max
    F32
    [1]
  • a.blk.2.ffn_up_1.output_min
    F32
    [1]
  • a.blk.2.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.2.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.2.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.2.ln1.weight
    F32
    [1024]
  • a.blk.2.ln2.weight
    F32
    [1024]
  • a.blk.2.norm_conv.weight
    F32
    [1024]
  • a.blk.2.per_dim_scale.weight
    F32
    [128]
  • a.blk.3
  • a.blk.3.attn_k.input_max
    F32
    [1]
  • a.blk.3.attn_k.input_min
    F32
    [1]
  • a.blk.3.attn_k.output_max
    F32
    [1]
  • a.blk.3.attn_k.output_min
    F32
    [1]
  • a.blk.3.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.3.attn_out.input_max
    F32
    [1]
  • a.blk.3.attn_out.input_min
    F32
    [1]
  • a.blk.3.attn_out.output_max
    F32
    [1]
  • a.blk.3.attn_out.output_min
    F32
    [1]
  • a.blk.3.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.3.attn_q.input_max
    F32
    [1]
  • a.blk.3.attn_q.input_min
    F32
    [1]
  • a.blk.3.attn_q.output_max
    F32
    [1]
  • a.blk.3.attn_q.output_min
    F32
    [1]
  • a.blk.3.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.3.attn_v.input_max
    F32
    [1]
  • a.blk.3.attn_v.input_min
    F32
    [1]
  • a.blk.3.attn_v.output_max
    F32
    [1]
  • a.blk.3.attn_v.output_min
    F32
    [1]
  • a.blk.3.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.3.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.3.conv_norm.weight
    F32
    [1024]
  • a.blk.3.conv_pw1.input_max
    F32
    [1]
  • a.blk.3.conv_pw1.input_min
    F32
    [1]
  • a.blk.3.conv_pw1.output_max
    F32
    [1]
  • a.blk.3.conv_pw1.output_min
    F32
    [1]
  • a.blk.3.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.3.conv_pw2.input_max
    F32
    [1]
  • a.blk.3.conv_pw2.input_min
    F32
    [1]
  • a.blk.3.conv_pw2.output_max
    F32
    [1]
  • a.blk.3.conv_pw2.output_min
    F32
    [1]
  • a.blk.3.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.3.ffn_down.input_max
    F32
    [1]
  • a.blk.3.ffn_down.input_min
    F32
    [1]
  • a.blk.3.ffn_down.output_max
    F32
    [1]
  • a.blk.3.ffn_down.output_min
    F32
    [1]
  • a.blk.3.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.3.ffn_down_1.input_max
    F32
    [1]
  • a.blk.3.ffn_down_1.input_min
    F32
    [1]
  • a.blk.3.ffn_down_1.output_max
    F32
    [1]
  • a.blk.3.ffn_down_1.output_min
    F32
    [1]
  • a.blk.3.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.3.ffn_norm.weight
    F32
    [1024]
  • a.blk.3.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.3.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.3.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.3.ffn_up.input_max
    F32
    [1]
  • a.blk.3.ffn_up.input_min
    F32
    [1]
  • a.blk.3.ffn_up.output_max
    F32
    [1]
  • a.blk.3.ffn_up.output_min
    F32
    [1]
  • a.blk.3.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.3.ffn_up_1.input_max
    F32
    [1]
  • a.blk.3.ffn_up_1.input_min
    F32
    [1]
  • a.blk.3.ffn_up_1.output_max
    F32
    [1]
  • a.blk.3.ffn_up_1.output_min
    F32
    [1]
  • a.blk.3.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.3.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.3.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.3.ln1.weight
    F32
    [1024]
  • a.blk.3.ln2.weight
    F32
    [1024]
  • a.blk.3.norm_conv.weight
    F32
    [1024]
  • a.blk.3.per_dim_scale.weight
    F32
    [128]
  • a.blk.4
  • a.blk.4.attn_k.input_max
    F32
    [1]
  • a.blk.4.attn_k.input_min
    F32
    [1]
  • a.blk.4.attn_k.output_max
    F32
    [1]
  • a.blk.4.attn_k.output_min
    F32
    [1]
  • a.blk.4.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.4.attn_out.input_max
    F32
    [1]
  • a.blk.4.attn_out.input_min
    F32
    [1]
  • a.blk.4.attn_out.output_max
    F32
    [1]
  • a.blk.4.attn_out.output_min
    F32
    [1]
  • a.blk.4.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.4.attn_q.input_max
    F32
    [1]
  • a.blk.4.attn_q.input_min
    F32
    [1]
  • a.blk.4.attn_q.output_max
    F32
    [1]
  • a.blk.4.attn_q.output_min
    F32
    [1]
  • a.blk.4.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.4.attn_v.input_max
    F32
    [1]
  • a.blk.4.attn_v.input_min
    F32
    [1]
  • a.blk.4.attn_v.output_max
    F32
    [1]
  • a.blk.4.attn_v.output_min
    F32
    [1]
  • a.blk.4.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.4.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.4.conv_norm.weight
    F32
    [1024]
  • a.blk.4.conv_pw1.input_max
    F32
    [1]
  • a.blk.4.conv_pw1.input_min
    F32
    [1]
  • a.blk.4.conv_pw1.output_max
    F32
    [1]
  • a.blk.4.conv_pw1.output_min
    F32
    [1]
  • a.blk.4.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.4.conv_pw2.input_max
    F32
    [1]
  • a.blk.4.conv_pw2.input_min
    F32
    [1]
  • a.blk.4.conv_pw2.output_max
    F32
    [1]
  • a.blk.4.conv_pw2.output_min
    F32
    [1]
  • a.blk.4.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.4.ffn_down.input_max
    F32
    [1]
  • a.blk.4.ffn_down.input_min
    F32
    [1]
  • a.blk.4.ffn_down.output_max
    F32
    [1]
  • a.blk.4.ffn_down.output_min
    F32
    [1]
  • a.blk.4.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.4.ffn_down_1.input_max
    F32
    [1]
  • a.blk.4.ffn_down_1.input_min
    F32
    [1]
  • a.blk.4.ffn_down_1.output_max
    F32
    [1]
  • a.blk.4.ffn_down_1.output_min
    F32
    [1]
  • a.blk.4.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.4.ffn_norm.weight
    F32
    [1024]
  • a.blk.4.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.4.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.4.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.4.ffn_up.input_max
    F32
    [1]
  • a.blk.4.ffn_up.input_min
    F32
    [1]
  • a.blk.4.ffn_up.output_max
    F32
    [1]
  • a.blk.4.ffn_up.output_min
    F32
    [1]
  • a.blk.4.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.4.ffn_up_1.input_max
    F32
    [1]
  • a.blk.4.ffn_up_1.input_min
    F32
    [1]
  • a.blk.4.ffn_up_1.output_max
    F32
    [1]
  • a.blk.4.ffn_up_1.output_min
    F32
    [1]
  • a.blk.4.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.4.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.4.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.4.ln1.weight
    F32
    [1024]
  • a.blk.4.ln2.weight
    F32
    [1024]
  • a.blk.4.norm_conv.weight
    F32
    [1024]
  • a.blk.4.per_dim_scale.weight
    F32
    [128]
  • a.blk.5
  • a.blk.5.attn_k.input_max
    F32
    [1]
  • a.blk.5.attn_k.input_min
    F32
    [1]
  • a.blk.5.attn_k.output_max
    F32
    [1]
  • a.blk.5.attn_k.output_min
    F32
    [1]
  • a.blk.5.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.5.attn_out.input_max
    F32
    [1]
  • a.blk.5.attn_out.input_min
    F32
    [1]
  • a.blk.5.attn_out.output_max
    F32
    [1]
  • a.blk.5.attn_out.output_min
    F32
    [1]
  • a.blk.5.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.5.attn_q.input_max
    F32
    [1]
  • a.blk.5.attn_q.input_min
    F32
    [1]
  • a.blk.5.attn_q.output_max
    F32
    [1]
  • a.blk.5.attn_q.output_min
    F32
    [1]
  • a.blk.5.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.5.attn_v.input_max
    F32
    [1]
  • a.blk.5.attn_v.input_min
    F32
    [1]
  • a.blk.5.attn_v.output_max
    F32
    [1]
  • a.blk.5.attn_v.output_min
    F32
    [1]
  • a.blk.5.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.5.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.5.conv_norm.weight
    F32
    [1024]
  • a.blk.5.conv_pw1.input_max
    F32
    [1]
  • a.blk.5.conv_pw1.input_min
    F32
    [1]
  • a.blk.5.conv_pw1.output_max
    F32
    [1]
  • a.blk.5.conv_pw1.output_min
    F32
    [1]
  • a.blk.5.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.5.conv_pw2.input_max
    F32
    [1]
  • a.blk.5.conv_pw2.input_min
    F32
    [1]
  • a.blk.5.conv_pw2.output_max
    F32
    [1]
  • a.blk.5.conv_pw2.output_min
    F32
    [1]
  • a.blk.5.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.5.ffn_down.input_max
    F32
    [1]
  • a.blk.5.ffn_down.input_min
    F32
    [1]
  • a.blk.5.ffn_down.output_max
    F32
    [1]
  • a.blk.5.ffn_down.output_min
    F32
    [1]
  • a.blk.5.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.5.ffn_down_1.input_max
    F32
    [1]
  • a.blk.5.ffn_down_1.input_min
    F32
    [1]
  • a.blk.5.ffn_down_1.output_max
    F32
    [1]
  • a.blk.5.ffn_down_1.output_min
    F32
    [1]
  • a.blk.5.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.5.ffn_norm.weight
    F32
    [1024]
  • a.blk.5.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.5.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.5.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.5.ffn_up.input_max
    F32
    [1]
  • a.blk.5.ffn_up.input_min
    F32
    [1]
  • a.blk.5.ffn_up.output_max
    F32
    [1]
  • a.blk.5.ffn_up.output_min
    F32
    [1]
  • a.blk.5.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.5.ffn_up_1.input_max
    F32
    [1]
  • a.blk.5.ffn_up_1.input_min
    F32
    [1]
  • a.blk.5.ffn_up_1.output_max
    F32
    [1]
  • a.blk.5.ffn_up_1.output_min
    F32
    [1]
  • a.blk.5.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.5.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.5.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.5.ln1.weight
    F32
    [1024]
  • a.blk.5.ln2.weight
    F32
    [1024]
  • a.blk.5.norm_conv.weight
    F32
    [1024]
  • a.blk.5.per_dim_scale.weight
    F32
    [128]
  • a.blk.6
  • a.blk.6.attn_k.input_max
    F32
    [1]
  • a.blk.6.attn_k.input_min
    F32
    [1]
  • a.blk.6.attn_k.output_max
    F32
    [1]
  • a.blk.6.attn_k.output_min
    F32
    [1]
  • a.blk.6.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.6.attn_out.input_max
    F32
    [1]
  • a.blk.6.attn_out.input_min
    F32
    [1]
  • a.blk.6.attn_out.output_max
    F32
    [1]
  • a.blk.6.attn_out.output_min
    F32
    [1]
  • a.blk.6.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.6.attn_q.input_max
    F32
    [1]
  • a.blk.6.attn_q.input_min
    F32
    [1]
  • a.blk.6.attn_q.output_max
    F32
    [1]
  • a.blk.6.attn_q.output_min
    F32
    [1]
  • a.blk.6.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.6.attn_v.input_max
    F32
    [1]
  • a.blk.6.attn_v.input_min
    F32
    [1]
  • a.blk.6.attn_v.output_max
    F32
    [1]
  • a.blk.6.attn_v.output_min
    F32
    [1]
  • a.blk.6.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.6.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.6.conv_norm.weight
    F32
    [1024]
  • a.blk.6.conv_pw1.input_max
    F32
    [1]
  • a.blk.6.conv_pw1.input_min
    F32
    [1]
  • a.blk.6.conv_pw1.output_max
    F32
    [1]
  • a.blk.6.conv_pw1.output_min
    F32
    [1]
  • a.blk.6.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.6.conv_pw2.input_max
    F32
    [1]
  • a.blk.6.conv_pw2.input_min
    F32
    [1]
  • a.blk.6.conv_pw2.output_max
    F32
    [1]
  • a.blk.6.conv_pw2.output_min
    F32
    [1]
  • a.blk.6.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.6.ffn_down.input_max
    F32
    [1]
  • a.blk.6.ffn_down.input_min
    F32
    [1]
  • a.blk.6.ffn_down.output_max
    F32
    [1]
  • a.blk.6.ffn_down.output_min
    F32
    [1]
  • a.blk.6.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.6.ffn_down_1.input_max
    F32
    [1]
  • a.blk.6.ffn_down_1.input_min
    F32
    [1]
  • a.blk.6.ffn_down_1.output_max
    F32
    [1]
  • a.blk.6.ffn_down_1.output_min
    F32
    [1]
  • a.blk.6.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.6.ffn_norm.weight
    F32
    [1024]
  • a.blk.6.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.6.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.6.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.6.ffn_up.input_max
    F32
    [1]
  • a.blk.6.ffn_up.input_min
    F32
    [1]
  • a.blk.6.ffn_up.output_max
    F32
    [1]
  • a.blk.6.ffn_up.output_min
    F32
    [1]
  • a.blk.6.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.6.ffn_up_1.input_max
    F32
    [1]
  • a.blk.6.ffn_up_1.input_min
    F32
    [1]
  • a.blk.6.ffn_up_1.output_max
    F32
    [1]
  • a.blk.6.ffn_up_1.output_min
    F32
    [1]
  • a.blk.6.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.6.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.6.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.6.ln1.weight
    F32
    [1024]
  • a.blk.6.ln2.weight
    F32
    [1024]
  • a.blk.6.norm_conv.weight
    F32
    [1024]
  • a.blk.6.per_dim_scale.weight
    F32
    [128]
  • a.blk.7
  • a.blk.7.attn_k.input_max
    F32
    [1]
  • a.blk.7.attn_k.input_min
    F32
    [1]
  • a.blk.7.attn_k.output_max
    F32
    [1]
  • a.blk.7.attn_k.output_min
    F32
    [1]
  • a.blk.7.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.7.attn_out.input_max
    F32
    [1]
  • a.blk.7.attn_out.input_min
    F32
    [1]
  • a.blk.7.attn_out.output_max
    F32
    [1]
  • a.blk.7.attn_out.output_min
    F32
    [1]
  • a.blk.7.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.7.attn_q.input_max
    F32
    [1]
  • a.blk.7.attn_q.input_min
    F32
    [1]
  • a.blk.7.attn_q.output_max
    F32
    [1]
  • a.blk.7.attn_q.output_min
    F32
    [1]
  • a.blk.7.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.7.attn_v.input_max
    F32
    [1]
  • a.blk.7.attn_v.input_min
    F32
    [1]
  • a.blk.7.attn_v.output_max
    F32
    [1]
  • a.blk.7.attn_v.output_min
    F32
    [1]
  • a.blk.7.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.7.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.7.conv_norm.weight
    F32
    [1024]
  • a.blk.7.conv_pw1.input_max
    F32
    [1]
  • a.blk.7.conv_pw1.input_min
    F32
    [1]
  • a.blk.7.conv_pw1.output_max
    F32
    [1]
  • a.blk.7.conv_pw1.output_min
    F32
    [1]
  • a.blk.7.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.7.conv_pw2.input_max
    F32
    [1]
  • a.blk.7.conv_pw2.input_min
    F32
    [1]
  • a.blk.7.conv_pw2.output_max
    F32
    [1]
  • a.blk.7.conv_pw2.output_min
    F32
    [1]
  • a.blk.7.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.7.ffn_down.input_max
    F32
    [1]
  • a.blk.7.ffn_down.input_min
    F32
    [1]
  • a.blk.7.ffn_down.output_max
    F32
    [1]
  • a.blk.7.ffn_down.output_min
    F32
    [1]
  • a.blk.7.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.7.ffn_down_1.input_max
    F32
    [1]
  • a.blk.7.ffn_down_1.input_min
    F32
    [1]
  • a.blk.7.ffn_down_1.output_max
    F32
    [1]
  • a.blk.7.ffn_down_1.output_min
    F32
    [1]
  • a.blk.7.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.7.ffn_norm.weight
    F32
    [1024]
  • a.blk.7.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.7.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.7.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.7.ffn_up.input_max
    F32
    [1]
  • a.blk.7.ffn_up.input_min
    F32
    [1]
  • a.blk.7.ffn_up.output_max
    F32
    [1]
  • a.blk.7.ffn_up.output_min
    F32
    [1]
  • a.blk.7.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.7.ffn_up_1.input_max
    F32
    [1]
  • a.blk.7.ffn_up_1.input_min
    F32
    [1]
  • a.blk.7.ffn_up_1.output_max
    F32
    [1]
  • a.blk.7.ffn_up_1.output_min
    F32
    [1]
  • a.blk.7.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.7.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.7.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.7.ln1.weight
    F32
    [1024]
  • a.blk.7.ln2.weight
    F32
    [1024]
  • a.blk.7.norm_conv.weight
    F32
    [1024]
  • a.blk.7.per_dim_scale.weight
    F32
    [128]
  • a.blk.8
  • a.blk.8.attn_k.input_max
    F32
    [1]
  • a.blk.8.attn_k.input_min
    F32
    [1]
  • a.blk.8.attn_k.output_max
    F32
    [1]
  • a.blk.8.attn_k.output_min
    F32
    [1]
  • a.blk.8.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.8.attn_out.input_max
    F32
    [1]
  • a.blk.8.attn_out.input_min
    F32
    [1]
  • a.blk.8.attn_out.output_max
    F32
    [1]
  • a.blk.8.attn_out.output_min
    F32
    [1]
  • a.blk.8.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.8.attn_q.input_max
    F32
    [1]
  • a.blk.8.attn_q.input_min
    F32
    [1]
  • a.blk.8.attn_q.output_max
    F32
    [1]
  • a.blk.8.attn_q.output_min
    F32
    [1]
  • a.blk.8.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.8.attn_v.input_max
    F32
    [1]
  • a.blk.8.attn_v.input_min
    F32
    [1]
  • a.blk.8.attn_v.output_max
    F32
    [1]
  • a.blk.8.attn_v.output_min
    F32
    [1]
  • a.blk.8.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.8.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.8.conv_norm.weight
    F32
    [1024]
  • a.blk.8.conv_pw1.input_max
    F32
    [1]
  • a.blk.8.conv_pw1.input_min
    F32
    [1]
  • a.blk.8.conv_pw1.output_max
    F32
    [1]
  • a.blk.8.conv_pw1.output_min
    F32
    [1]
  • a.blk.8.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.8.conv_pw2.input_max
    F32
    [1]
  • a.blk.8.conv_pw2.input_min
    F32
    [1]
  • a.blk.8.conv_pw2.output_max
    F32
    [1]
  • a.blk.8.conv_pw2.output_min
    F32
    [1]
  • a.blk.8.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.8.ffn_down.input_max
    F32
    [1]
  • a.blk.8.ffn_down.input_min
    F32
    [1]
  • a.blk.8.ffn_down.output_max
    F32
    [1]
  • a.blk.8.ffn_down.output_min
    F32
    [1]
  • a.blk.8.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.8.ffn_down_1.input_max
    F32
    [1]
  • a.blk.8.ffn_down_1.input_min
    F32
    [1]
  • a.blk.8.ffn_down_1.output_max
    F32
    [1]
  • a.blk.8.ffn_down_1.output_min
    F32
    [1]
  • a.blk.8.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.8.ffn_norm.weight
    F32
    [1024]
  • a.blk.8.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.8.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.8.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.8.ffn_up.input_max
    F32
    [1]
  • a.blk.8.ffn_up.input_min
    F32
    [1]
  • a.blk.8.ffn_up.output_max
    F32
    [1]
  • a.blk.8.ffn_up.output_min
    F32
    [1]
  • a.blk.8.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.8.ffn_up_1.input_max
    F32
    [1]
  • a.blk.8.ffn_up_1.input_min
    F32
    [1]
  • a.blk.8.ffn_up_1.output_max
    F32
    [1]
  • a.blk.8.ffn_up_1.output_min
    F32
    [1]
  • a.blk.8.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.8.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.8.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.8.ln1.weight
    F32
    [1024]
  • a.blk.8.ln2.weight
    F32
    [1024]
  • a.blk.8.norm_conv.weight
    F32
    [1024]
  • a.blk.8.per_dim_scale.weight
    F32
    [128]
  • a.blk.9
  • a.blk.9.attn_k.input_max
    F32
    [1]
  • a.blk.9.attn_k.input_min
    F32
    [1]
  • a.blk.9.attn_k.output_max
    F32
    [1]
  • a.blk.9.attn_k.output_min
    F32
    [1]
  • a.blk.9.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.9.attn_out.input_max
    F32
    [1]
  • a.blk.9.attn_out.input_min
    F32
    [1]
  • a.blk.9.attn_out.output_max
    F32
    [1]
  • a.blk.9.attn_out.output_min
    F32
    [1]
  • a.blk.9.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.9.attn_q.input_max
    F32
    [1]
  • a.blk.9.attn_q.input_min
    F32
    [1]
  • a.blk.9.attn_q.output_max
    F32
    [1]
  • a.blk.9.attn_q.output_min
    F32
    [1]
  • a.blk.9.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.9.attn_v.input_max
    F32
    [1]
  • a.blk.9.attn_v.input_min
    F32
    [1]
  • a.blk.9.attn_v.output_max
    F32
    [1]
  • a.blk.9.attn_v.output_min
    F32
    [1]
  • a.blk.9.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.9.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.9.conv_norm.weight
    F32
    [1024]
  • a.blk.9.conv_pw1.input_max
    F32
    [1]
  • a.blk.9.conv_pw1.input_min
    F32
    [1]
  • a.blk.9.conv_pw1.output_max
    F32
    [1]
  • a.blk.9.conv_pw1.output_min
    F32
    [1]
  • a.blk.9.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.9.conv_pw2.input_max
    F32
    [1]
  • a.blk.9.conv_pw2.input_min
    F32
    [1]
  • a.blk.9.conv_pw2.output_max
    F32
    [1]
  • a.blk.9.conv_pw2.output_min
    F32
    [1]
  • a.blk.9.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.9.ffn_down.input_max
    F32
    [1]
  • a.blk.9.ffn_down.input_min
    F32
    [1]
  • a.blk.9.ffn_down.output_max
    F32
    [1]
  • a.blk.9.ffn_down.output_min
    F32
    [1]
  • a.blk.9.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.9.ffn_down_1.input_max
    F32
    [1]
  • a.blk.9.ffn_down_1.input_min
    F32
    [1]
  • a.blk.9.ffn_down_1.output_max
    F32
    [1]
  • a.blk.9.ffn_down_1.output_min
    F32
    [1]
  • a.blk.9.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.9.ffn_norm.weight
    F32
    [1024]
  • a.blk.9.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.9.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.9.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.9.ffn_up.input_max
    F32
    [1]
  • a.blk.9.ffn_up.input_min
    F32
    [1]
  • a.blk.9.ffn_up.output_max
    F32
    [1]
  • a.blk.9.ffn_up.output_min
    F32
    [1]
  • a.blk.9.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.9.ffn_up_1.input_max
    F32
    [1]
  • a.blk.9.ffn_up_1.input_min
    F32
    [1]
  • a.blk.9.ffn_up_1.output_max
    F32
    [1]
  • a.blk.9.ffn_up_1.output_min
    F32
    [1]
  • a.blk.9.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.9.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.9.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.9.ln1.weight
    F32
    [1024]
  • a.blk.9.ln2.weight
    F32
    [1024]
  • a.blk.9.norm_conv.weight
    F32
    [1024]
  • a.blk.9.per_dim_scale.weight
    F32
    [128]
  • a.blk.10
  • a.blk.10.attn_k.input_max
    F32
    [1]
  • a.blk.10.attn_k.input_min
    F32
    [1]
  • a.blk.10.attn_k.output_max
    F32
    [1]
  • a.blk.10.attn_k.output_min
    F32
    [1]
  • a.blk.10.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.10.attn_out.input_max
    F32
    [1]
  • a.blk.10.attn_out.input_min
    F32
    [1]
  • a.blk.10.attn_out.output_max
    F32
    [1]
  • a.blk.10.attn_out.output_min
    F32
    [1]
  • a.blk.10.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.10.attn_q.input_max
    F32
    [1]
  • a.blk.10.attn_q.input_min
    F32
    [1]
  • a.blk.10.attn_q.output_max
    F32
    [1]
  • a.blk.10.attn_q.output_min
    F32
    [1]
  • a.blk.10.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.10.attn_v.input_max
    F32
    [1]
  • a.blk.10.attn_v.input_min
    F32
    [1]
  • a.blk.10.attn_v.output_max
    F32
    [1]
  • a.blk.10.attn_v.output_min
    F32
    [1]
  • a.blk.10.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.10.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.10.conv_norm.weight
    F32
    [1024]
  • a.blk.10.conv_pw1.input_max
    F32
    [1]
  • a.blk.10.conv_pw1.input_min
    F32
    [1]
  • a.blk.10.conv_pw1.output_max
    F32
    [1]
  • a.blk.10.conv_pw1.output_min
    F32
    [1]
  • a.blk.10.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.10.conv_pw2.input_max
    F32
    [1]
  • a.blk.10.conv_pw2.input_min
    F32
    [1]
  • a.blk.10.conv_pw2.output_max
    F32
    [1]
  • a.blk.10.conv_pw2.output_min
    F32
    [1]
  • a.blk.10.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.10.ffn_down.input_max
    F32
    [1]
  • a.blk.10.ffn_down.input_min
    F32
    [1]
  • a.blk.10.ffn_down.output_max
    F32
    [1]
  • a.blk.10.ffn_down.output_min
    F32
    [1]
  • a.blk.10.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.10.ffn_down_1.input_max
    F32
    [1]
  • a.blk.10.ffn_down_1.input_min
    F32
    [1]
  • a.blk.10.ffn_down_1.output_max
    F32
    [1]
  • a.blk.10.ffn_down_1.output_min
    F32
    [1]
  • a.blk.10.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.10.ffn_norm.weight
    F32
    [1024]
  • a.blk.10.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.10.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.10.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.10.ffn_up.input_max
    F32
    [1]
  • a.blk.10.ffn_up.input_min
    F32
    [1]
  • a.blk.10.ffn_up.output_max
    F32
    [1]
  • a.blk.10.ffn_up.output_min
    F32
    [1]
  • a.blk.10.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.10.ffn_up_1.input_max
    F32
    [1]
  • a.blk.10.ffn_up_1.input_min
    F32
    [1]
  • a.blk.10.ffn_up_1.output_max
    F32
    [1]
  • a.blk.10.ffn_up_1.output_min
    F32
    [1]
  • a.blk.10.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.10.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.10.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.10.ln1.weight
    F32
    [1024]
  • a.blk.10.ln2.weight
    F32
    [1024]
  • a.blk.10.norm_conv.weight
    F32
    [1024]
  • a.blk.10.per_dim_scale.weight
    F32
    [128]
  • a.blk.11
  • a.blk.11.attn_k.input_max
    F32
    [1]
  • a.blk.11.attn_k.input_min
    F32
    [1]
  • a.blk.11.attn_k.output_max
    F32
    [1]
  • a.blk.11.attn_k.output_min
    F32
    [1]
  • a.blk.11.attn_k.weight
    BF16
    [1024, 1024]
  • a.blk.11.attn_out.input_max
    F32
    [1]
  • a.blk.11.attn_out.input_min
    F32
    [1]
  • a.blk.11.attn_out.output_max
    F32
    [1]
  • a.blk.11.attn_out.output_min
    F32
    [1]
  • a.blk.11.attn_out.weight
    BF16
    [1024, 1024]
  • a.blk.11.attn_q.input_max
    F32
    [1]
  • a.blk.11.attn_q.input_min
    F32
    [1]
  • a.blk.11.attn_q.output_max
    F32
    [1]
  • a.blk.11.attn_q.output_min
    F32
    [1]
  • a.blk.11.attn_q.weight
    BF16
    [1024, 1024]
  • a.blk.11.attn_v.input_max
    F32
    [1]
  • a.blk.11.attn_v.input_min
    F32
    [1]
  • a.blk.11.attn_v.output_max
    F32
    [1]
  • a.blk.11.attn_v.output_min
    F32
    [1]
  • a.blk.11.attn_v.weight
    BF16
    [1024, 1024]
  • a.blk.11.conv_dw.weight
    F32
    [5, 1024]
  • a.blk.11.conv_norm.weight
    F32
    [1024]
  • a.blk.11.conv_pw1.input_max
    F32
    [1]
  • a.blk.11.conv_pw1.input_min
    F32
    [1]
  • a.blk.11.conv_pw1.output_max
    F32
    [1]
  • a.blk.11.conv_pw1.output_min
    F32
    [1]
  • a.blk.11.conv_pw1.weight
    BF16
    [1024, 2048]
  • a.blk.11.conv_pw2.input_max
    F32
    [1]
  • a.blk.11.conv_pw2.input_min
    F32
    [1]
  • a.blk.11.conv_pw2.output_max
    F32
    [1]
  • a.blk.11.conv_pw2.output_min
    F32
    [1]
  • a.blk.11.conv_pw2.weight
    BF16
    [1024, 1024]
  • a.blk.11.ffn_down.input_max
    F32
    [1]
  • a.blk.11.ffn_down.input_min
    F32
    [1]
  • a.blk.11.ffn_down.output_max
    F32
    [1]
  • a.blk.11.ffn_down.output_min
    F32
    [1]
  • a.blk.11.ffn_down.weight
    BF16
    [4096, 1024]
  • a.blk.11.ffn_down_1.input_max
    F32
    [1]
  • a.blk.11.ffn_down_1.input_min
    F32
    [1]
  • a.blk.11.ffn_down_1.output_max
    F32
    [1]
  • a.blk.11.ffn_down_1.output_min
    F32
    [1]
  • a.blk.11.ffn_down_1.weight
    BF16
    [4096, 1024]
  • a.blk.11.ffn_norm.weight
    F32
    [1024]
  • a.blk.11.ffn_norm_1.weight
    F32
    [1024]
  • a.blk.11.ffn_post_norm.weight
    F32
    [1024]
  • a.blk.11.ffn_post_norm_1.weight
    F32
    [1024]
  • a.blk.11.ffn_up.input_max
    F32
    [1]
  • a.blk.11.ffn_up.input_min
    F32
    [1]
  • a.blk.11.ffn_up.output_max
    F32
    [1]
  • a.blk.11.ffn_up.output_min
    F32
    [1]
  • a.blk.11.ffn_up.weight
    BF16
    [1024, 4096]
  • a.blk.11.ffn_up_1.input_max
    F32
    [1]
  • a.blk.11.ffn_up_1.input_min
    F32
    [1]
  • a.blk.11.ffn_up_1.output_max
    F32
    [1]
  • a.blk.11.ffn_up_1.output_min
    F32
    [1]
  • a.blk.11.ffn_up_1.weight
    BF16
    [1024, 4096]
  • a.blk.11.layer_pre_norm.weight
    F32
    [1024]
  • a.blk.11.linear_pos.weight
    BF16
    [1024, 1024]
  • a.blk.11.ln1.weight
    F32
    [1024]
  • a.blk.11.ln2.weight
    F32
    [1024]
  • a.blk.11.norm_conv.weight
    F32
    [1024]
  • a.blk.11.per_dim_scale.weight
    F32
    [128]
  • a.conv1d.0.norm.weight
    F32
    [128]
  • a.conv1d.0.weight
    F32
    [3, 3, 1, 128]
  • a.conv1d.1.norm.weight
    F32
    [32]
  • a.conv1d.1.weight
    F32
    [3, 3, 128, 32]
  • a.pre_encode.out.weight
    BF16
    [1024, 1024]
  • blk.0
  • blk.0.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.0.attn_k_norm.weight
    F32
    [256]
  • blk.0.attn_norm.weight
    F32
    [1536]
  • blk.0.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.0.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.0.attn_q_norm.weight
    F32
    [256]
  • blk.0.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.0.ffn_down.weight
    Q5_K
    [6144, 1536]
  • blk.0.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.0.ffn_norm.weight
    F32
    [1536]
  • blk.0.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.0.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.0.layer_output_scale.weight
    F32
    [1]
  • blk.0.post_attention_norm.weight
    F32
    [1536]
  • blk.0.post_ffw_norm.weight
    F32
    [1536]
  • blk.0.post_norm.weight
    F32
    [1536]
  • blk.0.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.1
  • blk.1.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.1.attn_k_norm.weight
    F32
    [256]
  • blk.1.attn_norm.weight
    F32
    [1536]
  • blk.1.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.1.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.1.attn_q_norm.weight
    F32
    [256]
  • blk.1.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.1.ffn_down.weight
    Q5_K
    [6144, 1536]
  • blk.1.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.1.ffn_norm.weight
    F32
    [1536]
  • blk.1.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.1.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.1.layer_output_scale.weight
    F32
    [1]
  • blk.1.post_attention_norm.weight
    F32
    [1536]
  • blk.1.post_ffw_norm.weight
    F32
    [1536]
  • blk.1.post_norm.weight
    F32
    [1536]
  • blk.1.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.2
  • blk.2.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.2.attn_k_norm.weight
    F32
    [256]
  • blk.2.attn_norm.weight
    F32
    [1536]
  • blk.2.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.2.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.2.attn_q_norm.weight
    F32
    [256]
  • blk.2.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.2.ffn_down.weight
    Q5_K
    [6144, 1536]
  • blk.2.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.2.ffn_norm.weight
    F32
    [1536]
  • blk.2.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.2.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.2.layer_output_scale.weight
    F32
    [1]
  • blk.2.post_attention_norm.weight
    F32
    [1536]
  • blk.2.post_ffw_norm.weight
    F32
    [1536]
  • blk.2.post_norm.weight
    F32
    [1536]
  • blk.2.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.3
  • blk.3.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.3.attn_k_norm.weight
    F32
    [256]
  • blk.3.attn_norm.weight
    F32
    [1536]
  • blk.3.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.3.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.3.attn_q_norm.weight
    F32
    [256]
  • blk.3.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.3.ffn_down.weight
    Q5_K
    [6144, 1536]
  • blk.3.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.3.ffn_norm.weight
    F32
    [1536]
  • blk.3.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.3.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.3.layer_output_scale.weight
    F32
    [1]
  • blk.3.post_attention_norm.weight
    F32
    [1536]
  • blk.3.post_ffw_norm.weight
    F32
    [1536]
  • blk.3.post_norm.weight
    F32
    [1536]
  • blk.3.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.4
  • blk.4.attn_k.weight
    IQ4_XS
    [1536, 512]
  • blk.4.attn_k_norm.weight
    F32
    [512]
  • blk.4.attn_norm.weight
    F32
    [1536]
  • blk.4.attn_output.weight
    IQ4_XS
    [4096, 1536]
  • blk.4.attn_q.weight
    IQ4_XS
    [1536, 4096]
  • blk.4.attn_q_norm.weight
    F32
    [512]
  • blk.4.attn_v.weight
    Q5_K
    [1536, 512]
  • blk.4.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.4.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.4.ffn_norm.weight
    F32
    [1536]
  • blk.4.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.4.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.4.layer_output_scale.weight
    F32
    [1]
  • blk.4.post_attention_norm.weight
    F32
    [1536]
  • blk.4.post_ffw_norm.weight
    F32
    [1536]
  • blk.4.post_norm.weight
    F32
    [1536]
  • blk.4.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.5
  • blk.5.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.5.attn_k_norm.weight
    F32
    [256]
  • blk.5.attn_norm.weight
    F32
    [1536]
  • blk.5.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.5.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.5.attn_q_norm.weight
    F32
    [256]
  • blk.5.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.5.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.5.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.5.ffn_norm.weight
    F32
    [1536]
  • blk.5.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.5.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.5.layer_output_scale.weight
    F32
    [1]
  • blk.5.post_attention_norm.weight
    F32
    [1536]
  • blk.5.post_ffw_norm.weight
    F32
    [1536]
  • blk.5.post_norm.weight
    F32
    [1536]
  • blk.5.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.6
  • blk.6.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.6.attn_k_norm.weight
    F32
    [256]
  • blk.6.attn_norm.weight
    F32
    [1536]
  • blk.6.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.6.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.6.attn_q_norm.weight
    F32
    [256]
  • blk.6.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.6.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.6.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.6.ffn_norm.weight
    F32
    [1536]
  • blk.6.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.6.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.6.layer_output_scale.weight
    F32
    [1]
  • blk.6.post_attention_norm.weight
    F32
    [1536]
  • blk.6.post_ffw_norm.weight
    F32
    [1536]
  • blk.6.post_norm.weight
    F32
    [1536]
  • blk.6.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.7
  • blk.7.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.7.attn_k_norm.weight
    F32
    [256]
  • blk.7.attn_norm.weight
    F32
    [1536]
  • blk.7.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.7.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.7.attn_q_norm.weight
    F32
    [256]
  • blk.7.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.7.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.7.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.7.ffn_norm.weight
    F32
    [1536]
  • blk.7.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.7.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.7.layer_output_scale.weight
    F32
    [1]
  • blk.7.post_attention_norm.weight
    F32
    [1536]
  • blk.7.post_ffw_norm.weight
    F32
    [1536]
  • blk.7.post_norm.weight
    F32
    [1536]
  • blk.7.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.8
  • blk.8.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.8.attn_k_norm.weight
    F32
    [256]
  • blk.8.attn_norm.weight
    F32
    [1536]
  • blk.8.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.8.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.8.attn_q_norm.weight
    F32
    [256]
  • blk.8.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.8.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.8.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.8.ffn_norm.weight
    F32
    [1536]
  • blk.8.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.8.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.8.layer_output_scale.weight
    F32
    [1]
  • blk.8.post_attention_norm.weight
    F32
    [1536]
  • blk.8.post_ffw_norm.weight
    F32
    [1536]
  • blk.8.post_norm.weight
    F32
    [1536]
  • blk.8.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.9
  • blk.9.attn_k.weight
    IQ4_XS
    [1536, 512]
  • blk.9.attn_k_norm.weight
    F32
    [512]
  • blk.9.attn_norm.weight
    F32
    [1536]
  • blk.9.attn_output.weight
    IQ4_XS
    [4096, 1536]
  • blk.9.attn_q.weight
    IQ4_XS
    [1536, 4096]
  • blk.9.attn_q_norm.weight
    F32
    [512]
  • blk.9.attn_v.weight
    Q5_K
    [1536, 512]
  • blk.9.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.9.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.9.ffn_norm.weight
    F32
    [1536]
  • blk.9.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.9.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.9.layer_output_scale.weight
    F32
    [1]
  • blk.9.post_attention_norm.weight
    F32
    [1536]
  • blk.9.post_ffw_norm.weight
    F32
    [1536]
  • blk.9.post_norm.weight
    F32
    [1536]
  • blk.9.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.10
  • blk.10.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.10.attn_k_norm.weight
    F32
    [256]
  • blk.10.attn_norm.weight
    F32
    [1536]
  • blk.10.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.10.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.10.attn_q_norm.weight
    F32
    [256]
  • blk.10.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.10.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.10.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.10.ffn_norm.weight
    F32
    [1536]
  • blk.10.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.10.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.10.layer_output_scale.weight
    F32
    [1]
  • blk.10.post_attention_norm.weight
    F32
    [1536]
  • blk.10.post_ffw_norm.weight
    F32
    [1536]
  • blk.10.post_norm.weight
    F32
    [1536]
  • blk.10.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.11
  • blk.11.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.11.attn_k_norm.weight
    F32
    [256]
  • blk.11.attn_norm.weight
    F32
    [1536]
  • blk.11.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.11.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.11.attn_q_norm.weight
    F32
    [256]
  • blk.11.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.11.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.11.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.11.ffn_norm.weight
    F32
    [1536]
  • blk.11.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.11.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.11.layer_output_scale.weight
    F32
    [1]
  • blk.11.post_attention_norm.weight
    F32
    [1536]
  • blk.11.post_ffw_norm.weight
    F32
    [1536]
  • blk.11.post_norm.weight
    F32
    [1536]
  • blk.11.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.12
  • blk.12.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.12.attn_k_norm.weight
    F32
    [256]
  • blk.12.attn_norm.weight
    F32
    [1536]
  • blk.12.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.12.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.12.attn_q_norm.weight
    F32
    [256]
  • blk.12.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.12.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.12.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.12.ffn_norm.weight
    F32
    [1536]
  • blk.12.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.12.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.12.layer_output_scale.weight
    F32
    [1]
  • blk.12.post_attention_norm.weight
    F32
    [1536]
  • blk.12.post_ffw_norm.weight
    F32
    [1536]
  • blk.12.post_norm.weight
    F32
    [1536]
  • blk.12.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.13
  • blk.13.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.13.attn_k_norm.weight
    F32
    [256]
  • blk.13.attn_norm.weight
    F32
    [1536]
  • blk.13.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.13.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.13.attn_q_norm.weight
    F32
    [256]
  • blk.13.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.13.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.13.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.13.ffn_norm.weight
    F32
    [1536]
  • blk.13.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.13.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.13.layer_output_scale.weight
    F32
    [1]
  • blk.13.post_attention_norm.weight
    F32
    [1536]
  • blk.13.post_ffw_norm.weight
    F32
    [1536]
  • blk.13.post_norm.weight
    F32
    [1536]
  • blk.13.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.14
  • blk.14.attn_k.weight
    IQ4_XS
    [1536, 512]
  • blk.14.attn_k_norm.weight
    F32
    [512]
  • blk.14.attn_norm.weight
    F32
    [1536]
  • blk.14.attn_output.weight
    IQ4_XS
    [4096, 1536]
  • blk.14.attn_q.weight
    IQ4_XS
    [1536, 4096]
  • blk.14.attn_q_norm.weight
    F32
    [512]
  • blk.14.attn_v.weight
    Q5_K
    [1536, 512]
  • blk.14.ffn_down.weight
    IQ4_XS
    [6144, 1536]
  • blk.14.ffn_gate.weight
    IQ4_XS
    [1536, 6144]
  • blk.14.ffn_norm.weight
    F32
    [1536]
  • blk.14.ffn_up.weight
    IQ4_XS
    [1536, 6144]
  • blk.14.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.14.layer_output_scale.weight
    F32
    [1]
  • blk.14.post_attention_norm.weight
    F32
    [1536]
  • blk.14.post_ffw_norm.weight
    F32
    [1536]
  • blk.14.post_norm.weight
    F32
    [1536]
  • blk.14.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.15
  • blk.15.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.15.attn_k_norm.weight
    F32
    [256]
  • blk.15.attn_norm.weight
    F32
    [1536]
  • blk.15.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.15.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.15.attn_q_norm.weight
    F32
    [256]
  • blk.15.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.15.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.15.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.15.ffn_norm.weight
    F32
    [1536]
  • blk.15.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.15.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.15.layer_output_scale.weight
    F32
    [1]
  • blk.15.post_attention_norm.weight
    F32
    [1536]
  • blk.15.post_ffw_norm.weight
    F32
    [1536]
  • blk.15.post_norm.weight
    F32
    [1536]
  • blk.15.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.16
  • blk.16.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.16.attn_k_norm.weight
    F32
    [256]
  • blk.16.attn_norm.weight
    F32
    [1536]
  • blk.16.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.16.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.16.attn_q_norm.weight
    F32
    [256]
  • blk.16.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.16.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.16.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.16.ffn_norm.weight
    F32
    [1536]
  • blk.16.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.16.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.16.layer_output_scale.weight
    F32
    [1]
  • blk.16.post_attention_norm.weight
    F32
    [1536]
  • blk.16.post_ffw_norm.weight
    F32
    [1536]
  • blk.16.post_norm.weight
    F32
    [1536]
  • blk.16.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.17
  • blk.17.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.17.attn_k_norm.weight
    F32
    [256]
  • blk.17.attn_norm.weight
    F32
    [1536]
  • blk.17.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.17.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.17.attn_q_norm.weight
    F32
    [256]
  • blk.17.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.17.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.17.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.17.ffn_norm.weight
    F32
    [1536]
  • blk.17.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.17.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.17.layer_output_scale.weight
    F32
    [1]
  • blk.17.post_attention_norm.weight
    F32
    [1536]
  • blk.17.post_ffw_norm.weight
    F32
    [1536]
  • blk.17.post_norm.weight
    F32
    [1536]
  • blk.17.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.18
  • blk.18.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.18.attn_k_norm.weight
    F32
    [256]
  • blk.18.attn_norm.weight
    F32
    [1536]
  • blk.18.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.18.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.18.attn_q_norm.weight
    F32
    [256]
  • blk.18.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.18.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.18.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.18.ffn_norm.weight
    F32
    [1536]
  • blk.18.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.18.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.18.layer_output_scale.weight
    F32
    [1]
  • blk.18.post_attention_norm.weight
    F32
    [1536]
  • blk.18.post_ffw_norm.weight
    F32
    [1536]
  • blk.18.post_norm.weight
    F32
    [1536]
  • blk.18.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.19
  • blk.19.attn_k.weight
    IQ4_XS
    [1536, 512]
  • blk.19.attn_k_norm.weight
    F32
    [512]
  • blk.19.attn_norm.weight
    F32
    [1536]
  • blk.19.attn_output.weight
    IQ4_XS
    [4096, 1536]
  • blk.19.attn_q.weight
    IQ4_XS
    [1536, 4096]
  • blk.19.attn_q_norm.weight
    F32
    [512]
  • blk.19.attn_v.weight
    Q5_K
    [1536, 512]
  • blk.19.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.19.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.19.ffn_norm.weight
    F32
    [1536]
  • blk.19.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.19.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.19.layer_output_scale.weight
    F32
    [1]
  • blk.19.post_attention_norm.weight
    F32
    [1536]
  • blk.19.post_ffw_norm.weight
    F32
    [1536]
  • blk.19.post_norm.weight
    F32
    [1536]
  • blk.19.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.20
  • blk.20.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.20.attn_k_norm.weight
    F32
    [256]
  • blk.20.attn_norm.weight
    F32
    [1536]
  • blk.20.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.20.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.20.attn_q_norm.weight
    F32
    [256]
  • blk.20.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.20.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.20.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.20.ffn_norm.weight
    F32
    [1536]
  • blk.20.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.20.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.20.layer_output_scale.weight
    F32
    [1]
  • blk.20.post_attention_norm.weight
    F32
    [1536]
  • blk.20.post_ffw_norm.weight
    F32
    [1536]
  • blk.20.post_norm.weight
    F32
    [1536]
  • blk.20.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.21
  • blk.21.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.21.attn_k_norm.weight
    F32
    [256]
  • blk.21.attn_norm.weight
    F32
    [1536]
  • blk.21.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.21.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.21.attn_q_norm.weight
    F32
    [256]
  • blk.21.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.21.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.21.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.21.ffn_norm.weight
    F32
    [1536]
  • blk.21.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.21.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.21.layer_output_scale.weight
    F32
    [1]
  • blk.21.post_attention_norm.weight
    F32
    [1536]
  • blk.21.post_ffw_norm.weight
    F32
    [1536]
  • blk.21.post_norm.weight
    F32
    [1536]
  • blk.21.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.22
  • blk.22.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.22.attn_k_norm.weight
    F32
    [256]
  • blk.22.attn_norm.weight
    F32
    [1536]
  • blk.22.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.22.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.22.attn_q_norm.weight
    F32
    [256]
  • blk.22.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.22.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.22.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.22.ffn_norm.weight
    F32
    [1536]
  • blk.22.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.22.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.22.layer_output_scale.weight
    F32
    [1]
  • blk.22.post_attention_norm.weight
    F32
    [1536]
  • blk.22.post_ffw_norm.weight
    F32
    [1536]
  • blk.22.post_norm.weight
    F32
    [1536]
  • blk.22.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.23
  • blk.23.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.23.attn_k_norm.weight
    F32
    [256]
  • blk.23.attn_norm.weight
    F32
    [1536]
  • blk.23.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.23.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.23.attn_q_norm.weight
    F32
    [256]
  • blk.23.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.23.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.23.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.23.ffn_norm.weight
    F32
    [1536]
  • blk.23.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.23.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.23.layer_output_scale.weight
    F32
    [1]
  • blk.23.post_attention_norm.weight
    F32
    [1536]
  • blk.23.post_ffw_norm.weight
    F32
    [1536]
  • blk.23.post_norm.weight
    F32
    [1536]
  • blk.23.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.24
  • blk.24.attn_k.weight
    IQ4_XS
    [1536, 512]
  • blk.24.attn_k_norm.weight
    F32
    [512]
  • blk.24.attn_norm.weight
    F32
    [1536]
  • blk.24.attn_output.weight
    IQ4_XS
    [4096, 1536]
  • blk.24.attn_q.weight
    IQ4_XS
    [1536, 4096]
  • blk.24.attn_q_norm.weight
    F32
    [512]
  • blk.24.attn_v.weight
    Q5_K
    [1536, 512]
  • blk.24.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.24.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.24.ffn_norm.weight
    F32
    [1536]
  • blk.24.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.24.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.24.layer_output_scale.weight
    F32
    [1]
  • blk.24.post_attention_norm.weight
    F32
    [1536]
  • blk.24.post_ffw_norm.weight
    F32
    [1536]
  • blk.24.post_norm.weight
    F32
    [1536]
  • blk.24.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.25
  • blk.25.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.25.attn_k_norm.weight
    F32
    [256]
  • blk.25.attn_norm.weight
    F32
    [1536]
  • blk.25.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.25.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.25.attn_q_norm.weight
    F32
    [256]
  • blk.25.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.25.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.25.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.25.ffn_norm.weight
    F32
    [1536]
  • blk.25.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.25.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.25.layer_output_scale.weight
    F32
    [1]
  • blk.25.post_attention_norm.weight
    F32
    [1536]
  • blk.25.post_ffw_norm.weight
    F32
    [1536]
  • blk.25.post_norm.weight
    F32
    [1536]
  • blk.25.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.26
  • blk.26.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.26.attn_k_norm.weight
    F32
    [256]
  • blk.26.attn_norm.weight
    F32
    [1536]
  • blk.26.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.26.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.26.attn_q_norm.weight
    F32
    [256]
  • blk.26.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.26.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.26.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.26.ffn_norm.weight
    F32
    [1536]
  • blk.26.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.26.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.26.layer_output_scale.weight
    F32
    [1]
  • blk.26.post_attention_norm.weight
    F32
    [1536]
  • blk.26.post_ffw_norm.weight
    F32
    [1536]
  • blk.26.post_norm.weight
    F32
    [1536]
  • blk.26.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.27
  • blk.27.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.27.attn_k_norm.weight
    F32
    [256]
  • blk.27.attn_norm.weight
    F32
    [1536]
  • blk.27.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.27.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.27.attn_q_norm.weight
    F32
    [256]
  • blk.27.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.27.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.27.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.27.ffn_norm.weight
    F32
    [1536]
  • blk.27.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.27.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.27.layer_output_scale.weight
    F32
    [1]
  • blk.27.post_attention_norm.weight
    F32
    [1536]
  • blk.27.post_ffw_norm.weight
    F32
    [1536]
  • blk.27.post_norm.weight
    F32
    [1536]
  • blk.27.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.28
  • blk.28.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.28.attn_k_norm.weight
    F32
    [256]
  • blk.28.attn_norm.weight
    F32
    [1536]
  • blk.28.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.28.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.28.attn_q_norm.weight
    F32
    [256]
  • blk.28.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.28.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.28.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.28.ffn_norm.weight
    F32
    [1536]
  • blk.28.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.28.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.28.layer_output_scale.weight
    F32
    [1]
  • blk.28.post_attention_norm.weight
    F32
    [1536]
  • blk.28.post_ffw_norm.weight
    F32
    [1536]
  • blk.28.post_norm.weight
    F32
    [1536]
  • blk.28.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.29
  • blk.29.attn_k.weight
    IQ4_XS
    [1536, 512]
  • blk.29.attn_k_norm.weight
    F32
    [512]
  • blk.29.attn_norm.weight
    F32
    [1536]
  • blk.29.attn_output.weight
    IQ4_XS
    [4096, 1536]
  • blk.29.attn_q.weight
    IQ4_XS
    [1536, 4096]
  • blk.29.attn_q_norm.weight
    F32
    [512]
  • blk.29.attn_v.weight
    Q5_K
    [1536, 512]
  • blk.29.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.29.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.29.ffn_norm.weight
    F32
    [1536]
  • blk.29.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.29.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.29.layer_output_scale.weight
    F32
    [1]
  • blk.29.post_attention_norm.weight
    F32
    [1536]
  • blk.29.post_ffw_norm.weight
    F32
    [1536]
  • blk.29.post_norm.weight
    F32
    [1536]
  • blk.29.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.30
  • blk.30.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.30.attn_k_norm.weight
    F32
    [256]
  • blk.30.attn_norm.weight
    F32
    [1536]
  • blk.30.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.30.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.30.attn_q_norm.weight
    F32
    [256]
  • blk.30.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.30.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.30.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.30.ffn_norm.weight
    F32
    [1536]
  • blk.30.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.30.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.30.layer_output_scale.weight
    F32
    [1]
  • blk.30.post_attention_norm.weight
    F32
    [1536]
  • blk.30.post_ffw_norm.weight
    F32
    [1536]
  • blk.30.post_norm.weight
    F32
    [1536]
  • blk.30.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.31
  • blk.31.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.31.attn_k_norm.weight
    F32
    [256]
  • blk.31.attn_norm.weight
    F32
    [1536]
  • blk.31.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.31.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.31.attn_q_norm.weight
    F32
    [256]
  • blk.31.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.31.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.31.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.31.ffn_norm.weight
    F32
    [1536]
  • blk.31.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.31.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.31.layer_output_scale.weight
    F32
    [1]
  • blk.31.post_attention_norm.weight
    F32
    [1536]
  • blk.31.post_ffw_norm.weight
    F32
    [1536]
  • blk.31.post_norm.weight
    F32
    [1536]
  • blk.31.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.32
  • blk.32.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.32.attn_k_norm.weight
    F32
    [256]
  • blk.32.attn_norm.weight
    F32
    [1536]
  • blk.32.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.32.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.32.attn_q_norm.weight
    F32
    [256]
  • blk.32.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.32.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.32.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.32.ffn_norm.weight
    F32
    [1536]
  • blk.32.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.32.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.32.layer_output_scale.weight
    F32
    [1]
  • blk.32.post_attention_norm.weight
    F32
    [1536]
  • blk.32.post_ffw_norm.weight
    F32
    [1536]
  • blk.32.post_norm.weight
    F32
    [1536]
  • blk.32.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.33
  • blk.33.attn_k.weight
    IQ4_XS
    [1536, 256]
  • blk.33.attn_k_norm.weight
    F32
    [256]
  • blk.33.attn_norm.weight
    F32
    [1536]
  • blk.33.attn_output.weight
    IQ4_XS
    [2048, 1536]
  • blk.33.attn_q.weight
    IQ4_XS
    [1536, 2048]
  • blk.33.attn_q_norm.weight
    F32
    [256]
  • blk.33.attn_v.weight
    Q5_K
    [1536, 256]
  • blk.33.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.33.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.33.ffn_norm.weight
    F32
    [1536]
  • blk.33.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.33.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.33.layer_output_scale.weight
    F32
    [1]
  • blk.33.post_attention_norm.weight
    F32
    [1536]
  • blk.33.post_ffw_norm.weight
    F32
    [1536]
  • blk.33.post_norm.weight
    F32
    [1536]
  • blk.33.proj.weight
    IQ4_XS
    [256, 1536]
  • blk.34
  • blk.34.attn_k.weight
    IQ4_XS
    [1536, 512]
  • blk.34.attn_k_norm.weight
    F32
    [512]
  • blk.34.attn_norm.weight
    F32
    [1536]
  • blk.34.attn_output.weight
    IQ4_XS
    [4096, 1536]
  • blk.34.attn_q.weight
    IQ4_XS
    [1536, 4096]
  • blk.34.attn_q_norm.weight
    F32
    [512]
  • blk.34.attn_v.weight
    Q5_K
    [1536, 512]
  • blk.34.ffn_down.weight
    IQ4_XS
    [12288, 1536]
  • blk.34.ffn_gate.weight
    IQ4_XS
    [1536, 12288]
  • blk.34.ffn_norm.weight
    F32
    [1536]
  • blk.34.ffn_up.weight
    IQ4_XS
    [1536, 12288]
  • blk.34.inp_gate.weight
    IQ4_XS
    [1536, 256]
  • blk.34.layer_output_scale.weight
    F32
    [1]
  • blk.34.post_attention_norm.weight
    F32
    [1536]
  • blk.34.post_ffw_norm.weight
    F32
    [1536]
  • blk.34.post_norm.weight
    F32
    [1536]
  • blk.34.proj.weight
    IQ4_XS
    [256, 1536]
  • mm.a.fc.bias
    F32
    [1536]
  • mm.a.fc.weight
    F16
    [1024, 1536]
  • mm.a.input_projection.weight
    F16
    [1536, 1536]
  • mm.input_projection.weight
    F16
    [768, 1536]
  • per_layer_model_proj.weight
    BF16
    [1536, 8960]
  • per_layer_proj_norm.weight
    F32
    [256]
  • per_layer_token_embd.weight
    Q6_K
    [8960, 262144]
  • rope_freqs.weight
    F32
    [256]
  • v.blk.0
  • v.blk.0.attn_k.input_max
    F32
    [1]
  • v.blk.0.attn_k.input_min
    F32
    [1]
  • v.blk.0.attn_k.output_max
    F32
    [1]
  • v.blk.0.attn_k.output_min
    F32
    [1]
  • v.blk.0.attn_k.weight
    F16
    [768, 768]
  • v.blk.0.attn_k_norm.weight
    F32
    [64]
  • v.blk.0.attn_out.input_max
    F32
    [1]
  • v.blk.0.attn_out.input_min
    F32
    [1]
  • v.blk.0.attn_out.output_max
    F32
    [1]
  • v.blk.0.attn_out.output_min
    F32
    [1]
  • v.blk.0.attn_out.weight
    F16
    [768, 768]
  • v.blk.0.attn_post_norm.weight
    F32
    [768]
  • v.blk.0.attn_q.input_max
    F32
    [1]
  • v.blk.0.attn_q.input_min
    F32
    [1]
  • v.blk.0.attn_q.output_max
    F32
    [1]
  • v.blk.0.attn_q.output_min
    F32
    [1]
  • v.blk.0.attn_q.weight
    F16
    [768, 768]
  • v.blk.0.attn_q_norm.weight
    F32
    [64]
  • v.blk.0.attn_v.input_max
    F32
    [1]
  • v.blk.0.attn_v.input_min
    F32
    [1]
  • v.blk.0.attn_v.output_max
    F32
    [1]
  • v.blk.0.attn_v.output_min
    F32
    [1]
  • v.blk.0.attn_v.weight
    F16
    [768, 768]
  • v.blk.0.ffn_down.input_max
    F32
    [1]
  • v.blk.0.ffn_down.input_min
    F32
    [1]
  • v.blk.0.ffn_down.output_max
    F32
    [1]
  • v.blk.0.ffn_down.output_min
    F32
    [1]
  • v.blk.0.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.0.ffn_gate.input_max
    F32
    [1]
  • v.blk.0.ffn_gate.input_min
    F32
    [1]
  • v.blk.0.ffn_gate.output_max
    F32
    [1]
  • v.blk.0.ffn_gate.output_min
    F32
    [1]
  • v.blk.0.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.0.ffn_post_norm.weight
    F32
    [768]
  • v.blk.0.ffn_up.input_max
    F32
    [1]
  • v.blk.0.ffn_up.input_min
    F32
    [1]
  • v.blk.0.ffn_up.output_max
    F32
    [1]
  • v.blk.0.ffn_up.output_min
    F32
    [1]
  • v.blk.0.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.0.ln1.weight
    F32
    [768]
  • v.blk.0.ln2.weight
    F32
    [768]
  • v.blk.1
  • v.blk.1.attn_k.input_max
    F32
    [1]
  • v.blk.1.attn_k.input_min
    F32
    [1]
  • v.blk.1.attn_k.output_max
    F32
    [1]
  • v.blk.1.attn_k.output_min
    F32
    [1]
  • v.blk.1.attn_k.weight
    F16
    [768, 768]
  • v.blk.1.attn_k_norm.weight
    F32
    [64]
  • v.blk.1.attn_out.input_max
    F32
    [1]
  • v.blk.1.attn_out.input_min
    F32
    [1]
  • v.blk.1.attn_out.output_max
    F32
    [1]
  • v.blk.1.attn_out.output_min
    F32
    [1]
  • v.blk.1.attn_out.weight
    F16
    [768, 768]
  • v.blk.1.attn_post_norm.weight
    F32
    [768]
  • v.blk.1.attn_q.input_max
    F32
    [1]
  • v.blk.1.attn_q.input_min
    F32
    [1]
  • v.blk.1.attn_q.output_max
    F32
    [1]
  • v.blk.1.attn_q.output_min
    F32
    [1]
  • v.blk.1.attn_q.weight
    F16
    [768, 768]
  • v.blk.1.attn_q_norm.weight
    F32
    [64]
  • v.blk.1.attn_v.input_max
    F32
    [1]
  • v.blk.1.attn_v.input_min
    F32
    [1]
  • v.blk.1.attn_v.output_max
    F32
    [1]
  • v.blk.1.attn_v.output_min
    F32
    [1]
  • v.blk.1.attn_v.weight
    F16
    [768, 768]
  • v.blk.1.ffn_down.input_max
    F32
    [1]
  • v.blk.1.ffn_down.input_min
    F32
    [1]
  • v.blk.1.ffn_down.output_max
    F32
    [1]
  • v.blk.1.ffn_down.output_min
    F32
    [1]
  • v.blk.1.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.1.ffn_gate.input_max
    F32
    [1]
  • v.blk.1.ffn_gate.input_min
    F32
    [1]
  • v.blk.1.ffn_gate.output_max
    F32
    [1]
  • v.blk.1.ffn_gate.output_min
    F32
    [1]
  • v.blk.1.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.1.ffn_post_norm.weight
    F32
    [768]
  • v.blk.1.ffn_up.input_max
    F32
    [1]
  • v.blk.1.ffn_up.input_min
    F32
    [1]
  • v.blk.1.ffn_up.output_max
    F32
    [1]
  • v.blk.1.ffn_up.output_min
    F32
    [1]
  • v.blk.1.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.1.ln1.weight
    F32
    [768]
  • v.blk.1.ln2.weight
    F32
    [768]
  • v.blk.2
  • v.blk.2.attn_k.input_max
    F32
    [1]
  • v.blk.2.attn_k.input_min
    F32
    [1]
  • v.blk.2.attn_k.output_max
    F32
    [1]
  • v.blk.2.attn_k.output_min
    F32
    [1]
  • v.blk.2.attn_k.weight
    F16
    [768, 768]
  • v.blk.2.attn_k_norm.weight
    F32
    [64]
  • v.blk.2.attn_out.input_max
    F32
    [1]
  • v.blk.2.attn_out.input_min
    F32
    [1]
  • v.blk.2.attn_out.output_max
    F32
    [1]
  • v.blk.2.attn_out.output_min
    F32
    [1]
  • v.blk.2.attn_out.weight
    F16
    [768, 768]
  • v.blk.2.attn_post_norm.weight
    F32
    [768]
  • v.blk.2.attn_q.input_max
    F32
    [1]
  • v.blk.2.attn_q.input_min
    F32
    [1]
  • v.blk.2.attn_q.output_max
    F32
    [1]
  • v.blk.2.attn_q.output_min
    F32
    [1]
  • v.blk.2.attn_q.weight
    F16
    [768, 768]
  • v.blk.2.attn_q_norm.weight
    F32
    [64]
  • v.blk.2.attn_v.input_max
    F32
    [1]
  • v.blk.2.attn_v.input_min
    F32
    [1]
  • v.blk.2.attn_v.output_max
    F32
    [1]
  • v.blk.2.attn_v.output_min
    F32
    [1]
  • v.blk.2.attn_v.weight
    F16
    [768, 768]
  • v.blk.2.ffn_down.input_max
    F32
    [1]
  • v.blk.2.ffn_down.input_min
    F32
    [1]
  • v.blk.2.ffn_down.output_max
    F32
    [1]
  • v.blk.2.ffn_down.output_min
    F32
    [1]
  • v.blk.2.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.2.ffn_gate.input_max
    F32
    [1]
  • v.blk.2.ffn_gate.input_min
    F32
    [1]
  • v.blk.2.ffn_gate.output_max
    F32
    [1]
  • v.blk.2.ffn_gate.output_min
    F32
    [1]
  • v.blk.2.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.2.ffn_post_norm.weight
    F32
    [768]
  • v.blk.2.ffn_up.input_max
    F32
    [1]
  • v.blk.2.ffn_up.input_min
    F32
    [1]
  • v.blk.2.ffn_up.output_max
    F32
    [1]
  • v.blk.2.ffn_up.output_min
    F32
    [1]
  • v.blk.2.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.2.ln1.weight
    F32
    [768]
  • v.blk.2.ln2.weight
    F32
    [768]
  • v.blk.3
  • v.blk.3.attn_k.input_max
    F32
    [1]
  • v.blk.3.attn_k.input_min
    F32
    [1]
  • v.blk.3.attn_k.output_max
    F32
    [1]
  • v.blk.3.attn_k.output_min
    F32
    [1]
  • v.blk.3.attn_k.weight
    F16
    [768, 768]
  • v.blk.3.attn_k_norm.weight
    F32
    [64]
  • v.blk.3.attn_out.input_max
    F32
    [1]
  • v.blk.3.attn_out.input_min
    F32
    [1]
  • v.blk.3.attn_out.output_max
    F32
    [1]
  • v.blk.3.attn_out.output_min
    F32
    [1]
  • v.blk.3.attn_out.weight
    F16
    [768, 768]
  • v.blk.3.attn_post_norm.weight
    F32
    [768]
  • v.blk.3.attn_q.input_max
    F32
    [1]
  • v.blk.3.attn_q.input_min
    F32
    [1]
  • v.blk.3.attn_q.output_max
    F32
    [1]
  • v.blk.3.attn_q.output_min
    F32
    [1]
  • v.blk.3.attn_q.weight
    F16
    [768, 768]
  • v.blk.3.attn_q_norm.weight
    F32
    [64]
  • v.blk.3.attn_v.input_max
    F32
    [1]
  • v.blk.3.attn_v.input_min
    F32
    [1]
  • v.blk.3.attn_v.output_max
    F32
    [1]
  • v.blk.3.attn_v.output_min
    F32
    [1]
  • v.blk.3.attn_v.weight
    F16
    [768, 768]
  • v.blk.3.ffn_down.input_max
    F32
    [1]
  • v.blk.3.ffn_down.input_min
    F32
    [1]
  • v.blk.3.ffn_down.output_max
    F32
    [1]
  • v.blk.3.ffn_down.output_min
    F32
    [1]
  • v.blk.3.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.3.ffn_gate.input_max
    F32
    [1]
  • v.blk.3.ffn_gate.input_min
    F32
    [1]
  • v.blk.3.ffn_gate.output_max
    F32
    [1]
  • v.blk.3.ffn_gate.output_min
    F32
    [1]
  • v.blk.3.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.3.ffn_post_norm.weight
    F32
    [768]
  • v.blk.3.ffn_up.input_max
    F32
    [1]
  • v.blk.3.ffn_up.input_min
    F32
    [1]
  • v.blk.3.ffn_up.output_max
    F32
    [1]
  • v.blk.3.ffn_up.output_min
    F32
    [1]
  • v.blk.3.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.3.ln1.weight
    F32
    [768]
  • v.blk.3.ln2.weight
    F32
    [768]
  • v.blk.4
  • v.blk.4.attn_k.input_max
    F32
    [1]
  • v.blk.4.attn_k.input_min
    F32
    [1]
  • v.blk.4.attn_k.output_max
    F32
    [1]
  • v.blk.4.attn_k.output_min
    F32
    [1]
  • v.blk.4.attn_k.weight
    F16
    [768, 768]
  • v.blk.4.attn_k_norm.weight
    F32
    [64]
  • v.blk.4.attn_out.input_max
    F32
    [1]
  • v.blk.4.attn_out.input_min
    F32
    [1]
  • v.blk.4.attn_out.output_max
    F32
    [1]
  • v.blk.4.attn_out.output_min
    F32
    [1]
  • v.blk.4.attn_out.weight
    F16
    [768, 768]
  • v.blk.4.attn_post_norm.weight
    F32
    [768]
  • v.blk.4.attn_q.input_max
    F32
    [1]
  • v.blk.4.attn_q.input_min
    F32
    [1]
  • v.blk.4.attn_q.output_max
    F32
    [1]
  • v.blk.4.attn_q.output_min
    F32
    [1]
  • v.blk.4.attn_q.weight
    F16
    [768, 768]
  • v.blk.4.attn_q_norm.weight
    F32
    [64]
  • v.blk.4.attn_v.input_max
    F32
    [1]
  • v.blk.4.attn_v.input_min
    F32
    [1]
  • v.blk.4.attn_v.output_max
    F32
    [1]
  • v.blk.4.attn_v.output_min
    F32
    [1]
  • v.blk.4.attn_v.weight
    F16
    [768, 768]
  • v.blk.4.ffn_down.input_max
    F32
    [1]
  • v.blk.4.ffn_down.input_min
    F32
    [1]
  • v.blk.4.ffn_down.output_max
    F32
    [1]
  • v.blk.4.ffn_down.output_min
    F32
    [1]
  • v.blk.4.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.4.ffn_gate.input_max
    F32
    [1]
  • v.blk.4.ffn_gate.input_min
    F32
    [1]
  • v.blk.4.ffn_gate.output_max
    F32
    [1]
  • v.blk.4.ffn_gate.output_min
    F32
    [1]
  • v.blk.4.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.4.ffn_post_norm.weight
    F32
    [768]
  • v.blk.4.ffn_up.input_max
    F32
    [1]
  • v.blk.4.ffn_up.input_min
    F32
    [1]
  • v.blk.4.ffn_up.output_max
    F32
    [1]
  • v.blk.4.ffn_up.output_min
    F32
    [1]
  • v.blk.4.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.4.ln1.weight
    F32
    [768]
  • v.blk.4.ln2.weight
    F32
    [768]
  • v.blk.5
  • v.blk.5.attn_k.input_max
    F32
    [1]
  • v.blk.5.attn_k.input_min
    F32
    [1]
  • v.blk.5.attn_k.output_max
    F32
    [1]
  • v.blk.5.attn_k.output_min
    F32
    [1]
  • v.blk.5.attn_k.weight
    F16
    [768, 768]
  • v.blk.5.attn_k_norm.weight
    F32
    [64]
  • v.blk.5.attn_out.input_max
    F32
    [1]
  • v.blk.5.attn_out.input_min
    F32
    [1]
  • v.blk.5.attn_out.output_max
    F32
    [1]
  • v.blk.5.attn_out.output_min
    F32
    [1]
  • v.blk.5.attn_out.weight
    F16
    [768, 768]
  • v.blk.5.attn_post_norm.weight
    F32
    [768]
  • v.blk.5.attn_q.input_max
    F32
    [1]
  • v.blk.5.attn_q.input_min
    F32
    [1]
  • v.blk.5.attn_q.output_max
    F32
    [1]
  • v.blk.5.attn_q.output_min
    F32
    [1]
  • v.blk.5.attn_q.weight
    F16
    [768, 768]
  • v.blk.5.attn_q_norm.weight
    F32
    [64]
  • v.blk.5.attn_v.input_max
    F32
    [1]
  • v.blk.5.attn_v.input_min
    F32
    [1]
  • v.blk.5.attn_v.output_max
    F32
    [1]
  • v.blk.5.attn_v.output_min
    F32
    [1]
  • v.blk.5.attn_v.weight
    F16
    [768, 768]
  • v.blk.5.ffn_down.input_max
    F32
    [1]
  • v.blk.5.ffn_down.input_min
    F32
    [1]
  • v.blk.5.ffn_down.output_max
    F32
    [1]
  • v.blk.5.ffn_down.output_min
    F32
    [1]
  • v.blk.5.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.5.ffn_gate.input_max
    F32
    [1]
  • v.blk.5.ffn_gate.input_min
    F32
    [1]
  • v.blk.5.ffn_gate.output_max
    F32
    [1]
  • v.blk.5.ffn_gate.output_min
    F32
    [1]
  • v.blk.5.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.5.ffn_post_norm.weight
    F32
    [768]
  • v.blk.5.ffn_up.input_max
    F32
    [1]
  • v.blk.5.ffn_up.input_min
    F32
    [1]
  • v.blk.5.ffn_up.output_max
    F32
    [1]
  • v.blk.5.ffn_up.output_min
    F32
    [1]
  • v.blk.5.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.5.ln1.weight
    F32
    [768]
  • v.blk.5.ln2.weight
    F32
    [768]
  • v.blk.6
  • v.blk.6.attn_k.input_max
    F32
    [1]
  • v.blk.6.attn_k.input_min
    F32
    [1]
  • v.blk.6.attn_k.output_max
    F32
    [1]
  • v.blk.6.attn_k.output_min
    F32
    [1]
  • v.blk.6.attn_k.weight
    F16
    [768, 768]
  • v.blk.6.attn_k_norm.weight
    F32
    [64]
  • v.blk.6.attn_out.input_max
    F32
    [1]
  • v.blk.6.attn_out.input_min
    F32
    [1]
  • v.blk.6.attn_out.output_max
    F32
    [1]
  • v.blk.6.attn_out.output_min
    F32
    [1]
  • v.blk.6.attn_out.weight
    F16
    [768, 768]
  • v.blk.6.attn_post_norm.weight
    F32
    [768]
  • v.blk.6.attn_q.input_max
    F32
    [1]
  • v.blk.6.attn_q.input_min
    F32
    [1]
  • v.blk.6.attn_q.output_max
    F32
    [1]
  • v.blk.6.attn_q.output_min
    F32
    [1]
  • v.blk.6.attn_q.weight
    F16
    [768, 768]
  • v.blk.6.attn_q_norm.weight
    F32
    [64]
  • v.blk.6.attn_v.input_max
    F32
    [1]
  • v.blk.6.attn_v.input_min
    F32
    [1]
  • v.blk.6.attn_v.output_max
    F32
    [1]
  • v.blk.6.attn_v.output_min
    F32
    [1]
  • v.blk.6.attn_v.weight
    F16
    [768, 768]
  • v.blk.6.ffn_down.input_max
    F32
    [1]
  • v.blk.6.ffn_down.input_min
    F32
    [1]
  • v.blk.6.ffn_down.output_max
    F32
    [1]
  • v.blk.6.ffn_down.output_min
    F32
    [1]
  • v.blk.6.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.6.ffn_gate.input_max
    F32
    [1]
  • v.blk.6.ffn_gate.input_min
    F32
    [1]
  • v.blk.6.ffn_gate.output_max
    F32
    [1]
  • v.blk.6.ffn_gate.output_min
    F32
    [1]
  • v.blk.6.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.6.ffn_post_norm.weight
    F32
    [768]
  • v.blk.6.ffn_up.input_max
    F32
    [1]
  • v.blk.6.ffn_up.input_min
    F32
    [1]
  • v.blk.6.ffn_up.output_max
    F32
    [1]
  • v.blk.6.ffn_up.output_min
    F32
    [1]
  • v.blk.6.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.6.ln1.weight
    F32
    [768]
  • v.blk.6.ln2.weight
    F32
    [768]
  • v.blk.7
  • v.blk.7.attn_k.input_max
    F32
    [1]
  • v.blk.7.attn_k.input_min
    F32
    [1]
  • v.blk.7.attn_k.output_max
    F32
    [1]
  • v.blk.7.attn_k.output_min
    F32
    [1]
  • v.blk.7.attn_k.weight
    F16
    [768, 768]
  • v.blk.7.attn_k_norm.weight
    F32
    [64]
  • v.blk.7.attn_out.input_max
    F32
    [1]
  • v.blk.7.attn_out.input_min
    F32
    [1]
  • v.blk.7.attn_out.output_max
    F32
    [1]
  • v.blk.7.attn_out.output_min
    F32
    [1]
  • v.blk.7.attn_out.weight
    F16
    [768, 768]
  • v.blk.7.attn_post_norm.weight
    F32
    [768]
  • v.blk.7.attn_q.input_max
    F32
    [1]
  • v.blk.7.attn_q.input_min
    F32
    [1]
  • v.blk.7.attn_q.output_max
    F32
    [1]
  • v.blk.7.attn_q.output_min
    F32
    [1]
  • v.blk.7.attn_q.weight
    F16
    [768, 768]
  • v.blk.7.attn_q_norm.weight
    F32
    [64]
  • v.blk.7.attn_v.input_max
    F32
    [1]
  • v.blk.7.attn_v.input_min
    F32
    [1]
  • v.blk.7.attn_v.output_max
    F32
    [1]
  • v.blk.7.attn_v.output_min
    F32
    [1]
  • v.blk.7.attn_v.weight
    F16
    [768, 768]
  • v.blk.7.ffn_down.input_max
    F32
    [1]
  • v.blk.7.ffn_down.input_min
    F32
    [1]
  • v.blk.7.ffn_down.output_max
    F32
    [1]
  • v.blk.7.ffn_down.output_min
    F32
    [1]
  • v.blk.7.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.7.ffn_gate.input_max
    F32
    [1]
  • v.blk.7.ffn_gate.input_min
    F32
    [1]
  • v.blk.7.ffn_gate.output_max
    F32
    [1]
  • v.blk.7.ffn_gate.output_min
    F32
    [1]
  • v.blk.7.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.7.ffn_post_norm.weight
    F32
    [768]
  • v.blk.7.ffn_up.input_max
    F32
    [1]
  • v.blk.7.ffn_up.input_min
    F32
    [1]
  • v.blk.7.ffn_up.output_max
    F32
    [1]
  • v.blk.7.ffn_up.output_min
    F32
    [1]
  • v.blk.7.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.7.ln1.weight
    F32
    [768]
  • v.blk.7.ln2.weight
    F32
    [768]
  • v.blk.8
  • v.blk.8.attn_k.input_max
    F32
    [1]
  • v.blk.8.attn_k.input_min
    F32
    [1]
  • v.blk.8.attn_k.output_max
    F32
    [1]
  • v.blk.8.attn_k.output_min
    F32
    [1]
  • v.blk.8.attn_k.weight
    F16
    [768, 768]
  • v.blk.8.attn_k_norm.weight
    F32
    [64]
  • v.blk.8.attn_out.input_max
    F32
    [1]
  • v.blk.8.attn_out.input_min
    F32
    [1]
  • v.blk.8.attn_out.output_max
    F32
    [1]
  • v.blk.8.attn_out.output_min
    F32
    [1]
  • v.blk.8.attn_out.weight
    F16
    [768, 768]
  • v.blk.8.attn_post_norm.weight
    F32
    [768]
  • v.blk.8.attn_q.input_max
    F32
    [1]
  • v.blk.8.attn_q.input_min
    F32
    [1]
  • v.blk.8.attn_q.output_max
    F32
    [1]
  • v.blk.8.attn_q.output_min
    F32
    [1]
  • v.blk.8.attn_q.weight
    F16
    [768, 768]
  • v.blk.8.attn_q_norm.weight
    F32
    [64]
  • v.blk.8.attn_v.input_max
    F32
    [1]
  • v.blk.8.attn_v.input_min
    F32
    [1]
  • v.blk.8.attn_v.output_max
    F32
    [1]
  • v.blk.8.attn_v.output_min
    F32
    [1]
  • v.blk.8.attn_v.weight
    F16
    [768, 768]
  • v.blk.8.ffn_down.input_max
    F32
    [1]
  • v.blk.8.ffn_down.input_min
    F32
    [1]
  • v.blk.8.ffn_down.output_max
    F32
    [1]
  • v.blk.8.ffn_down.output_min
    F32
    [1]
  • v.blk.8.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.8.ffn_gate.input_max
    F32
    [1]
  • v.blk.8.ffn_gate.input_min
    F32
    [1]
  • v.blk.8.ffn_gate.output_max
    F32
    [1]
  • v.blk.8.ffn_gate.output_min
    F32
    [1]
  • v.blk.8.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.8.ffn_post_norm.weight
    F32
    [768]
  • v.blk.8.ffn_up.input_max
    F32
    [1]
  • v.blk.8.ffn_up.input_min
    F32
    [1]
  • v.blk.8.ffn_up.output_max
    F32
    [1]
  • v.blk.8.ffn_up.output_min
    F32
    [1]
  • v.blk.8.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.8.ln1.weight
    F32
    [768]
  • v.blk.8.ln2.weight
    F32
    [768]
  • v.blk.9
  • v.blk.9.attn_k.input_max
    F32
    [1]
  • v.blk.9.attn_k.input_min
    F32
    [1]
  • v.blk.9.attn_k.output_max
    F32
    [1]
  • v.blk.9.attn_k.output_min
    F32
    [1]
  • v.blk.9.attn_k.weight
    F16
    [768, 768]
  • v.blk.9.attn_k_norm.weight
    F32
    [64]
  • v.blk.9.attn_out.input_max
    F32
    [1]
  • v.blk.9.attn_out.input_min
    F32
    [1]
  • v.blk.9.attn_out.output_max
    F32
    [1]
  • v.blk.9.attn_out.output_min
    F32
    [1]
  • v.blk.9.attn_out.weight
    F16
    [768, 768]
  • v.blk.9.attn_post_norm.weight
    F32
    [768]
  • v.blk.9.attn_q.input_max
    F32
    [1]
  • v.blk.9.attn_q.input_min
    F32
    [1]
  • v.blk.9.attn_q.output_max
    F32
    [1]
  • v.blk.9.attn_q.output_min
    F32
    [1]
  • v.blk.9.attn_q.weight
    F16
    [768, 768]
  • v.blk.9.attn_q_norm.weight
    F32
    [64]
  • v.blk.9.attn_v.input_max
    F32
    [1]
  • v.blk.9.attn_v.input_min
    F32
    [1]
  • v.blk.9.attn_v.output_max
    F32
    [1]
  • v.blk.9.attn_v.output_min
    F32
    [1]
  • v.blk.9.attn_v.weight
    F16
    [768, 768]
  • v.blk.9.ffn_down.input_max
    F32
    [1]
  • v.blk.9.ffn_down.input_min
    F32
    [1]
  • v.blk.9.ffn_down.output_max
    F32
    [1]
  • v.blk.9.ffn_down.output_min
    F32
    [1]
  • v.blk.9.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.9.ffn_gate.input_max
    F32
    [1]
  • v.blk.9.ffn_gate.input_min
    F32
    [1]
  • v.blk.9.ffn_gate.output_max
    F32
    [1]
  • v.blk.9.ffn_gate.output_min
    F32
    [1]
  • v.blk.9.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.9.ffn_post_norm.weight
    F32
    [768]
  • v.blk.9.ffn_up.input_max
    F32
    [1]
  • v.blk.9.ffn_up.input_min
    F32
    [1]
  • v.blk.9.ffn_up.output_max
    F32
    [1]
  • v.blk.9.ffn_up.output_min
    F32
    [1]
  • v.blk.9.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.9.ln1.weight
    F32
    [768]
  • v.blk.9.ln2.weight
    F32
    [768]
  • v.blk.10
  • v.blk.10.attn_k.input_max
    F32
    [1]
  • v.blk.10.attn_k.input_min
    F32
    [1]
  • v.blk.10.attn_k.output_max
    F32
    [1]
  • v.blk.10.attn_k.output_min
    F32
    [1]
  • v.blk.10.attn_k.weight
    F16
    [768, 768]
  • v.blk.10.attn_k_norm.weight
    F32
    [64]
  • v.blk.10.attn_out.input_max
    F32
    [1]
  • v.blk.10.attn_out.input_min
    F32
    [1]
  • v.blk.10.attn_out.output_max
    F32
    [1]
  • v.blk.10.attn_out.output_min
    F32
    [1]
  • v.blk.10.attn_out.weight
    F16
    [768, 768]
  • v.blk.10.attn_post_norm.weight
    F32
    [768]
  • v.blk.10.attn_q.input_max
    F32
    [1]
  • v.blk.10.attn_q.input_min
    F32
    [1]
  • v.blk.10.attn_q.output_max
    F32
    [1]
  • v.blk.10.attn_q.output_min
    F32
    [1]
  • v.blk.10.attn_q.weight
    F16
    [768, 768]
  • v.blk.10.attn_q_norm.weight
    F32
    [64]
  • v.blk.10.attn_v.input_max
    F32
    [1]
  • v.blk.10.attn_v.input_min
    F32
    [1]
  • v.blk.10.attn_v.output_max
    F32
    [1]
  • v.blk.10.attn_v.output_min
    F32
    [1]
  • v.blk.10.attn_v.weight
    F16
    [768, 768]
  • v.blk.10.ffn_down.input_max
    F32
    [1]
  • v.blk.10.ffn_down.input_min
    F32
    [1]
  • v.blk.10.ffn_down.output_max
    F32
    [1]
  • v.blk.10.ffn_down.output_min
    F32
    [1]
  • v.blk.10.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.10.ffn_gate.input_max
    F32
    [1]
  • v.blk.10.ffn_gate.input_min
    F32
    [1]
  • v.blk.10.ffn_gate.output_max
    F32
    [1]
  • v.blk.10.ffn_gate.output_min
    F32
    [1]
  • v.blk.10.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.10.ffn_post_norm.weight
    F32
    [768]
  • v.blk.10.ffn_up.input_max
    F32
    [1]
  • v.blk.10.ffn_up.input_min
    F32
    [1]
  • v.blk.10.ffn_up.output_max
    F32
    [1]
  • v.blk.10.ffn_up.output_min
    F32
    [1]
  • v.blk.10.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.10.ln1.weight
    F32
    [768]
  • v.blk.10.ln2.weight
    F32
    [768]
  • v.blk.11
  • v.blk.11.attn_k.input_max
    F32
    [1]
  • v.blk.11.attn_k.input_min
    F32
    [1]
  • v.blk.11.attn_k.output_max
    F32
    [1]
  • v.blk.11.attn_k.output_min
    F32
    [1]
  • v.blk.11.attn_k.weight
    F16
    [768, 768]
  • v.blk.11.attn_k_norm.weight
    F32
    [64]
  • v.blk.11.attn_out.input_max
    F32
    [1]
  • v.blk.11.attn_out.input_min
    F32
    [1]
  • v.blk.11.attn_out.output_max
    F32
    [1]
  • v.blk.11.attn_out.output_min
    F32
    [1]
  • v.blk.11.attn_out.weight
    F16
    [768, 768]
  • v.blk.11.attn_post_norm.weight
    F32
    [768]
  • v.blk.11.attn_q.input_max
    F32
    [1]
  • v.blk.11.attn_q.input_min
    F32
    [1]
  • v.blk.11.attn_q.output_max
    F32
    [1]
  • v.blk.11.attn_q.output_min
    F32
    [1]
  • v.blk.11.attn_q.weight
    F16
    [768, 768]
  • v.blk.11.attn_q_norm.weight
    F32
    [64]
  • v.blk.11.attn_v.input_max
    F32
    [1]
  • v.blk.11.attn_v.input_min
    F32
    [1]
  • v.blk.11.attn_v.output_max
    F32
    [1]
  • v.blk.11.attn_v.output_min
    F32
    [1]
  • v.blk.11.attn_v.weight
    F16
    [768, 768]
  • v.blk.11.ffn_down.input_max
    F32
    [1]
  • v.blk.11.ffn_down.input_min
    F32
    [1]
  • v.blk.11.ffn_down.output_max
    F32
    [1]
  • v.blk.11.ffn_down.output_min
    F32
    [1]
  • v.blk.11.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.11.ffn_gate.input_max
    F32
    [1]
  • v.blk.11.ffn_gate.input_min
    F32
    [1]
  • v.blk.11.ffn_gate.output_max
    F32
    [1]
  • v.blk.11.ffn_gate.output_min
    F32
    [1]
  • v.blk.11.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.11.ffn_post_norm.weight
    F32
    [768]
  • v.blk.11.ffn_up.input_max
    F32
    [1]
  • v.blk.11.ffn_up.input_min
    F32
    [1]
  • v.blk.11.ffn_up.output_max
    F32
    [1]
  • v.blk.11.ffn_up.output_min
    F32
    [1]
  • v.blk.11.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.11.ln1.weight
    F32
    [768]
  • v.blk.11.ln2.weight
    F32
    [768]
  • v.blk.12
  • v.blk.12.attn_k.input_max
    F32
    [1]
  • v.blk.12.attn_k.input_min
    F32
    [1]
  • v.blk.12.attn_k.output_max
    F32
    [1]
  • v.blk.12.attn_k.output_min
    F32
    [1]
  • v.blk.12.attn_k.weight
    F16
    [768, 768]
  • v.blk.12.attn_k_norm.weight
    F32
    [64]
  • v.blk.12.attn_out.input_max
    F32
    [1]
  • v.blk.12.attn_out.input_min
    F32
    [1]
  • v.blk.12.attn_out.output_max
    F32
    [1]
  • v.blk.12.attn_out.output_min
    F32
    [1]
  • v.blk.12.attn_out.weight
    F16
    [768, 768]
  • v.blk.12.attn_post_norm.weight
    F32
    [768]
  • v.blk.12.attn_q.input_max
    F32
    [1]
  • v.blk.12.attn_q.input_min
    F32
    [1]
  • v.blk.12.attn_q.output_max
    F32
    [1]
  • v.blk.12.attn_q.output_min
    F32
    [1]
  • v.blk.12.attn_q.weight
    F16
    [768, 768]
  • v.blk.12.attn_q_norm.weight
    F32
    [64]
  • v.blk.12.attn_v.input_max
    F32
    [1]
  • v.blk.12.attn_v.input_min
    F32
    [1]
  • v.blk.12.attn_v.output_max
    F32
    [1]
  • v.blk.12.attn_v.output_min
    F32
    [1]
  • v.blk.12.attn_v.weight
    F16
    [768, 768]
  • v.blk.12.ffn_down.input_max
    F32
    [1]
  • v.blk.12.ffn_down.input_min
    F32
    [1]
  • v.blk.12.ffn_down.output_max
    F32
    [1]
  • v.blk.12.ffn_down.output_min
    F32
    [1]
  • v.blk.12.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.12.ffn_gate.input_max
    F32
    [1]
  • v.blk.12.ffn_gate.input_min
    F32
    [1]
  • v.blk.12.ffn_gate.output_max
    F32
    [1]
  • v.blk.12.ffn_gate.output_min
    F32
    [1]
  • v.blk.12.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.12.ffn_post_norm.weight
    F32
    [768]
  • v.blk.12.ffn_up.input_max
    F32
    [1]
  • v.blk.12.ffn_up.input_min
    F32
    [1]
  • v.blk.12.ffn_up.output_max
    F32
    [1]
  • v.blk.12.ffn_up.output_min
    F32
    [1]
  • v.blk.12.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.12.ln1.weight
    F32
    [768]
  • v.blk.12.ln2.weight
    F32
    [768]
  • v.blk.13
  • v.blk.13.attn_k.input_max
    F32
    [1]
  • v.blk.13.attn_k.input_min
    F32
    [1]
  • v.blk.13.attn_k.output_max
    F32
    [1]
  • v.blk.13.attn_k.output_min
    F32
    [1]
  • v.blk.13.attn_k.weight
    F16
    [768, 768]
  • v.blk.13.attn_k_norm.weight
    F32
    [64]
  • v.blk.13.attn_out.input_max
    F32
    [1]
  • v.blk.13.attn_out.input_min
    F32
    [1]
  • v.blk.13.attn_out.output_max
    F32
    [1]
  • v.blk.13.attn_out.output_min
    F32
    [1]
  • v.blk.13.attn_out.weight
    F16
    [768, 768]
  • v.blk.13.attn_post_norm.weight
    F32
    [768]
  • v.blk.13.attn_q.input_max
    F32
    [1]
  • v.blk.13.attn_q.input_min
    F32
    [1]
  • v.blk.13.attn_q.output_max
    F32
    [1]
  • v.blk.13.attn_q.output_min
    F32
    [1]
  • v.blk.13.attn_q.weight
    F16
    [768, 768]
  • v.blk.13.attn_q_norm.weight
    F32
    [64]
  • v.blk.13.attn_v.input_max
    F32
    [1]
  • v.blk.13.attn_v.input_min
    F32
    [1]
  • v.blk.13.attn_v.output_max
    F32
    [1]
  • v.blk.13.attn_v.output_min
    F32
    [1]
  • v.blk.13.attn_v.weight
    F16
    [768, 768]
  • v.blk.13.ffn_down.input_max
    F32
    [1]
  • v.blk.13.ffn_down.input_min
    F32
    [1]
  • v.blk.13.ffn_down.output_max
    F32
    [1]
  • v.blk.13.ffn_down.output_min
    F32
    [1]
  • v.blk.13.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.13.ffn_gate.input_max
    F32
    [1]
  • v.blk.13.ffn_gate.input_min
    F32
    [1]
  • v.blk.13.ffn_gate.output_max
    F32
    [1]
  • v.blk.13.ffn_gate.output_min
    F32
    [1]
  • v.blk.13.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.13.ffn_post_norm.weight
    F32
    [768]
  • v.blk.13.ffn_up.input_max
    F32
    [1]
  • v.blk.13.ffn_up.input_min
    F32
    [1]
  • v.blk.13.ffn_up.output_max
    F32
    [1]
  • v.blk.13.ffn_up.output_min
    F32
    [1]
  • v.blk.13.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.13.ln1.weight
    F32
    [768]
  • v.blk.13.ln2.weight
    F32
    [768]
  • v.blk.14
  • v.blk.14.attn_k.input_max
    F32
    [1]
  • v.blk.14.attn_k.input_min
    F32
    [1]
  • v.blk.14.attn_k.output_max
    F32
    [1]
  • v.blk.14.attn_k.output_min
    F32
    [1]
  • v.blk.14.attn_k.weight
    F16
    [768, 768]
  • v.blk.14.attn_k_norm.weight
    F32
    [64]
  • v.blk.14.attn_out.input_max
    F32
    [1]
  • v.blk.14.attn_out.input_min
    F32
    [1]
  • v.blk.14.attn_out.output_max
    F32
    [1]
  • v.blk.14.attn_out.output_min
    F32
    [1]
  • v.blk.14.attn_out.weight
    F16
    [768, 768]
  • v.blk.14.attn_post_norm.weight
    F32
    [768]
  • v.blk.14.attn_q.input_max
    F32
    [1]
  • v.blk.14.attn_q.input_min
    F32
    [1]
  • v.blk.14.attn_q.output_max
    F32
    [1]
  • v.blk.14.attn_q.output_min
    F32
    [1]
  • v.blk.14.attn_q.weight
    F16
    [768, 768]
  • v.blk.14.attn_q_norm.weight
    F32
    [64]
  • v.blk.14.attn_v.input_max
    F32
    [1]
  • v.blk.14.attn_v.input_min
    F32
    [1]
  • v.blk.14.attn_v.output_max
    F32
    [1]
  • v.blk.14.attn_v.output_min
    F32
    [1]
  • v.blk.14.attn_v.weight
    F16
    [768, 768]
  • v.blk.14.ffn_down.input_max
    F32
    [1]
  • v.blk.14.ffn_down.input_min
    F32
    [1]
  • v.blk.14.ffn_down.output_max
    F32
    [1]
  • v.blk.14.ffn_down.output_min
    F32
    [1]
  • v.blk.14.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.14.ffn_gate.input_max
    F32
    [1]
  • v.blk.14.ffn_gate.input_min
    F32
    [1]
  • v.blk.14.ffn_gate.output_max
    F32
    [1]
  • v.blk.14.ffn_gate.output_min
    F32
    [1]
  • v.blk.14.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.14.ffn_post_norm.weight
    F32
    [768]
  • v.blk.14.ffn_up.input_max
    F32
    [1]
  • v.blk.14.ffn_up.input_min
    F32
    [1]
  • v.blk.14.ffn_up.output_max
    F32
    [1]
  • v.blk.14.ffn_up.output_min
    F32
    [1]
  • v.blk.14.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.14.ln1.weight
    F32
    [768]
  • v.blk.14.ln2.weight
    F32
    [768]
  • v.blk.15
  • v.blk.15.attn_k.input_max
    F32
    [1]
  • v.blk.15.attn_k.input_min
    F32
    [1]
  • v.blk.15.attn_k.output_max
    F32
    [1]
  • v.blk.15.attn_k.output_min
    F32
    [1]
  • v.blk.15.attn_k.weight
    F16
    [768, 768]
  • v.blk.15.attn_k_norm.weight
    F32
    [64]
  • v.blk.15.attn_out.input_max
    F32
    [1]
  • v.blk.15.attn_out.input_min
    F32
    [1]
  • v.blk.15.attn_out.output_max
    F32
    [1]
  • v.blk.15.attn_out.output_min
    F32
    [1]
  • v.blk.15.attn_out.weight
    F16
    [768, 768]
  • v.blk.15.attn_post_norm.weight
    F32
    [768]
  • v.blk.15.attn_q.input_max
    F32
    [1]
  • v.blk.15.attn_q.input_min
    F32
    [1]
  • v.blk.15.attn_q.output_max
    F32
    [1]
  • v.blk.15.attn_q.output_min
    F32
    [1]
  • v.blk.15.attn_q.weight
    F16
    [768, 768]
  • v.blk.15.attn_q_norm.weight
    F32
    [64]
  • v.blk.15.attn_v.input_max
    F32
    [1]
  • v.blk.15.attn_v.input_min
    F32
    [1]
  • v.blk.15.attn_v.output_max
    F32
    [1]
  • v.blk.15.attn_v.output_min
    F32
    [1]
  • v.blk.15.attn_v.weight
    F16
    [768, 768]
  • v.blk.15.ffn_down.input_max
    F32
    [1]
  • v.blk.15.ffn_down.input_min
    F32
    [1]
  • v.blk.15.ffn_down.output_max
    F32
    [1]
  • v.blk.15.ffn_down.output_min
    F32
    [1]
  • v.blk.15.ffn_down.weight
    F16
    [3072, 768]
  • v.blk.15.ffn_gate.input_max
    F32
    [1]
  • v.blk.15.ffn_gate.input_min
    F32
    [1]
  • v.blk.15.ffn_gate.output_max
    F32
    [1]
  • v.blk.15.ffn_gate.output_min
    F32
    [1]
  • v.blk.15.ffn_gate.weight
    F16
    [768, 3072]
  • v.blk.15.ffn_post_norm.weight
    F32
    [768]
  • v.blk.15.ffn_up.input_max
    F32
    [1]
  • v.blk.15.ffn_up.input_min
    F32
    [1]
  • v.blk.15.ffn_up.output_max
    F32
    [1]
  • v.blk.15.ffn_up.output_min
    F32
    [1]
  • v.blk.15.ffn_up.weight
    F16
    [768, 3072]
  • v.blk.15.ln1.weight
    F32
    [768]
  • v.blk.15.ln2.weight
    F32
    [768]
  • v.patch_embd.weight
    F16
    [16, 16, 3, 768]
  • v.position_embd.weight
    F32
    [768, 10240, 2]
  • output_norm.weight
    F32
    [1536]