16.3K 23 minutes ago

Gemma 4 Turbo is an optimized version of Google's Gemma 4 model, achieving 51% faster CPU inference through 4-bit IQ4_XS quantization and performance tuning. Ideal for local AI assistants, tool calling, and chat applications on Windows systems without GPU.

vision tools thinking audio e2b e4b 26b 31b
cc321582aada · 18GB
    Metadata
  • general.architecture
    gemma4
  • general.file_type
    IQ4_XS
  • gemma4.attention.head_count
    32
  • gemma4.attention.head_count_kv
    [16, 16, 16, 16, 16, ...]
  • gemma4.attention.key_length
    512
  • gemma4.attention.key_length_swa
    256
  • gemma4.attention.layer_norm_rms_epsilon
    1e-06
  • gemma4.attention.shared_kv_layers
    0
  • gemma4.attention.sliding_window
    1024
  • gemma4.attention.sliding_window_pattern
    [true, true, true, true, true, ...]
  • gemma4.attention.value_length
    512
  • gemma4.attention.value_length_swa
    256
  • gemma4.block_count
    60
  • gemma4.context_length
    262144
  • gemma4.embedding_length
    5376
  • gemma4.embedding_length_per_layer_input
    0
  • gemma4.feed_forward_length
    21504
  • gemma4.final_logit_softcapping
    30
  • gemma4.rope.dimension_count
    512
  • gemma4.rope.dimension_count_swa
    256
  • gemma4.rope.freq_base
    1e+06
  • gemma4.rope.freq_base_swa
    10000
  • gemma4.vision.attention.head_count
    16
  • gemma4.vision.attention.layer_norm_epsilon
    1e-06
  • gemma4.vision.block_count
    27
  • gemma4.vision.embedding_length
    1152
  • gemma4.vision.feed_forward_length
    4304
  • gemma4.vision.num_channels
    3
  • gemma4.vision.patch_size
    16
  • gemma4.vision.projector.scale_factor
    3
  • tokenizer.ggml.add_bos_token
    true
  • tokenizer.ggml.add_space_prefix
    false
  • tokenizer.ggml.bos_token_id
    2
  • tokenizer.ggml.eos_token_id
    1
  • tokenizer.ggml.mask_token_id
    4
  • tokenizer.ggml.merges
    [ , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁, , , ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁▁, ...]
  • tokenizer.ggml.model
    gemma4
  • tokenizer.ggml.padding_token_id
    0
  • tokenizer.ggml.scores
    [-1000, -1000, -1000, -1000, -1000, ...]
  • tokenizer.ggml.token_type
    [3, 3, 3, 3, 3, ...]
  • tokenizer.ggml.tokens
    [<pad>, <eos>, <bos>, <unk>, <mask>, ...]
  • tokenizer.ggml.unknown_token_id
    3
  • Tensor
  • token_embd.weight
    Q6_K
    [5376, 262144]
  • blk.0
  • blk.0.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.0.attn_k_norm.weight
    F32
    [256]
  • blk.0.attn_norm.weight
    F32
    [5376]
  • blk.0.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.0.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.0.attn_q_norm.weight
    F32
    [256]
  • blk.0.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.0.ffn_down.weight
    Q5_K
    [21504, 5376]
  • blk.0.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.0.ffn_norm.weight
    F32
    [5376]
  • blk.0.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.0.layer_output_scale.weight
    F32
    [1]
  • blk.0.post_attention_norm.weight
    F32
    [5376]
  • blk.0.post_ffw_norm.weight
    F32
    [5376]
  • blk.1
  • blk.1.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.1.attn_k_norm.weight
    F32
    [256]
  • blk.1.attn_norm.weight
    F32
    [5376]
  • blk.1.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.1.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.1.attn_q_norm.weight
    F32
    [256]
  • blk.1.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.1.ffn_down.weight
    Q5_K
    [21504, 5376]
  • blk.1.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.1.ffn_norm.weight
    F32
    [5376]
  • blk.1.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.1.layer_output_scale.weight
    F32
    [1]
  • blk.1.post_attention_norm.weight
    F32
    [5376]
  • blk.1.post_ffw_norm.weight
    F32
    [5376]
  • blk.2
  • blk.2.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.2.attn_k_norm.weight
    F32
    [256]
  • blk.2.attn_norm.weight
    F32
    [5376]
  • blk.2.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.2.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.2.attn_q_norm.weight
    F32
    [256]
  • blk.2.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.2.ffn_down.weight
    Q5_K
    [21504, 5376]
  • blk.2.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.2.ffn_norm.weight
    F32
    [5376]
  • blk.2.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.2.layer_output_scale.weight
    F32
    [1]
  • blk.2.post_attention_norm.weight
    F32
    [5376]
  • blk.2.post_ffw_norm.weight
    F32
    [5376]
  • blk.3
  • blk.3.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.3.attn_k_norm.weight
    F32
    [256]
  • blk.3.attn_norm.weight
    F32
    [5376]
  • blk.3.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.3.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.3.attn_q_norm.weight
    F32
    [256]
  • blk.3.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.3.ffn_down.weight
    Q5_K
    [21504, 5376]
  • blk.3.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.3.ffn_norm.weight
    F32
    [5376]
  • blk.3.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.3.layer_output_scale.weight
    F32
    [1]
  • blk.3.post_attention_norm.weight
    F32
    [5376]
  • blk.3.post_ffw_norm.weight
    F32
    [5376]
  • blk.4
  • blk.4.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.4.attn_k_norm.weight
    F32
    [256]
  • blk.4.attn_norm.weight
    F32
    [5376]
  • blk.4.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.4.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.4.attn_q_norm.weight
    F32
    [256]
  • blk.4.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.4.ffn_down.weight
    Q5_K
    [21504, 5376]
  • blk.4.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.4.ffn_norm.weight
    F32
    [5376]
  • blk.4.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.4.layer_output_scale.weight
    F32
    [1]
  • blk.4.post_attention_norm.weight
    F32
    [5376]
  • blk.4.post_ffw_norm.weight
    F32
    [5376]
  • blk.5
  • blk.5.attn_k.weight
    IQ4_XS
    [5376, 2048]
  • blk.5.attn_k_norm.weight
    F32
    [512]
  • blk.5.attn_norm.weight
    F32
    [5376]
  • blk.5.attn_output.weight
    IQ4_XS
    [16384, 5376]
  • blk.5.attn_q.weight
    IQ4_XS
    [5376, 16384]
  • blk.5.attn_q_norm.weight
    F32
    [512]
  • blk.5.ffn_down.weight
    Q5_K
    [21504, 5376]
  • blk.5.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.5.ffn_norm.weight
    F32
    [5376]
  • blk.5.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.5.layer_output_scale.weight
    F32
    [1]
  • blk.5.post_attention_norm.weight
    F32
    [5376]
  • blk.5.post_ffw_norm.weight
    F32
    [5376]
  • blk.6
  • blk.6.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.6.attn_k_norm.weight
    F32
    [256]
  • blk.6.attn_norm.weight
    F32
    [5376]
  • blk.6.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.6.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.6.attn_q_norm.weight
    F32
    [256]
  • blk.6.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.6.ffn_down.weight
    Q5_K
    [21504, 5376]
  • blk.6.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.6.ffn_norm.weight
    F32
    [5376]
  • blk.6.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.6.layer_output_scale.weight
    F32
    [1]
  • blk.6.post_attention_norm.weight
    F32
    [5376]
  • blk.6.post_ffw_norm.weight
    F32
    [5376]
  • blk.7
  • blk.7.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.7.attn_k_norm.weight
    F32
    [256]
  • blk.7.attn_norm.weight
    F32
    [5376]
  • blk.7.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.7.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.7.attn_q_norm.weight
    F32
    [256]
  • blk.7.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.7.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.7.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.7.ffn_norm.weight
    F32
    [5376]
  • blk.7.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.7.layer_output_scale.weight
    F32
    [1]
  • blk.7.post_attention_norm.weight
    F32
    [5376]
  • blk.7.post_ffw_norm.weight
    F32
    [5376]
  • blk.8
  • blk.8.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.8.attn_k_norm.weight
    F32
    [256]
  • blk.8.attn_norm.weight
    F32
    [5376]
  • blk.8.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.8.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.8.attn_q_norm.weight
    F32
    [256]
  • blk.8.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.8.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.8.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.8.ffn_norm.weight
    F32
    [5376]
  • blk.8.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.8.layer_output_scale.weight
    F32
    [1]
  • blk.8.post_attention_norm.weight
    F32
    [5376]
  • blk.8.post_ffw_norm.weight
    F32
    [5376]
  • blk.9
  • blk.9.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.9.attn_k_norm.weight
    F32
    [256]
  • blk.9.attn_norm.weight
    F32
    [5376]
  • blk.9.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.9.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.9.attn_q_norm.weight
    F32
    [256]
  • blk.9.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.9.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.9.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.9.ffn_norm.weight
    F32
    [5376]
  • blk.9.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.9.layer_output_scale.weight
    F32
    [1]
  • blk.9.post_attention_norm.weight
    F32
    [5376]
  • blk.9.post_ffw_norm.weight
    F32
    [5376]
  • blk.10
  • blk.10.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.10.attn_k_norm.weight
    F32
    [256]
  • blk.10.attn_norm.weight
    F32
    [5376]
  • blk.10.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.10.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.10.attn_q_norm.weight
    F32
    [256]
  • blk.10.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.10.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.10.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.10.ffn_norm.weight
    F32
    [5376]
  • blk.10.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.10.layer_output_scale.weight
    F32
    [1]
  • blk.10.post_attention_norm.weight
    F32
    [5376]
  • blk.10.post_ffw_norm.weight
    F32
    [5376]
  • blk.11
  • blk.11.attn_k.weight
    IQ4_XS
    [5376, 2048]
  • blk.11.attn_k_norm.weight
    F32
    [512]
  • blk.11.attn_norm.weight
    F32
    [5376]
  • blk.11.attn_output.weight
    IQ4_XS
    [16384, 5376]
  • blk.11.attn_q.weight
    IQ4_XS
    [5376, 16384]
  • blk.11.attn_q_norm.weight
    F32
    [512]
  • blk.11.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.11.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.11.ffn_norm.weight
    F32
    [5376]
  • blk.11.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.11.layer_output_scale.weight
    F32
    [1]
  • blk.11.post_attention_norm.weight
    F32
    [5376]
  • blk.11.post_ffw_norm.weight
    F32
    [5376]
  • blk.12
  • blk.12.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.12.attn_k_norm.weight
    F32
    [256]
  • blk.12.attn_norm.weight
    F32
    [5376]
  • blk.12.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.12.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.12.attn_q_norm.weight
    F32
    [256]
  • blk.12.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.12.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.12.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.12.ffn_norm.weight
    F32
    [5376]
  • blk.12.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.12.layer_output_scale.weight
    F32
    [1]
  • blk.12.post_attention_norm.weight
    F32
    [5376]
  • blk.12.post_ffw_norm.weight
    F32
    [5376]
  • blk.13
  • blk.13.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.13.attn_k_norm.weight
    F32
    [256]
  • blk.13.attn_norm.weight
    F32
    [5376]
  • blk.13.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.13.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.13.attn_q_norm.weight
    F32
    [256]
  • blk.13.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.13.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.13.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.13.ffn_norm.weight
    F32
    [5376]
  • blk.13.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.13.layer_output_scale.weight
    F32
    [1]
  • blk.13.post_attention_norm.weight
    F32
    [5376]
  • blk.13.post_ffw_norm.weight
    F32
    [5376]
  • blk.14
  • blk.14.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.14.attn_k_norm.weight
    F32
    [256]
  • blk.14.attn_norm.weight
    F32
    [5376]
  • blk.14.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.14.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.14.attn_q_norm.weight
    F32
    [256]
  • blk.14.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.14.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.14.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.14.ffn_norm.weight
    F32
    [5376]
  • blk.14.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.14.layer_output_scale.weight
    F32
    [1]
  • blk.14.post_attention_norm.weight
    F32
    [5376]
  • blk.14.post_ffw_norm.weight
    F32
    [5376]
  • blk.15
  • blk.15.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.15.attn_k_norm.weight
    F32
    [256]
  • blk.15.attn_norm.weight
    F32
    [5376]
  • blk.15.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.15.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.15.attn_q_norm.weight
    F32
    [256]
  • blk.15.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.15.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.15.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.15.ffn_norm.weight
    F32
    [5376]
  • blk.15.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.15.layer_output_scale.weight
    F32
    [1]
  • blk.15.post_attention_norm.weight
    F32
    [5376]
  • blk.15.post_ffw_norm.weight
    F32
    [5376]
  • blk.16
  • blk.16.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.16.attn_k_norm.weight
    F32
    [256]
  • blk.16.attn_norm.weight
    F32
    [5376]
  • blk.16.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.16.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.16.attn_q_norm.weight
    F32
    [256]
  • blk.16.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.16.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.16.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.16.ffn_norm.weight
    F32
    [5376]
  • blk.16.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.16.layer_output_scale.weight
    F32
    [1]
  • blk.16.post_attention_norm.weight
    F32
    [5376]
  • blk.16.post_ffw_norm.weight
    F32
    [5376]
  • blk.17
  • blk.17.attn_k.weight
    IQ4_XS
    [5376, 2048]
  • blk.17.attn_k_norm.weight
    F32
    [512]
  • blk.17.attn_norm.weight
    F32
    [5376]
  • blk.17.attn_output.weight
    IQ4_XS
    [16384, 5376]
  • blk.17.attn_q.weight
    IQ4_XS
    [5376, 16384]
  • blk.17.attn_q_norm.weight
    F32
    [512]
  • blk.17.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.17.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.17.ffn_norm.weight
    F32
    [5376]
  • blk.17.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.17.layer_output_scale.weight
    F32
    [1]
  • blk.17.post_attention_norm.weight
    F32
    [5376]
  • blk.17.post_ffw_norm.weight
    F32
    [5376]
  • blk.18
  • blk.18.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.18.attn_k_norm.weight
    F32
    [256]
  • blk.18.attn_norm.weight
    F32
    [5376]
  • blk.18.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.18.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.18.attn_q_norm.weight
    F32
    [256]
  • blk.18.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.18.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.18.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.18.ffn_norm.weight
    F32
    [5376]
  • blk.18.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.18.layer_output_scale.weight
    F32
    [1]
  • blk.18.post_attention_norm.weight
    F32
    [5376]
  • blk.18.post_ffw_norm.weight
    F32
    [5376]
  • blk.19
  • blk.19.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.19.attn_k_norm.weight
    F32
    [256]
  • blk.19.attn_norm.weight
    F32
    [5376]
  • blk.19.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.19.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.19.attn_q_norm.weight
    F32
    [256]
  • blk.19.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.19.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.19.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.19.ffn_norm.weight
    F32
    [5376]
  • blk.19.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.19.layer_output_scale.weight
    F32
    [1]
  • blk.19.post_attention_norm.weight
    F32
    [5376]
  • blk.19.post_ffw_norm.weight
    F32
    [5376]
  • blk.20
  • blk.20.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.20.attn_k_norm.weight
    F32
    [256]
  • blk.20.attn_norm.weight
    F32
    [5376]
  • blk.20.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.20.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.20.attn_q_norm.weight
    F32
    [256]
  • blk.20.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.20.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.20.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.20.ffn_norm.weight
    F32
    [5376]
  • blk.20.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.20.layer_output_scale.weight
    F32
    [1]
  • blk.20.post_attention_norm.weight
    F32
    [5376]
  • blk.20.post_ffw_norm.weight
    F32
    [5376]
  • blk.21
  • blk.21.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.21.attn_k_norm.weight
    F32
    [256]
  • blk.21.attn_norm.weight
    F32
    [5376]
  • blk.21.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.21.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.21.attn_q_norm.weight
    F32
    [256]
  • blk.21.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.21.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.21.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.21.ffn_norm.weight
    F32
    [5376]
  • blk.21.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.21.layer_output_scale.weight
    F32
    [1]
  • blk.21.post_attention_norm.weight
    F32
    [5376]
  • blk.21.post_ffw_norm.weight
    F32
    [5376]
  • blk.22
  • blk.22.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.22.attn_k_norm.weight
    F32
    [256]
  • blk.22.attn_norm.weight
    F32
    [5376]
  • blk.22.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.22.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.22.attn_q_norm.weight
    F32
    [256]
  • blk.22.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.22.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.22.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.22.ffn_norm.weight
    F32
    [5376]
  • blk.22.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.22.layer_output_scale.weight
    F32
    [1]
  • blk.22.post_attention_norm.weight
    F32
    [5376]
  • blk.22.post_ffw_norm.weight
    F32
    [5376]
  • blk.23
  • blk.23.attn_k.weight
    IQ4_XS
    [5376, 2048]
  • blk.23.attn_k_norm.weight
    F32
    [512]
  • blk.23.attn_norm.weight
    F32
    [5376]
  • blk.23.attn_output.weight
    IQ4_XS
    [16384, 5376]
  • blk.23.attn_q.weight
    IQ4_XS
    [5376, 16384]
  • blk.23.attn_q_norm.weight
    F32
    [512]
  • blk.23.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.23.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.23.ffn_norm.weight
    F32
    [5376]
  • blk.23.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.23.layer_output_scale.weight
    F32
    [1]
  • blk.23.post_attention_norm.weight
    F32
    [5376]
  • blk.23.post_ffw_norm.weight
    F32
    [5376]
  • blk.24
  • blk.24.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.24.attn_k_norm.weight
    F32
    [256]
  • blk.24.attn_norm.weight
    F32
    [5376]
  • blk.24.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.24.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.24.attn_q_norm.weight
    F32
    [256]
  • blk.24.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.24.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.24.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.24.ffn_norm.weight
    F32
    [5376]
  • blk.24.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.24.layer_output_scale.weight
    F32
    [1]
  • blk.24.post_attention_norm.weight
    F32
    [5376]
  • blk.24.post_ffw_norm.weight
    F32
    [5376]
  • blk.25
  • blk.25.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.25.attn_k_norm.weight
    F32
    [256]
  • blk.25.attn_norm.weight
    F32
    [5376]
  • blk.25.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.25.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.25.attn_q_norm.weight
    F32
    [256]
  • blk.25.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.25.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.25.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.25.ffn_norm.weight
    F32
    [5376]
  • blk.25.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.25.layer_output_scale.weight
    F32
    [1]
  • blk.25.post_attention_norm.weight
    F32
    [5376]
  • blk.25.post_ffw_norm.weight
    F32
    [5376]
  • blk.26
  • blk.26.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.26.attn_k_norm.weight
    F32
    [256]
  • blk.26.attn_norm.weight
    F32
    [5376]
  • blk.26.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.26.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.26.attn_q_norm.weight
    F32
    [256]
  • blk.26.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.26.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.26.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.26.ffn_norm.weight
    F32
    [5376]
  • blk.26.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.26.layer_output_scale.weight
    F32
    [1]
  • blk.26.post_attention_norm.weight
    F32
    [5376]
  • blk.26.post_ffw_norm.weight
    F32
    [5376]
  • blk.27
  • blk.27.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.27.attn_k_norm.weight
    F32
    [256]
  • blk.27.attn_norm.weight
    F32
    [5376]
  • blk.27.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.27.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.27.attn_q_norm.weight
    F32
    [256]
  • blk.27.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.27.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.27.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.27.ffn_norm.weight
    F32
    [5376]
  • blk.27.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.27.layer_output_scale.weight
    F32
    [1]
  • blk.27.post_attention_norm.weight
    F32
    [5376]
  • blk.27.post_ffw_norm.weight
    F32
    [5376]
  • blk.28
  • blk.28.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.28.attn_k_norm.weight
    F32
    [256]
  • blk.28.attn_norm.weight
    F32
    [5376]
  • blk.28.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.28.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.28.attn_q_norm.weight
    F32
    [256]
  • blk.28.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.28.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.28.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.28.ffn_norm.weight
    F32
    [5376]
  • blk.28.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.28.layer_output_scale.weight
    F32
    [1]
  • blk.28.post_attention_norm.weight
    F32
    [5376]
  • blk.28.post_ffw_norm.weight
    F32
    [5376]
  • blk.29
  • blk.29.attn_k.weight
    IQ4_XS
    [5376, 2048]
  • blk.29.attn_k_norm.weight
    F32
    [512]
  • blk.29.attn_norm.weight
    F32
    [5376]
  • blk.29.attn_output.weight
    IQ4_XS
    [16384, 5376]
  • blk.29.attn_q.weight
    IQ4_XS
    [5376, 16384]
  • blk.29.attn_q_norm.weight
    F32
    [512]
  • blk.29.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.29.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.29.ffn_norm.weight
    F32
    [5376]
  • blk.29.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.29.layer_output_scale.weight
    F32
    [1]
  • blk.29.post_attention_norm.weight
    F32
    [5376]
  • blk.29.post_ffw_norm.weight
    F32
    [5376]
  • blk.30
  • blk.30.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.30.attn_k_norm.weight
    F32
    [256]
  • blk.30.attn_norm.weight
    F32
    [5376]
  • blk.30.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.30.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.30.attn_q_norm.weight
    F32
    [256]
  • blk.30.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.30.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.30.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.30.ffn_norm.weight
    F32
    [5376]
  • blk.30.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.30.layer_output_scale.weight
    F32
    [1]
  • blk.30.post_attention_norm.weight
    F32
    [5376]
  • blk.30.post_ffw_norm.weight
    F32
    [5376]
  • blk.31
  • blk.31.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.31.attn_k_norm.weight
    F32
    [256]
  • blk.31.attn_norm.weight
    F32
    [5376]
  • blk.31.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.31.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.31.attn_q_norm.weight
    F32
    [256]
  • blk.31.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.31.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.31.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.31.ffn_norm.weight
    F32
    [5376]
  • blk.31.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.31.layer_output_scale.weight
    F32
    [1]
  • blk.31.post_attention_norm.weight
    F32
    [5376]
  • blk.31.post_ffw_norm.weight
    F32
    [5376]
  • blk.32
  • blk.32.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.32.attn_k_norm.weight
    F32
    [256]
  • blk.32.attn_norm.weight
    F32
    [5376]
  • blk.32.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.32.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.32.attn_q_norm.weight
    F32
    [256]
  • blk.32.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.32.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.32.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.32.ffn_norm.weight
    F32
    [5376]
  • blk.32.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.32.layer_output_scale.weight
    F32
    [1]
  • blk.32.post_attention_norm.weight
    F32
    [5376]
  • blk.32.post_ffw_norm.weight
    F32
    [5376]
  • blk.33
  • blk.33.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.33.attn_k_norm.weight
    F32
    [256]
  • blk.33.attn_norm.weight
    F32
    [5376]
  • blk.33.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.33.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.33.attn_q_norm.weight
    F32
    [256]
  • blk.33.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.33.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.33.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.33.ffn_norm.weight
    F32
    [5376]
  • blk.33.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.33.layer_output_scale.weight
    F32
    [1]
  • blk.33.post_attention_norm.weight
    F32
    [5376]
  • blk.33.post_ffw_norm.weight
    F32
    [5376]
  • blk.34
  • blk.34.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.34.attn_k_norm.weight
    F32
    [256]
  • blk.34.attn_norm.weight
    F32
    [5376]
  • blk.34.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.34.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.34.attn_q_norm.weight
    F32
    [256]
  • blk.34.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.34.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.34.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.34.ffn_norm.weight
    F32
    [5376]
  • blk.34.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.34.layer_output_scale.weight
    F32
    [1]
  • blk.34.post_attention_norm.weight
    F32
    [5376]
  • blk.34.post_ffw_norm.weight
    F32
    [5376]
  • blk.35
  • blk.35.attn_k.weight
    IQ4_XS
    [5376, 2048]
  • blk.35.attn_k_norm.weight
    F32
    [512]
  • blk.35.attn_norm.weight
    F32
    [5376]
  • blk.35.attn_output.weight
    IQ4_XS
    [16384, 5376]
  • blk.35.attn_q.weight
    IQ4_XS
    [5376, 16384]
  • blk.35.attn_q_norm.weight
    F32
    [512]
  • blk.35.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.35.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.35.ffn_norm.weight
    F32
    [5376]
  • blk.35.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.35.layer_output_scale.weight
    F32
    [1]
  • blk.35.post_attention_norm.weight
    F32
    [5376]
  • blk.35.post_ffw_norm.weight
    F32
    [5376]
  • blk.36
  • blk.36.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.36.attn_k_norm.weight
    F32
    [256]
  • blk.36.attn_norm.weight
    F32
    [5376]
  • blk.36.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.36.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.36.attn_q_norm.weight
    F32
    [256]
  • blk.36.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.36.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.36.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.36.ffn_norm.weight
    F32
    [5376]
  • blk.36.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.36.layer_output_scale.weight
    F32
    [1]
  • blk.36.post_attention_norm.weight
    F32
    [5376]
  • blk.36.post_ffw_norm.weight
    F32
    [5376]
  • blk.37
  • blk.37.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.37.attn_k_norm.weight
    F32
    [256]
  • blk.37.attn_norm.weight
    F32
    [5376]
  • blk.37.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.37.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.37.attn_q_norm.weight
    F32
    [256]
  • blk.37.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.37.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.37.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.37.ffn_norm.weight
    F32
    [5376]
  • blk.37.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.37.layer_output_scale.weight
    F32
    [1]
  • blk.37.post_attention_norm.weight
    F32
    [5376]
  • blk.37.post_ffw_norm.weight
    F32
    [5376]
  • blk.38
  • blk.38.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.38.attn_k_norm.weight
    F32
    [256]
  • blk.38.attn_norm.weight
    F32
    [5376]
  • blk.38.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.38.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.38.attn_q_norm.weight
    F32
    [256]
  • blk.38.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.38.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.38.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.38.ffn_norm.weight
    F32
    [5376]
  • blk.38.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.38.layer_output_scale.weight
    F32
    [1]
  • blk.38.post_attention_norm.weight
    F32
    [5376]
  • blk.38.post_ffw_norm.weight
    F32
    [5376]
  • blk.39
  • blk.39.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.39.attn_k_norm.weight
    F32
    [256]
  • blk.39.attn_norm.weight
    F32
    [5376]
  • blk.39.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.39.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.39.attn_q_norm.weight
    F32
    [256]
  • blk.39.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.39.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.39.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.39.ffn_norm.weight
    F32
    [5376]
  • blk.39.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.39.layer_output_scale.weight
    F32
    [1]
  • blk.39.post_attention_norm.weight
    F32
    [5376]
  • blk.39.post_ffw_norm.weight
    F32
    [5376]
  • blk.40
  • blk.40.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.40.attn_k_norm.weight
    F32
    [256]
  • blk.40.attn_norm.weight
    F32
    [5376]
  • blk.40.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.40.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.40.attn_q_norm.weight
    F32
    [256]
  • blk.40.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.40.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.40.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.40.ffn_norm.weight
    F32
    [5376]
  • blk.40.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.40.layer_output_scale.weight
    F32
    [1]
  • blk.40.post_attention_norm.weight
    F32
    [5376]
  • blk.40.post_ffw_norm.weight
    F32
    [5376]
  • blk.41
  • blk.41.attn_k.weight
    IQ4_XS
    [5376, 2048]
  • blk.41.attn_k_norm.weight
    F32
    [512]
  • blk.41.attn_norm.weight
    F32
    [5376]
  • blk.41.attn_output.weight
    IQ4_XS
    [16384, 5376]
  • blk.41.attn_q.weight
    IQ4_XS
    [5376, 16384]
  • blk.41.attn_q_norm.weight
    F32
    [512]
  • blk.41.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.41.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.41.ffn_norm.weight
    F32
    [5376]
  • blk.41.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.41.layer_output_scale.weight
    F32
    [1]
  • blk.41.post_attention_norm.weight
    F32
    [5376]
  • blk.41.post_ffw_norm.weight
    F32
    [5376]
  • blk.42
  • blk.42.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.42.attn_k_norm.weight
    F32
    [256]
  • blk.42.attn_norm.weight
    F32
    [5376]
  • blk.42.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.42.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.42.attn_q_norm.weight
    F32
    [256]
  • blk.42.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.42.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.42.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.42.ffn_norm.weight
    F32
    [5376]
  • blk.42.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.42.layer_output_scale.weight
    F32
    [1]
  • blk.42.post_attention_norm.weight
    F32
    [5376]
  • blk.42.post_ffw_norm.weight
    F32
    [5376]
  • blk.43
  • blk.43.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.43.attn_k_norm.weight
    F32
    [256]
  • blk.43.attn_norm.weight
    F32
    [5376]
  • blk.43.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.43.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.43.attn_q_norm.weight
    F32
    [256]
  • blk.43.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.43.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.43.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.43.ffn_norm.weight
    F32
    [5376]
  • blk.43.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.43.layer_output_scale.weight
    F32
    [1]
  • blk.43.post_attention_norm.weight
    F32
    [5376]
  • blk.43.post_ffw_norm.weight
    F32
    [5376]
  • blk.44
  • blk.44.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.44.attn_k_norm.weight
    F32
    [256]
  • blk.44.attn_norm.weight
    F32
    [5376]
  • blk.44.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.44.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.44.attn_q_norm.weight
    F32
    [256]
  • blk.44.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.44.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.44.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.44.ffn_norm.weight
    F32
    [5376]
  • blk.44.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.44.layer_output_scale.weight
    F32
    [1]
  • blk.44.post_attention_norm.weight
    F32
    [5376]
  • blk.44.post_ffw_norm.weight
    F32
    [5376]
  • blk.45
  • blk.45.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.45.attn_k_norm.weight
    F32
    [256]
  • blk.45.attn_norm.weight
    F32
    [5376]
  • blk.45.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.45.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.45.attn_q_norm.weight
    F32
    [256]
  • blk.45.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.45.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.45.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.45.ffn_norm.weight
    F32
    [5376]
  • blk.45.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.45.layer_output_scale.weight
    F32
    [1]
  • blk.45.post_attention_norm.weight
    F32
    [5376]
  • blk.45.post_ffw_norm.weight
    F32
    [5376]
  • blk.46
  • blk.46.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.46.attn_k_norm.weight
    F32
    [256]
  • blk.46.attn_norm.weight
    F32
    [5376]
  • blk.46.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.46.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.46.attn_q_norm.weight
    F32
    [256]
  • blk.46.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.46.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.46.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.46.ffn_norm.weight
    F32
    [5376]
  • blk.46.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.46.layer_output_scale.weight
    F32
    [1]
  • blk.46.post_attention_norm.weight
    F32
    [5376]
  • blk.46.post_ffw_norm.weight
    F32
    [5376]
  • blk.47
  • blk.47.attn_k.weight
    IQ4_XS
    [5376, 2048]
  • blk.47.attn_k_norm.weight
    F32
    [512]
  • blk.47.attn_norm.weight
    F32
    [5376]
  • blk.47.attn_output.weight
    IQ4_XS
    [16384, 5376]
  • blk.47.attn_q.weight
    IQ4_XS
    [5376, 16384]
  • blk.47.attn_q_norm.weight
    F32
    [512]
  • blk.47.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.47.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.47.ffn_norm.weight
    F32
    [5376]
  • blk.47.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.47.layer_output_scale.weight
    F32
    [1]
  • blk.47.post_attention_norm.weight
    F32
    [5376]
  • blk.47.post_ffw_norm.weight
    F32
    [5376]
  • blk.48
  • blk.48.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.48.attn_k_norm.weight
    F32
    [256]
  • blk.48.attn_norm.weight
    F32
    [5376]
  • blk.48.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.48.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.48.attn_q_norm.weight
    F32
    [256]
  • blk.48.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.48.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.48.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.48.ffn_norm.weight
    F32
    [5376]
  • blk.48.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.48.layer_output_scale.weight
    F32
    [1]
  • blk.48.post_attention_norm.weight
    F32
    [5376]
  • blk.48.post_ffw_norm.weight
    F32
    [5376]
  • blk.49
  • blk.49.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.49.attn_k_norm.weight
    F32
    [256]
  • blk.49.attn_norm.weight
    F32
    [5376]
  • blk.49.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.49.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.49.attn_q_norm.weight
    F32
    [256]
  • blk.49.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.49.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.49.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.49.ffn_norm.weight
    F32
    [5376]
  • blk.49.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.49.layer_output_scale.weight
    F32
    [1]
  • blk.49.post_attention_norm.weight
    F32
    [5376]
  • blk.49.post_ffw_norm.weight
    F32
    [5376]
  • blk.50
  • blk.50.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.50.attn_k_norm.weight
    F32
    [256]
  • blk.50.attn_norm.weight
    F32
    [5376]
  • blk.50.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.50.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.50.attn_q_norm.weight
    F32
    [256]
  • blk.50.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.50.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.50.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.50.ffn_norm.weight
    F32
    [5376]
  • blk.50.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.50.layer_output_scale.weight
    F32
    [1]
  • blk.50.post_attention_norm.weight
    F32
    [5376]
  • blk.50.post_ffw_norm.weight
    F32
    [5376]
  • blk.51
  • blk.51.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.51.attn_k_norm.weight
    F32
    [256]
  • blk.51.attn_norm.weight
    F32
    [5376]
  • blk.51.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.51.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.51.attn_q_norm.weight
    F32
    [256]
  • blk.51.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.51.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.51.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.51.ffn_norm.weight
    F32
    [5376]
  • blk.51.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.51.layer_output_scale.weight
    F32
    [1]
  • blk.51.post_attention_norm.weight
    F32
    [5376]
  • blk.51.post_ffw_norm.weight
    F32
    [5376]
  • blk.52
  • blk.52.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.52.attn_k_norm.weight
    F32
    [256]
  • blk.52.attn_norm.weight
    F32
    [5376]
  • blk.52.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.52.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.52.attn_q_norm.weight
    F32
    [256]
  • blk.52.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.52.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.52.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.52.ffn_norm.weight
    F32
    [5376]
  • blk.52.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.52.layer_output_scale.weight
    F32
    [1]
  • blk.52.post_attention_norm.weight
    F32
    [5376]
  • blk.52.post_ffw_norm.weight
    F32
    [5376]
  • blk.53
  • blk.53.attn_k.weight
    IQ4_XS
    [5376, 2048]
  • blk.53.attn_k_norm.weight
    F32
    [512]
  • blk.53.attn_norm.weight
    F32
    [5376]
  • blk.53.attn_output.weight
    IQ4_XS
    [16384, 5376]
  • blk.53.attn_q.weight
    IQ4_XS
    [5376, 16384]
  • blk.53.attn_q_norm.weight
    F32
    [512]
  • blk.53.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.53.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.53.ffn_norm.weight
    F32
    [5376]
  • blk.53.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.53.layer_output_scale.weight
    F32
    [1]
  • blk.53.post_attention_norm.weight
    F32
    [5376]
  • blk.53.post_ffw_norm.weight
    F32
    [5376]
  • blk.54
  • blk.54.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.54.attn_k_norm.weight
    F32
    [256]
  • blk.54.attn_norm.weight
    F32
    [5376]
  • blk.54.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.54.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.54.attn_q_norm.weight
    F32
    [256]
  • blk.54.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.54.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.54.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.54.ffn_norm.weight
    F32
    [5376]
  • blk.54.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.54.layer_output_scale.weight
    F32
    [1]
  • blk.54.post_attention_norm.weight
    F32
    [5376]
  • blk.54.post_ffw_norm.weight
    F32
    [5376]
  • blk.55
  • blk.55.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.55.attn_k_norm.weight
    F32
    [256]
  • blk.55.attn_norm.weight
    F32
    [5376]
  • blk.55.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.55.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.55.attn_q_norm.weight
    F32
    [256]
  • blk.55.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.55.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.55.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.55.ffn_norm.weight
    F32
    [5376]
  • blk.55.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.55.layer_output_scale.weight
    F32
    [1]
  • blk.55.post_attention_norm.weight
    F32
    [5376]
  • blk.55.post_ffw_norm.weight
    F32
    [5376]
  • blk.56
  • blk.56.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.56.attn_k_norm.weight
    F32
    [256]
  • blk.56.attn_norm.weight
    F32
    [5376]
  • blk.56.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.56.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.56.attn_q_norm.weight
    F32
    [256]
  • blk.56.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.56.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.56.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.56.ffn_norm.weight
    F32
    [5376]
  • blk.56.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.56.layer_output_scale.weight
    F32
    [1]
  • blk.56.post_attention_norm.weight
    F32
    [5376]
  • blk.56.post_ffw_norm.weight
    F32
    [5376]
  • blk.57
  • blk.57.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.57.attn_k_norm.weight
    F32
    [256]
  • blk.57.attn_norm.weight
    F32
    [5376]
  • blk.57.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.57.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.57.attn_q_norm.weight
    F32
    [256]
  • blk.57.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.57.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.57.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.57.ffn_norm.weight
    F32
    [5376]
  • blk.57.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.57.layer_output_scale.weight
    F32
    [1]
  • blk.57.post_attention_norm.weight
    F32
    [5376]
  • blk.57.post_ffw_norm.weight
    F32
    [5376]
  • blk.58
  • blk.58.attn_k.weight
    IQ4_XS
    [5376, 4096]
  • blk.58.attn_k_norm.weight
    F32
    [256]
  • blk.58.attn_norm.weight
    F32
    [5376]
  • blk.58.attn_output.weight
    IQ4_XS
    [8192, 5376]
  • blk.58.attn_q.weight
    IQ4_XS
    [5376, 8192]
  • blk.58.attn_q_norm.weight
    F32
    [256]
  • blk.58.attn_v.weight
    IQ4_XS
    [5376, 4096]
  • blk.58.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.58.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.58.ffn_norm.weight
    F32
    [5376]
  • blk.58.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.58.layer_output_scale.weight
    F32
    [1]
  • blk.58.post_attention_norm.weight
    F32
    [5376]
  • blk.58.post_ffw_norm.weight
    F32
    [5376]
  • blk.59
  • blk.59.attn_k.weight
    IQ4_XS
    [5376, 2048]
  • blk.59.attn_k_norm.weight
    F32
    [512]
  • blk.59.attn_norm.weight
    F32
    [5376]
  • blk.59.attn_output.weight
    IQ4_XS
    [16384, 5376]
  • blk.59.attn_q.weight
    IQ4_XS
    [5376, 16384]
  • blk.59.attn_q_norm.weight
    F32
    [512]
  • blk.59.ffn_down.weight
    IQ4_XS
    [21504, 5376]
  • blk.59.ffn_gate.weight
    IQ4_XS
    [5376, 21504]
  • blk.59.ffn_norm.weight
    F32
    [5376]
  • blk.59.ffn_up.weight
    IQ4_XS
    [5376, 21504]
  • blk.59.layer_output_scale.weight
    F32
    [1]
  • blk.59.post_attention_norm.weight
    F32
    [5376]
  • blk.59.post_ffw_norm.weight
    F32
    [5376]
  • mm.input_projection.weight
    F16
    [1152, 5376]
  • rope_freqs.weight
    F32
    [256]
  • v.blk.0
  • v.blk.0.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.0.attn_k_norm.weight
    F32
    [72]
  • v.blk.0.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.0.attn_post_norm.weight
    F32
    [1152]
  • v.blk.0.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.0.attn_q_norm.weight
    F32
    [72]
  • v.blk.0.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.0.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.0.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.0.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.0.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.0.ln1.weight
    F32
    [1152]
  • v.blk.0.ln2.weight
    F32
    [1152]
  • v.blk.1
  • v.blk.1.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.1.attn_k_norm.weight
    F32
    [72]
  • v.blk.1.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.1.attn_post_norm.weight
    F32
    [1152]
  • v.blk.1.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.1.attn_q_norm.weight
    F32
    [72]
  • v.blk.1.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.1.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.1.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.1.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.1.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.1.ln1.weight
    F32
    [1152]
  • v.blk.1.ln2.weight
    F32
    [1152]
  • v.blk.2
  • v.blk.2.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.2.attn_k_norm.weight
    F32
    [72]
  • v.blk.2.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.2.attn_post_norm.weight
    F32
    [1152]
  • v.blk.2.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.2.attn_q_norm.weight
    F32
    [72]
  • v.blk.2.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.2.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.2.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.2.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.2.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.2.ln1.weight
    F32
    [1152]
  • v.blk.2.ln2.weight
    F32
    [1152]
  • v.blk.3
  • v.blk.3.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.3.attn_k_norm.weight
    F32
    [72]
  • v.blk.3.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.3.attn_post_norm.weight
    F32
    [1152]
  • v.blk.3.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.3.attn_q_norm.weight
    F32
    [72]
  • v.blk.3.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.3.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.3.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.3.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.3.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.3.ln1.weight
    F32
    [1152]
  • v.blk.3.ln2.weight
    F32
    [1152]
  • v.blk.4
  • v.blk.4.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.4.attn_k_norm.weight
    F32
    [72]
  • v.blk.4.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.4.attn_post_norm.weight
    F32
    [1152]
  • v.blk.4.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.4.attn_q_norm.weight
    F32
    [72]
  • v.blk.4.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.4.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.4.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.4.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.4.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.4.ln1.weight
    F32
    [1152]
  • v.blk.4.ln2.weight
    F32
    [1152]
  • v.blk.5
  • v.blk.5.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.5.attn_k_norm.weight
    F32
    [72]
  • v.blk.5.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.5.attn_post_norm.weight
    F32
    [1152]
  • v.blk.5.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.5.attn_q_norm.weight
    F32
    [72]
  • v.blk.5.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.5.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.5.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.5.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.5.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.5.ln1.weight
    F32
    [1152]
  • v.blk.5.ln2.weight
    F32
    [1152]
  • v.blk.6
  • v.blk.6.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.6.attn_k_norm.weight
    F32
    [72]
  • v.blk.6.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.6.attn_post_norm.weight
    F32
    [1152]
  • v.blk.6.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.6.attn_q_norm.weight
    F32
    [72]
  • v.blk.6.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.6.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.6.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.6.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.6.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.6.ln1.weight
    F32
    [1152]
  • v.blk.6.ln2.weight
    F32
    [1152]
  • v.blk.7
  • v.blk.7.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.7.attn_k_norm.weight
    F32
    [72]
  • v.blk.7.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.7.attn_post_norm.weight
    F32
    [1152]
  • v.blk.7.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.7.attn_q_norm.weight
    F32
    [72]
  • v.blk.7.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.7.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.7.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.7.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.7.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.7.ln1.weight
    F32
    [1152]
  • v.blk.7.ln2.weight
    F32
    [1152]
  • v.blk.8
  • v.blk.8.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.8.attn_k_norm.weight
    F32
    [72]
  • v.blk.8.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.8.attn_post_norm.weight
    F32
    [1152]
  • v.blk.8.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.8.attn_q_norm.weight
    F32
    [72]
  • v.blk.8.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.8.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.8.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.8.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.8.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.8.ln1.weight
    F32
    [1152]
  • v.blk.8.ln2.weight
    F32
    [1152]
  • v.blk.9
  • v.blk.9.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.9.attn_k_norm.weight
    F32
    [72]
  • v.blk.9.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.9.attn_post_norm.weight
    F32
    [1152]
  • v.blk.9.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.9.attn_q_norm.weight
    F32
    [72]
  • v.blk.9.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.9.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.9.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.9.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.9.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.9.ln1.weight
    F32
    [1152]
  • v.blk.9.ln2.weight
    F32
    [1152]
  • v.blk.10
  • v.blk.10.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.10.attn_k_norm.weight
    F32
    [72]
  • v.blk.10.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.10.attn_post_norm.weight
    F32
    [1152]
  • v.blk.10.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.10.attn_q_norm.weight
    F32
    [72]
  • v.blk.10.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.10.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.10.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.10.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.10.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.10.ln1.weight
    F32
    [1152]
  • v.blk.10.ln2.weight
    F32
    [1152]
  • v.blk.11
  • v.blk.11.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.11.attn_k_norm.weight
    F32
    [72]
  • v.blk.11.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.11.attn_post_norm.weight
    F32
    [1152]
  • v.blk.11.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.11.attn_q_norm.weight
    F32
    [72]
  • v.blk.11.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.11.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.11.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.11.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.11.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.11.ln1.weight
    F32
    [1152]
  • v.blk.11.ln2.weight
    F32
    [1152]
  • v.blk.12
  • v.blk.12.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.12.attn_k_norm.weight
    F32
    [72]
  • v.blk.12.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.12.attn_post_norm.weight
    F32
    [1152]
  • v.blk.12.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.12.attn_q_norm.weight
    F32
    [72]
  • v.blk.12.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.12.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.12.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.12.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.12.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.12.ln1.weight
    F32
    [1152]
  • v.blk.12.ln2.weight
    F32
    [1152]
  • v.blk.13
  • v.blk.13.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.13.attn_k_norm.weight
    F32
    [72]
  • v.blk.13.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.13.attn_post_norm.weight
    F32
    [1152]
  • v.blk.13.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.13.attn_q_norm.weight
    F32
    [72]
  • v.blk.13.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.13.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.13.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.13.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.13.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.13.ln1.weight
    F32
    [1152]
  • v.blk.13.ln2.weight
    F32
    [1152]
  • v.blk.14
  • v.blk.14.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.14.attn_k_norm.weight
    F32
    [72]
  • v.blk.14.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.14.attn_post_norm.weight
    F32
    [1152]
  • v.blk.14.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.14.attn_q_norm.weight
    F32
    [72]
  • v.blk.14.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.14.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.14.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.14.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.14.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.14.ln1.weight
    F32
    [1152]
  • v.blk.14.ln2.weight
    F32
    [1152]
  • v.blk.15
  • v.blk.15.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.15.attn_k_norm.weight
    F32
    [72]
  • v.blk.15.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.15.attn_post_norm.weight
    F32
    [1152]
  • v.blk.15.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.15.attn_q_norm.weight
    F32
    [72]
  • v.blk.15.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.15.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.15.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.15.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.15.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.15.ln1.weight
    F32
    [1152]
  • v.blk.15.ln2.weight
    F32
    [1152]
  • v.blk.16
  • v.blk.16.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.16.attn_k_norm.weight
    F32
    [72]
  • v.blk.16.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.16.attn_post_norm.weight
    F32
    [1152]
  • v.blk.16.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.16.attn_q_norm.weight
    F32
    [72]
  • v.blk.16.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.16.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.16.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.16.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.16.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.16.ln1.weight
    F32
    [1152]
  • v.blk.16.ln2.weight
    F32
    [1152]
  • v.blk.17
  • v.blk.17.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.17.attn_k_norm.weight
    F32
    [72]
  • v.blk.17.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.17.attn_post_norm.weight
    F32
    [1152]
  • v.blk.17.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.17.attn_q_norm.weight
    F32
    [72]
  • v.blk.17.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.17.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.17.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.17.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.17.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.17.ln1.weight
    F32
    [1152]
  • v.blk.17.ln2.weight
    F32
    [1152]
  • v.blk.18
  • v.blk.18.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.18.attn_k_norm.weight
    F32
    [72]
  • v.blk.18.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.18.attn_post_norm.weight
    F32
    [1152]
  • v.blk.18.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.18.attn_q_norm.weight
    F32
    [72]
  • v.blk.18.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.18.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.18.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.18.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.18.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.18.ln1.weight
    F32
    [1152]
  • v.blk.18.ln2.weight
    F32
    [1152]
  • v.blk.19
  • v.blk.19.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.19.attn_k_norm.weight
    F32
    [72]
  • v.blk.19.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.19.attn_post_norm.weight
    F32
    [1152]
  • v.blk.19.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.19.attn_q_norm.weight
    F32
    [72]
  • v.blk.19.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.19.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.19.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.19.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.19.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.19.ln1.weight
    F32
    [1152]
  • v.blk.19.ln2.weight
    F32
    [1152]
  • v.blk.20
  • v.blk.20.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.20.attn_k_norm.weight
    F32
    [72]
  • v.blk.20.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.20.attn_post_norm.weight
    F32
    [1152]
  • v.blk.20.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.20.attn_q_norm.weight
    F32
    [72]
  • v.blk.20.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.20.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.20.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.20.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.20.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.20.ln1.weight
    F32
    [1152]
  • v.blk.20.ln2.weight
    F32
    [1152]
  • v.blk.21
  • v.blk.21.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.21.attn_k_norm.weight
    F32
    [72]
  • v.blk.21.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.21.attn_post_norm.weight
    F32
    [1152]
  • v.blk.21.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.21.attn_q_norm.weight
    F32
    [72]
  • v.blk.21.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.21.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.21.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.21.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.21.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.21.ln1.weight
    F32
    [1152]
  • v.blk.21.ln2.weight
    F32
    [1152]
  • v.blk.22
  • v.blk.22.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.22.attn_k_norm.weight
    F32
    [72]
  • v.blk.22.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.22.attn_post_norm.weight
    F32
    [1152]
  • v.blk.22.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.22.attn_q_norm.weight
    F32
    [72]
  • v.blk.22.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.22.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.22.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.22.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.22.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.22.ln1.weight
    F32
    [1152]
  • v.blk.22.ln2.weight
    F32
    [1152]
  • v.blk.23
  • v.blk.23.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.23.attn_k_norm.weight
    F32
    [72]
  • v.blk.23.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.23.attn_post_norm.weight
    F32
    [1152]
  • v.blk.23.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.23.attn_q_norm.weight
    F32
    [72]
  • v.blk.23.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.23.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.23.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.23.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.23.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.23.ln1.weight
    F32
    [1152]
  • v.blk.23.ln2.weight
    F32
    [1152]
  • v.blk.24
  • v.blk.24.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.24.attn_k_norm.weight
    F32
    [72]
  • v.blk.24.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.24.attn_post_norm.weight
    F32
    [1152]
  • v.blk.24.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.24.attn_q_norm.weight
    F32
    [72]
  • v.blk.24.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.24.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.24.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.24.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.24.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.24.ln1.weight
    F32
    [1152]
  • v.blk.24.ln2.weight
    F32
    [1152]
  • v.blk.25
  • v.blk.25.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.25.attn_k_norm.weight
    F32
    [72]
  • v.blk.25.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.25.attn_post_norm.weight
    F32
    [1152]
  • v.blk.25.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.25.attn_q_norm.weight
    F32
    [72]
  • v.blk.25.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.25.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.25.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.25.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.25.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.25.ln1.weight
    F32
    [1152]
  • v.blk.25.ln2.weight
    F32
    [1152]
  • v.blk.26
  • v.blk.26.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.26.attn_k_norm.weight
    F32
    [72]
  • v.blk.26.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.26.attn_post_norm.weight
    F32
    [1152]
  • v.blk.26.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.26.attn_q_norm.weight
    F32
    [72]
  • v.blk.26.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.26.ffn_down.weight
    F16
    [4304, 1152]
  • v.blk.26.ffn_gate.weight
    F16
    [1152, 4304]
  • v.blk.26.ffn_post_norm.weight
    F32
    [1152]
  • v.blk.26.ffn_up.weight
    F16
    [1152, 4304]
  • v.blk.26.ln1.weight
    F32
    [1152]
  • v.blk.26.ln2.weight
    F32
    [1152]
  • v.patch_embd.weight
    F16
    [16, 16, 3, 1152]
  • v.position_embd.weight
    F32
    [1152, 10240, 2]
  • v.std_bias
    F32
    [1152]
  • v.std_scale
    F32
    [1152]
  • output_norm.weight
    F32
    [5376]