kavai/qwen3.5-GPT5:35b-a3b

947 downloads · updated 3 days ago

Qwen 3.5 is a family of open-source models that delivers exceptional utility and performance for tool calling and agentic workflows. The smaller models may suffer from slower speeds.
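For example, the tool-calling ability can be exercised through the Ollama Python client along these lines. This is a minimal sketch: the `get_weather` tool is a made-up placeholder, and the response layout assumes a recent version of the `ollama` package.

```python
import ollama

# Hypothetical tool schema; swap in your own function definitions.
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

response = ollama.chat(
    model="kavai/qwen3.5-GPT5:35b-a3b",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
)

# When the model opts to call a tool, the call appears on the message;
# run it yourself and feed the result back as a "tool" role message.
for call in response.message.tool_calls or []:
    print(call.function.name, call.function.arguments)
```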

vision tools thinking 0.8b 2b 4b 9b 27b 35b 122b
900dde62fb7e · 24GB
    Metadata
  • general.architecture
    qwen35moe
  • general.file_type
    Q4_K_M
  • qwen35moe.attention.head_count
    16
  • qwen35moe.attention.head_count_kv
    [0, 0, 0, 2, 0, ...]
  • qwen35moe.attention.key_length
    256
  • qwen35moe.attention.layer_norm_rms_epsilon
    1e-06
  • qwen35moe.attention.value_length
    256
  • qwen35moe.block_count
    40
  • qwen35moe.context_length
    262144
  • qwen35moe.embedding_length
    2048
  • qwen35moe.expert_count
    256
  • qwen35moe.expert_feed_forward_length
    512
  • qwen35moe.expert_shared_feed_forward_length
    512
  • qwen35moe.expert_used_count
    8
  • qwen35moe.feed_forward_length
    0
  • qwen35moe.full_attention_interval
    4
  • qwen35moe.image_token_id
    248056
  • qwen35moe.mrope_sections
    [11, 11, 10]
  • qwen35moe.rope.dimension_count
    64
  • qwen35moe.rope.dimension_sections
    [11, 11, 10]
  • qwen35moe.rope.freq_base
    1e+07
  • qwen35moe.rope.mrope_interleaved
    true
  • qwen35moe.rope.mrope_section
    [11, 11, 10]
  • qwen35moe.ssm.conv_kernel
    4
  • qwen35moe.ssm.group_count
    16
  • qwen35moe.ssm.inner_size
    4096
  • qwen35moe.ssm.state_size
    128
  • qwen35moe.ssm.time_step_rank
    32
  • qwen35moe.ssm.v_head_reordered
    true
  • qwen35moe.vision.attention.head_count
    16
  • qwen35moe.vision.block_count
    27
  • qwen35moe.vision.deepstack_visual_indexes
    []
  • qwen35moe.vision.embedding_length
    1152
  • qwen35moe.vision.image_mean
    [0.5, 0.5, 0.5]
  • qwen35moe.vision.image_std
    [0.5, 0.5, 0.5]
  • qwen35moe.vision.longest_edge
    16777216
  • qwen35moe.vision.num_channels
    3
  • qwen35moe.vision.patch_size
    16
  • qwen35moe.vision.shortest_edge
    65536
  • qwen35moe.vision.spatial_merge_size
    2
  • qwen35moe.vision.temporal_patch_size
    2
  • qwen35moe.vision_end_token_id
    248054
  • qwen35moe.vision_start_token_id
    248053
  • tokenizer.ggml.add_eos_token
    false
  • tokenizer.ggml.add_padding_token
    false
  • tokenizer.ggml.eos_token_id
    248046
  • tokenizer.ggml.eos_token_ids
    [248046, 248044]
  • tokenizer.ggml.merges
    [Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
  • tokenizer.ggml.model
    gpt2
  • tokenizer.ggml.padding_token_id
    248044
  • tokenizer.ggml.pre
    qwen35
  • tokenizer.ggml.scores
    [0, 1, 2, 3, 4, ...]
  • tokenizer.ggml.token_type
    [1, 1, 1, 1, 1, ...]
  • tokenizer.ggml.tokens
    [!, ", #, $, %, ...]
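Taken together, the metadata above fixes the mixture-of-experts geometry (256 experts per block, 8 routed per token plus one shared expert, 40 blocks), so a back-of-the-envelope check against the 35b-a3b tag is straightforward. A rough sketch, assuming the `35b`/`a3b` suffixes denote total and active parameters:

```python
# Back-of-the-envelope parameter count from the metadata above; a rough
# sketch that only counts the dominant tensors, not an exact figure.
d_model   = 2048      # qwen35moe.embedding_length
d_expert  = 512       # qwen35moe.expert_feed_forward_length
n_experts = 256       # qwen35moe.expert_count
n_active  = 8         # qwen35moe.expert_used_count
n_blocks  = 40        # qwen35moe.block_count
vocab     = 248320    # token_embd.weight shape below

# Each expert holds gate and up (d_model x d_expert) plus down (d_expert x d_model).
per_expert     = 3 * d_model * d_expert
total_experts  = n_blocks * n_experts * per_expert        # ~32.2B
active_experts = n_blocks * (n_active + 1) * per_expert   # +1 shared expert, ~1.13B
embeddings     = vocab * d_model                          # ~0.51B

print(f"expert params (all):            {total_experts / 1e9:.1f}B")
print(f"expert params (routed+shared):  {active_experts / 1e9:.2f}B")
print(f"token embeddings:               {embeddings / 1e9:.2f}B")
```

The routed expert tensors alone account for roughly 32B of the total; the routed-plus-shared slice, attention/SSM layers, norms, and embeddings together land near the advertised 35B total and ~3B active per token.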
  • Tensor
  • token_embd.weight
    Q4_K
    [2048, 248320]
  • blk.0
  • blk.0.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.0.attn_norm.weight
    F32
    [2048]
  • blk.0.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.0.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.0.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.0.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.0.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.0.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.0.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.0.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.0.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.0.post_attention_norm.weight
    F32
    [2048]
  • blk.0.ssm_a
    F32
    [32]
  • blk.0.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.0.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.0.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.0.ssm_dt
    F32
    [32]
  • blk.0.ssm_norm.weight
    F32
    [128]
  • blk.0.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.1
  • blk.1.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.1.attn_norm.weight
    F32
    [2048]
  • blk.1.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.1.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.1.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.1.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.1.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.1.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.1.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.1.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.1.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.1.post_attention_norm.weight
    F32
    [2048]
  • blk.1.ssm_a
    F32
    [32]
  • blk.1.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.1.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.1.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.1.ssm_dt
    F32
    [32]
  • blk.1.ssm_norm.weight
    F32
    [128]
  • blk.1.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.2
  • blk.2.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.2.attn_norm.weight
    F32
    [2048]
  • blk.2.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.2.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.2.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.2.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.2.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.2.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.2.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.2.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.2.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.2.post_attention_norm.weight
    F32
    [2048]
  • blk.2.ssm_a
    F32
    [32]
  • blk.2.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.2.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.2.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.2.ssm_dt
    F32
    [32]
  • blk.2.ssm_norm.weight
    F32
    [128]
  • blk.2.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.3
  • blk.3.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.3.attn_k_norm.weight
    F32
    [256]
  • blk.3.attn_norm.weight
    F32
    [2048]
  • blk.3.attn_output.weight
    Q4_K
    [4096, 2048]
  • blk.3.attn_q.weight
    Q4_K
    [2048, 8192]
  • blk.3.attn_q_norm.weight
    F32
    [256]
  • blk.3.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.3.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.3.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.3.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.3.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.3.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.3.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.3.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.3.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.3.post_attention_norm.weight
    F32
    [2048]
  • blk.4
  • blk.4.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.4.attn_norm.weight
    F32
    [2048]
  • blk.4.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.4.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.4.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.4.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.4.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.4.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.4.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.4.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.4.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.4.post_attention_norm.weight
    F32
    [2048]
  • blk.4.ssm_a
    F32
    [32]
  • blk.4.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.4.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.4.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.4.ssm_dt
    F32
    [32]
  • blk.4.ssm_norm.weight
    F32
    [128]
  • blk.4.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.5
  • blk.5.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.5.attn_norm.weight
    F32
    [2048]
  • blk.5.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.5.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.5.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.5.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.5.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.5.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.5.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.5.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.5.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.5.post_attention_norm.weight
    F32
    [2048]
  • blk.5.ssm_a
    F32
    [32]
  • blk.5.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.5.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.5.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.5.ssm_dt
    F32
    [32]
  • blk.5.ssm_norm.weight
    F32
    [128]
  • blk.5.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.6
  • blk.6.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.6.attn_norm.weight
    F32
    [2048]
  • blk.6.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.6.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.6.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.6.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.6.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.6.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.6.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.6.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.6.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.6.post_attention_norm.weight
    F32
    [2048]
  • blk.6.ssm_a
    F32
    [32]
  • blk.6.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.6.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.6.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.6.ssm_dt
    F32
    [32]
  • blk.6.ssm_norm.weight
    F32
    [128]
  • blk.6.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.7
  • blk.7.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.7.attn_k_norm.weight
    F32
    [256]
  • blk.7.attn_norm.weight
    F32
    [2048]
  • blk.7.attn_output.weight
    Q4_K
    [4096, 2048]
  • blk.7.attn_q.weight
    Q4_K
    [2048, 8192]
  • blk.7.attn_q_norm.weight
    F32
    [256]
  • blk.7.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.7.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.7.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.7.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.7.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.7.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.7.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.7.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.7.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.7.post_attention_norm.weight
    F32
    [2048]
  • blk.8
  • blk.8.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.8.attn_norm.weight
    F32
    [2048]
  • blk.8.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.8.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.8.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.8.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.8.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.8.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.8.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.8.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.8.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.8.post_attention_norm.weight
    F32
    [2048]
  • blk.8.ssm_a
    F32
    [32]
  • blk.8.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.8.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.8.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.8.ssm_dt
    F32
    [32]
  • blk.8.ssm_norm.weight
    F32
    [128]
  • blk.8.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.9
  • blk.9.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.9.attn_norm.weight
    F32
    [2048]
  • blk.9.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.9.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.9.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.9.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.9.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.9.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.9.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.9.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.9.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.9.post_attention_norm.weight
    F32
    [2048]
  • blk.9.ssm_a
    F32
    [32]
  • blk.9.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.9.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.9.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.9.ssm_dt
    F32
    [32]
  • blk.9.ssm_norm.weight
    F32
    [128]
  • blk.9.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.10
  • blk.10.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.10.attn_norm.weight
    F32
    [2048]
  • blk.10.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.10.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.10.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.10.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.10.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.10.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.10.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.10.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.10.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.10.post_attention_norm.weight
    F32
    [2048]
  • blk.10.ssm_a
    F32
    [32]
  • blk.10.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.10.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.10.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.10.ssm_dt
    F32
    [32]
  • blk.10.ssm_norm.weight
    F32
    [128]
  • blk.10.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.11
  • blk.11.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.11.attn_k_norm.weight
    F32
    [256]
  • blk.11.attn_norm.weight
    F32
    [2048]
  • blk.11.attn_output.weight
    Q4_K
    [4096, 2048]
  • blk.11.attn_q.weight
    Q4_K
    [2048, 8192]
  • blk.11.attn_q_norm.weight
    F32
    [256]
  • blk.11.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.11.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.11.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.11.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.11.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.11.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.11.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.11.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.11.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.11.post_attention_norm.weight
    F32
    [2048]
  • blk.12
  • blk.12.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.12.attn_norm.weight
    F32
    [2048]
  • blk.12.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.12.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.12.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.12.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.12.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.12.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.12.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.12.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.12.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.12.post_attention_norm.weight
    F32
    [2048]
  • blk.12.ssm_a
    F32
    [32]
  • blk.12.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.12.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.12.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.12.ssm_dt
    F32
    [32]
  • blk.12.ssm_norm.weight
    F32
    [128]
  • blk.12.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.13
  • blk.13.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.13.attn_norm.weight
    F32
    [2048]
  • blk.13.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.13.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.13.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.13.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.13.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.13.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.13.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.13.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.13.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.13.post_attention_norm.weight
    F32
    [2048]
  • blk.13.ssm_a
    F32
    [32]
  • blk.13.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.13.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.13.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.13.ssm_dt
    F32
    [32]
  • blk.13.ssm_norm.weight
    F32
    [128]
  • blk.13.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.14
  • blk.14.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.14.attn_norm.weight
    F32
    [2048]
  • blk.14.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.14.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.14.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.14.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.14.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.14.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.14.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.14.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.14.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.14.post_attention_norm.weight
    F32
    [2048]
  • blk.14.ssm_a
    F32
    [32]
  • blk.14.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.14.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.14.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.14.ssm_dt
    F32
    [32]
  • blk.14.ssm_norm.weight
    F32
    [128]
  • blk.14.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.15
  • blk.15.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.15.attn_k_norm.weight
    F32
    [256]
  • blk.15.attn_norm.weight
    F32
    [2048]
  • blk.15.attn_output.weight
    Q4_K
    [4096, 2048]
  • blk.15.attn_q.weight
    Q4_K
    [2048, 8192]
  • blk.15.attn_q_norm.weight
    F32
    [256]
  • blk.15.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.15.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.15.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.15.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.15.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.15.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.15.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.15.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.15.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.15.post_attention_norm.weight
    F32
    [2048]
  • blk.16
  • blk.16.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.16.attn_norm.weight
    F32
    [2048]
  • blk.16.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.16.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.16.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.16.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.16.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.16.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.16.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.16.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.16.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.16.post_attention_norm.weight
    F32
    [2048]
  • blk.16.ssm_a
    F32
    [32]
  • blk.16.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.16.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.16.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.16.ssm_dt
    F32
    [32]
  • blk.16.ssm_norm.weight
    F32
    [128]
  • blk.16.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.17
  • blk.17.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.17.attn_norm.weight
    F32
    [2048]
  • blk.17.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.17.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.17.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.17.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.17.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.17.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.17.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.17.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.17.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.17.post_attention_norm.weight
    F32
    [2048]
  • blk.17.ssm_a
    F32
    [32]
  • blk.17.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.17.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.17.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.17.ssm_dt
    F32
    [32]
  • blk.17.ssm_norm.weight
    F32
    [128]
  • blk.17.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.18
  • blk.18.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.18.attn_norm.weight
    F32
    [2048]
  • blk.18.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.18.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.18.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.18.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.18.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.18.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.18.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.18.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.18.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.18.post_attention_norm.weight
    F32
    [2048]
  • blk.18.ssm_a
    F32
    [32]
  • blk.18.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.18.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.18.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.18.ssm_dt
    F32
    [32]
  • blk.18.ssm_norm.weight
    F32
    [128]
  • blk.18.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.19
  • blk.19.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.19.attn_k_norm.weight
    F32
    [256]
  • blk.19.attn_norm.weight
    F32
    [2048]
  • blk.19.attn_output.weight
    Q4_K
    [4096, 2048]
  • blk.19.attn_q.weight
    Q4_K
    [2048, 8192]
  • blk.19.attn_q_norm.weight
    F32
    [256]
  • blk.19.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.19.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.19.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.19.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.19.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.19.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.19.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.19.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.19.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.19.post_attention_norm.weight
    F32
    [2048]
  • blk.20
  • blk.20.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.20.attn_norm.weight
    F32
    [2048]
  • blk.20.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.20.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.20.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.20.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.20.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.20.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.20.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.20.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.20.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.20.post_attention_norm.weight
    F32
    [2048]
  • blk.20.ssm_a
    F32
    [32]
  • blk.20.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.20.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.20.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.20.ssm_dt
    F32
    [32]
  • blk.20.ssm_norm.weight
    F32
    [128]
  • blk.20.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.21
  • blk.21.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.21.attn_norm.weight
    F32
    [2048]
  • blk.21.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.21.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.21.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.21.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.21.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.21.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.21.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.21.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.21.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.21.post_attention_norm.weight
    F32
    [2048]
  • blk.21.ssm_a
    F32
    [32]
  • blk.21.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.21.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.21.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.21.ssm_dt
    F32
    [32]
  • blk.21.ssm_norm.weight
    F32
    [128]
  • blk.21.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.22
  • blk.22.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.22.attn_norm.weight
    F32
    [2048]
  • blk.22.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.22.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.22.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.22.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.22.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.22.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.22.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.22.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.22.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.22.post_attention_norm.weight
    F32
    [2048]
  • blk.22.ssm_a
    F32
    [32]
  • blk.22.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.22.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.22.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.22.ssm_dt
    F32
    [32]
  • blk.22.ssm_norm.weight
    F32
    [128]
  • blk.22.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.23
  • blk.23.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.23.attn_k_norm.weight
    F32
    [256]
  • blk.23.attn_norm.weight
    F32
    [2048]
  • blk.23.attn_output.weight
    Q4_K
    [4096, 2048]
  • blk.23.attn_q.weight
    Q4_K
    [2048, 8192]
  • blk.23.attn_q_norm.weight
    F32
    [256]
  • blk.23.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.23.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.23.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.23.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.23.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.23.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.23.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.23.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.23.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.23.post_attention_norm.weight
    F32
    [2048]
  • blk.24
  • blk.24.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.24.attn_norm.weight
    F32
    [2048]
  • blk.24.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.24.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.24.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.24.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.24.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.24.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.24.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.24.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.24.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.24.post_attention_norm.weight
    F32
    [2048]
  • blk.24.ssm_a
    F32
    [32]
  • blk.24.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.24.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.24.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.24.ssm_dt
    F32
    [32]
  • blk.24.ssm_norm.weight
    F32
    [128]
  • blk.24.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.25
  • blk.25.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.25.attn_norm.weight
    F32
    [2048]
  • blk.25.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.25.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.25.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.25.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.25.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.25.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.25.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.25.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.25.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.25.post_attention_norm.weight
    F32
    [2048]
  • blk.25.ssm_a
    F32
    [32]
  • blk.25.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.25.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.25.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.25.ssm_dt
    F32
    [32]
  • blk.25.ssm_norm.weight
    F32
    [128]
  • blk.25.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.26
  • blk.26.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.26.attn_norm.weight
    F32
    [2048]
  • blk.26.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.26.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.26.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.26.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.26.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.26.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.26.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.26.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.26.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.26.post_attention_norm.weight
    F32
    [2048]
  • blk.26.ssm_a
    F32
    [32]
  • blk.26.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.26.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.26.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.26.ssm_dt
    F32
    [32]
  • blk.26.ssm_norm.weight
    F32
    [128]
  • blk.26.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.27
  • blk.27.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.27.attn_k_norm.weight
    F32
    [256]
  • blk.27.attn_norm.weight
    F32
    [2048]
  • blk.27.attn_output.weight
    Q4_K
    [4096, 2048]
  • blk.27.attn_q.weight
    Q4_K
    [2048, 8192]
  • blk.27.attn_q_norm.weight
    F32
    [256]
  • blk.27.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.27.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.27.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.27.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.27.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.27.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.27.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.27.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.27.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.27.post_attention_norm.weight
    F32
    [2048]
  • blk.28
  • blk.28.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.28.attn_norm.weight
    F32
    [2048]
  • blk.28.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.28.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.28.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.28.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.28.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.28.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.28.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.28.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.28.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.28.post_attention_norm.weight
    F32
    [2048]
  • blk.28.ssm_a
    F32
    [32]
  • blk.28.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.28.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.28.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.28.ssm_dt
    F32
    [32]
  • blk.28.ssm_norm.weight
    F32
    [128]
  • blk.28.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.29
  • blk.29.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.29.attn_norm.weight
    F32
    [2048]
  • blk.29.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.29.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.29.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.29.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.29.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.29.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.29.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.29.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.29.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.29.post_attention_norm.weight
    F32
    [2048]
  • blk.29.ssm_a
    F32
    [32]
  • blk.29.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.29.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.29.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.29.ssm_dt
    F32
    [32]
  • blk.29.ssm_norm.weight
    F32
    [128]
  • blk.29.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.30
  • blk.30.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.30.attn_norm.weight
    F32
    [2048]
  • blk.30.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.30.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.30.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.30.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.30.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.30.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.30.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.30.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.30.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.30.post_attention_norm.weight
    F32
    [2048]
  • blk.30.ssm_a
    F32
    [32]
  • blk.30.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.30.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.30.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.30.ssm_dt
    F32
    [32]
  • blk.30.ssm_norm.weight
    F32
    [128]
  • blk.30.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.31
  • blk.31.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.31.attn_k_norm.weight
    F32
    [256]
  • blk.31.attn_norm.weight
    F32
    [2048]
  • blk.31.attn_output.weight
    Q4_K
    [4096, 2048]
  • blk.31.attn_q.weight
    Q4_K
    [2048, 8192]
  • blk.31.attn_q_norm.weight
    F32
    [256]
  • blk.31.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.31.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.31.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.31.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.31.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.31.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.31.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.31.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.31.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.31.post_attention_norm.weight
    F32
    [2048]
  • blk.32
  • blk.32.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.32.attn_norm.weight
    F32
    [2048]
  • blk.32.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.32.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.32.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.32.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.32.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.32.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.32.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.32.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.32.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.32.post_attention_norm.weight
    F32
    [2048]
  • blk.32.ssm_a
    F32
    [32]
  • blk.32.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.32.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.32.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.32.ssm_dt
    F32
    [32]
  • blk.32.ssm_norm.weight
    F32
    [128]
  • blk.32.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.33
  • blk.33.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.33.attn_norm.weight
    F32
    [2048]
  • blk.33.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.33.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.33.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.33.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.33.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.33.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.33.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.33.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.33.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.33.post_attention_norm.weight
    F32
    [2048]
  • blk.33.ssm_a
    F32
    [32]
  • blk.33.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.33.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.33.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.33.ssm_dt
    F32
    [32]
  • blk.33.ssm_norm.weight
    F32
    [128]
  • blk.33.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.34
  • blk.34.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.34.attn_norm.weight
    F32
    [2048]
  • blk.34.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.34.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.34.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.34.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.34.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.34.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.34.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.34.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.34.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.34.post_attention_norm.weight
    F32
    [2048]
  • blk.34.ssm_a
    F32
    [32]
  • blk.34.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.34.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.34.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.34.ssm_dt
    F32
    [32]
  • blk.34.ssm_norm.weight
    F32
    [128]
  • blk.34.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.35
  • blk.35.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.35.attn_k_norm.weight
    F32
    [256]
  • blk.35.attn_norm.weight
    F32
    [2048]
  • blk.35.attn_output.weight
    Q4_K
    [4096, 2048]
  • blk.35.attn_q.weight
    Q4_K
    [2048, 8192]
  • blk.35.attn_q_norm.weight
    F32
    [256]
  • blk.35.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.35.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.35.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.35.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.35.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.35.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.35.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.35.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.35.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.35.post_attention_norm.weight
    F32
    [2048]
  • blk.36
  • blk.36.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.36.attn_norm.weight
    F32
    [2048]
  • blk.36.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.36.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.36.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.36.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.36.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.36.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.36.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.36.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.36.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.36.post_attention_norm.weight
    F32
    [2048]
  • blk.36.ssm_a
    F32
    [32]
  • blk.36.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.36.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.36.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.36.ssm_dt
    F32
    [32]
  • blk.36.ssm_norm.weight
    F32
    [128]
  • blk.36.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.37
  • blk.37.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.37.attn_norm.weight
    F32
    [2048]
  • blk.37.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.37.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.37.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.37.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.37.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.37.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.37.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.37.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.37.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.37.post_attention_norm.weight
    F32
    [2048]
  • blk.37.ssm_a
    F32
    [32]
  • blk.37.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.37.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.37.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.37.ssm_dt
    F32
    [32]
  • blk.37.ssm_norm.weight
    F32
    [128]
  • blk.37.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.38
  • blk.38.attn_gate.weight
    Q4_K
    [2048, 4096]
  • blk.38.attn_norm.weight
    F32
    [2048]
  • blk.38.attn_qkv.weight
    Q4_K
    [2048, 8192]
  • blk.38.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.38.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.38.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.38.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.38.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.38.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.38.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.38.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.38.post_attention_norm.weight
    F32
    [2048]
  • blk.38.ssm_a
    F32
    [32]
  • blk.38.ssm_alpha.weight
    Q4_K
    [2048, 32]
  • blk.38.ssm_beta.weight
    Q4_K
    [2048, 32]
  • blk.38.ssm_conv1d.weight
    F32
    [4, 8192]
  • blk.38.ssm_dt
    F32
    [32]
  • blk.38.ssm_norm.weight
    F32
    [128]
  • blk.38.ssm_out.weight
    Q4_K
    [4096, 2048]
  • blk.39
  • blk.39.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.39.attn_k_norm.weight
    F32
    [256]
  • blk.39.attn_norm.weight
    F32
    [2048]
  • blk.39.attn_output.weight
    Q4_K
    [4096, 2048]
  • blk.39.attn_q.weight
    Q4_K
    [2048, 8192]
  • blk.39.attn_q_norm.weight
    F32
    [256]
  • blk.39.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.39.ffn_down_exps.weight
    Q6_K
    [512, 2048, 256]
  • blk.39.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • blk.39.ffn_gate_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.39.ffn_gate_inp.weight
    F32
    [2048, 256]
  • blk.39.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • blk.39.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • blk.39.ffn_up_exps.weight
    Q4_K
    [2048, 512, 256]
  • blk.39.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • blk.39.post_attention_norm.weight
    F32
    [2048]
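A pattern worth noting in the listing above: with `qwen35moe.full_attention_interval` set to 4, every fourth block is a full-attention layer (blk.3, blk.7, …, blk.39 carry separate `attn_q`/`attn_k`/`attn_v`/`attn_output` weights plus QK norms), while the remaining blocks carry `ssm_*` tensors alongside fused `attn_qkv`/`attn_gate` weights. A one-liner reproduces the schedule:

```python
# Full-attention block indices implied by full_attention_interval = 4
# over block_count = 40; matches the tensor names listed above.
full_attn = [i for i in range(40) if i % 4 == 3]
print(full_attn)  # [3, 7, 11, 15, 19, 23, 27, 31, 35, 39]
```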
  • mtp.fc.weight
    Q4_K
    [4096, 2048]
  • mtp.layers.0.attn_k.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.attn_k_norm.weight
    F32
    [256]
  • mtp.layers.0.attn_norm.weight
    F32
    [2048]
  • mtp.layers.0.attn_output.weight
    Q4_K
    [4096, 2048]
  • mtp.layers.0.attn_q.weight
    Q4_K
    [2048, 8192]
  • mtp.layers.0.attn_q_norm.weight
    F32
    [256]
  • mtp.layers.0.attn_v.weight
    Q6_K
    [2048, 512]
  • mtp.layers.0.ffn_down_shexp.weight
    Q6_K
    [512, 2048]
  • mtp.layers.0.ffn_gate_inp.weight
    F32
    [2048, 256]
  • mtp.layers.0.ffn_gate_inp_shexp.weight
    F16
    [2048, 1]
  • mtp.layers.0.ffn_gate_shexp.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.ffn_up_shexp.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.0.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.0.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.0.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.1.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.1.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.1.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.2.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.2.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.2.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.3.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.3.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.3.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.4.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.4.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.4.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.5.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.5.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.5.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.6.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.6.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.6.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.7.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.7.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.7.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.8.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.8.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.8.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.9.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.9.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.9.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.10.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.10.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.10.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.11.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.11.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.11.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.12.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.12.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.12.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.13.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.13.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.13.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.14.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.14.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.14.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.15.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.15.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.15.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.16.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.16.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.16.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.17.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.17.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.17.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.18.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.18.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.18.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.19.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.19.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.19.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.20.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.20.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.20.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.21.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.21.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.21.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.22.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.22.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.22.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.23.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.23.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.23.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.24.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.24.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.24.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.25.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.25.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.25.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.26.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.26.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.26.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.27.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.27.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.27.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.28.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.28.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.28.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.29.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.29.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.29.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.30.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.30.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.30.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.31.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.31.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.31.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.32.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.32.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.32.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.33.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.33.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.33.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.34.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.34.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.34.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.35.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.35.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.35.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.36.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.36.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.36.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.37.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.37.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.37.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.38.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.38.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.38.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.39.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.39.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.39.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.40.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.40.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.40.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.41.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.41.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.41.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.42.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.42.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.42.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.43.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.43.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.43.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.44.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.44.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.44.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.45.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.45.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.45.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.46.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.46.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.46.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.47.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.47.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.47.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.48.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.48.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.48.up_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.49 … mtp.layers.0.mlp.experts.255 — each expert carries the same three tensors:
  • mtp.layers.0.mlp.experts.{N}.down_proj.weight
    Q4_K
    [512, 2048]
  • mtp.layers.0.mlp.experts.{N}.gate_proj.weight
    Q4_K
    [2048, 512]
  • mtp.layers.0.mlp.experts.{N}.up_proj.weight
    Q4_K
    [2048, 512]
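
Each expert above is a small SwiGLU feed-forward unit: gate_proj and up_proj take the 2048-wide hidden state down to the 512-wide expert width (expert_feed_forward_length), and down_proj brings it back; at inference the router activates 8 of the 256 experts per token (expert_used_count). Below is a minimal sketch of one expert's forward pass, assuming the usual Qwen-style SwiGLU formulation — the names and dense-math layout are illustrative, not the dequantization code itself. Note that GGUF prints shapes innermost dimension first, so [2048, 512] here corresponds to a 2048-in, 512-out projection.

```python
import numpy as np

def silu(x: np.ndarray) -> np.ndarray:
    # SiLU / swish activation: x * sigmoid(x)
    return x / (1.0 + np.exp(-x))

def expert_forward(h, w_gate, w_up, w_down):
    # One SwiGLU expert: down_proj( silu(gate_proj(h)) * up_proj(h) )
    # h: [2048]; w_gate, w_up: [512, 2048] (2048 -> 512); w_down: [2048, 512]
    return w_down @ (silu(w_gate @ h) * (w_up @ h))

# Shapes as listed: embedding_length = 2048, expert width = 512.
rng = np.random.default_rng(0)
h = rng.standard_normal(2048)
w_gate = rng.standard_normal((512, 2048))
w_up = rng.standard_normal((512, 2048))
w_down = rng.standard_normal((2048, 512))
assert expert_forward(h, w_gate, w_up, w_down).shape == (2048,)
```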
  • mtp.layers.0.post_attention_norm.weight
    F32
    [2048]
  • mtp.norm.weight
    F32
    [2048]
  • mtp.pre_fc_norm_embedding.weight
    F32
    [2048]
  • mtp.pre_fc_norm_hidden.weight
    F32
    [2048]
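
The remaining mtp.* tensors are the head's norms: mtp.norm is the final RMSNorm, while pre_fc_norm_embedding and pre_fc_norm_hidden normalize the token-embedding and hidden-state streams before the head's fusion projection — the way DeepSeek-style multi-token-prediction heads are usually wired, though that wiring is an assumption here, not something this listing states. To check any of these entries against the file itself, the gguf package that ships with llama.cpp can enumerate them; a short sketch, with a hypothetical local path:

```python
# pip install gguf  (the reader bundled with llama.cpp)
from gguf import GGUFReader

reader = GGUFReader("qwen3.5.Q4_K_M.gguf")  # hypothetical path to this model
for t in reader.tensors:
    if t.name.startswith("mtp."):
        # prints e.g.: mtp.norm.weight F32 [2048]
        print(t.name, t.tensor_type.name, list(t.shape))
```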
  • output.weight
    Q6_K
    [2048, 248320]
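
Everything below under v.blk.* belongs to the vision encoder: per the metadata, 27 transformer blocks with a 1152-wide embedding, 16 attention heads, and a 4304-wide MLP, each block holding the same sixteen tensors. A minimal sketch of one block's forward pass follows, assuming the standard pre-norm ViT layout with biased LayerNorms (norm1/norm2 carry both weight and bias) and a GELU MLP — the activation choice and pre-norm placement are assumptions, and weights are taken in [out, in] layout after dequantization.

```python
import numpy as np

def layer_norm(x, w, b, eps=1e-6):
    # LayerNorm with affine weight and bias, as the F32 norm tensors suggest
    mu = x.mean(-1, keepdims=True)
    var = x.var(-1, keepdims=True)
    return (x - mu) / np.sqrt(var + eps) * w + b

def gelu(x):
    # tanh-approximate GELU, common in ViT-style MLPs (an assumption)
    return 0.5 * x * (1.0 + np.tanh(0.7978845608 * (x + 0.044715 * x ** 3)))

def attention(x, p, n_heads=16):
    # Multi-head self-attention over patch tokens; x: [T, 1152]
    T, d = x.shape
    hd = d // n_heads  # 72 dims per head
    def proj(name):
        y = x @ p[f"attn_{name}.weight"].T + p[f"attn_{name}.bias"]
        return y.reshape(T, n_heads, hd).transpose(1, 0, 2)  # [H, T, hd]
    q, k, v = proj("q"), proj("k"), proj("v")
    att = q @ k.transpose(0, 2, 1) / np.sqrt(hd)             # [H, T, T]
    att = np.exp(att - att.max(-1, keepdims=True))
    att /= att.sum(-1, keepdims=True)                        # softmax
    out = (att @ v).transpose(1, 0, 2).reshape(T, d)         # merge heads
    return out @ p["attn_out.weight"].T + p["attn_out.bias"]

def vit_block(x, p):
    # Pre-norm residual block: x + Attn(LN1(x)), then x + MLP(LN2(x))
    x = x + attention(layer_norm(x, p["norm1.weight"], p["norm1.bias"]), p)
    h = layer_norm(x, p["norm2.weight"], p["norm2.bias"])
    h = gelu(h @ p["mlp.linear_fc1.weight"].T + p["mlp.linear_fc1.bias"])   # 1152 -> 4304
    return x + h @ p["mlp.linear_fc2.weight"].T + p["mlp.linear_fc2.bias"]  # 4304 -> 1152

# Example: one block over 10 patch tokens with random parameters.
rng = np.random.default_rng(0)
p = {}
for nm in ("attn_q", "attn_k", "attn_v", "attn_out"):
    p[f"{nm}.weight"] = rng.standard_normal((1152, 1152)) * 0.02
    p[f"{nm}.bias"] = np.zeros(1152)
p["mlp.linear_fc1.weight"] = rng.standard_normal((4304, 1152)) * 0.02
p["mlp.linear_fc1.bias"] = np.zeros(4304)
p["mlp.linear_fc2.weight"] = rng.standard_normal((1152, 4304)) * 0.02
p["mlp.linear_fc2.bias"] = np.zeros(1152)
for nm in ("norm1", "norm2"):
    p[f"{nm}.weight"] = np.ones(1152)
    p[f"{nm}.bias"] = np.zeros(1152)
assert vit_block(rng.standard_normal((10, 1152)), p).shape == (10, 1152)
```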
  • v.blk.0 … v.blk.21 — each vision block carries the same sixteen tensors:
  • v.blk.{N}.attn_k.bias
    F32
    [1152]
  • v.blk.{N}.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.{N}.attn_out.bias
    F32
    [1152]
  • v.blk.{N}.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.{N}.attn_q.bias
    F32
    [1152]
  • v.blk.{N}.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.{N}.attn_v.bias
    F32
    [1152]
  • v.blk.{N}.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.{N}.mlp.linear_fc1.bias
    F32
    [4304]
  • v.blk.{N}.mlp.linear_fc1.weight
    F16
    [1152, 4304]
  • v.blk.{N}.mlp.linear_fc2.bias
    F32
    [1152]
  • v.blk.{N}.mlp.linear_fc2.weight
    F16
    [4304, 1152]
  • v.blk.{N}.norm1.bias
    F32
    [1152]
  • v.blk.{N}.norm1.weight
    F32
    [1152]
  • v.blk.{N}.norm2.bias
    F32
    [1152]
  • v.blk.{N}.norm2.weight
    F32
    [1152]
  • v.blk.22
  • v.blk.22.attn_k.bias
    F32
    [1152]
  • v.blk.22.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.22.attn_out.bias
    F32
    [1152]
  • v.blk.22.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.22.attn_q.bias
    F32
    [1152]
  • v.blk.22.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.22.attn_v.bias
    F32
    [1152]
  • v.blk.22.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.22.mlp.linear_fc1.bias
    F32
    [4304]
  • v.blk.22.mlp.linear_fc1.weight
    F16
    [1152, 4304]
  • v.blk.22.mlp.linear_fc2.bias
    F32
    [1152]
  • v.blk.22.mlp.linear_fc2.weight
    F16
    [4304, 1152]
  • v.blk.22.norm1.bias
    F32
    [1152]
  • v.blk.22.norm1.weight
    F32
    [1152]
  • v.blk.22.norm2.bias
    F32
    [1152]
  • v.blk.22.norm2.weight
    F32
    [1152]
  • v.blk.23
  • v.blk.23.attn_k.bias
    F32
    [1152]
  • v.blk.23.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.23.attn_out.bias
    F32
    [1152]
  • v.blk.23.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.23.attn_q.bias
    F32
    [1152]
  • v.blk.23.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.23.attn_v.bias
    F32
    [1152]
  • v.blk.23.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.23.mlp.linear_fc1.bias
    F32
    [4304]
  • v.blk.23.mlp.linear_fc1.weight
    F16
    [1152, 4304]
  • v.blk.23.mlp.linear_fc2.bias
    F32
    [1152]
  • v.blk.23.mlp.linear_fc2.weight
    F16
    [4304, 1152]
  • v.blk.23.norm1.bias
    F32
    [1152]
  • v.blk.23.norm1.weight
    F32
    [1152]
  • v.blk.23.norm2.bias
    F32
    [1152]
  • v.blk.23.norm2.weight
    F32
    [1152]
  • v.blk.24
  • v.blk.24.attn_k.bias
    F32
    [1152]
  • v.blk.24.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.24.attn_out.bias
    F32
    [1152]
  • v.blk.24.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.24.attn_q.bias
    F32
    [1152]
  • v.blk.24.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.24.attn_v.bias
    F32
    [1152]
  • v.blk.24.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.24.mlp.linear_fc1.bias
    F32
    [4304]
  • v.blk.24.mlp.linear_fc1.weight
    F16
    [1152, 4304]
  • v.blk.24.mlp.linear_fc2.bias
    F32
    [1152]
  • v.blk.24.mlp.linear_fc2.weight
    F16
    [4304, 1152]
  • v.blk.24.norm1.bias
    F32
    [1152]
  • v.blk.24.norm1.weight
    F32
    [1152]
  • v.blk.24.norm2.bias
    F32
    [1152]
  • v.blk.24.norm2.weight
    F32
    [1152]
  • v.blk.25
  • v.blk.25.attn_k.bias
    F32
    [1152]
  • v.blk.25.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.25.attn_out.bias
    F32
    [1152]
  • v.blk.25.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.25.attn_q.bias
    F32
    [1152]
  • v.blk.25.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.25.attn_v.bias
    F32
    [1152]
  • v.blk.25.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.25.mlp.linear_fc1.bias
    F32
    [4304]
  • v.blk.25.mlp.linear_fc1.weight
    F16
    [1152, 4304]
  • v.blk.25.mlp.linear_fc2.bias
    F32
    [1152]
  • v.blk.25.mlp.linear_fc2.weight
    F16
    [4304, 1152]
  • v.blk.25.norm1.bias
    F32
    [1152]
  • v.blk.25.norm1.weight
    F32
    [1152]
  • v.blk.25.norm2.bias
    F32
    [1152]
  • v.blk.25.norm2.weight
    F32
    [1152]
  • v.blk.26
  • v.blk.26.attn_k.bias
    F32
    [1152]
  • v.blk.26.attn_k.weight
    F16
    [1152, 1152]
  • v.blk.26.attn_out.bias
    F32
    [1152]
  • v.blk.26.attn_out.weight
    F16
    [1152, 1152]
  • v.blk.26.attn_q.bias
    F32
    [1152]
  • v.blk.26.attn_q.weight
    F16
    [1152, 1152]
  • v.blk.26.attn_v.bias
    F32
    [1152]
  • v.blk.26.attn_v.weight
    F16
    [1152, 1152]
  • v.blk.26.mlp.linear_fc1.bias
    F32
    [4304]
  • v.blk.26.mlp.linear_fc1.weight
    F16
    [1152, 4304]
  • v.blk.26.mlp.linear_fc2.bias
    F32
    [1152]
  • v.blk.26.mlp.linear_fc2.weight
    F16
    [4304, 1152]
  • v.blk.26.norm1.bias
    F32
    [1152]
  • v.blk.26.norm1.weight
    F32
    [1152]
  • v.blk.26.norm2.bias
    F32
    [1152]
  • v.blk.26.norm2.weight
    F32
    [1152]
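
The v.blk.* entries above repeat one per-block tensor pattern, which pins down the vision encoder's layer structure: self-attention with separate q/k/v/out projections (each 1152×1152 F16 weights plus F32 biases) and an MLP expanding 1152 → 4304 → 1152, with two norms per block. A minimal sketch of one such block, assuming a pre-norm residual layout, a GELU activation, 16 heads, and LayerNorm-with-bias — all read off the shapes rather than confirmed by this listing:

```python
# Hypothetical reconstruction of one v.blk.N.* vision block from the shapes above.
import torch
import torch.nn as nn

class VisionBlock(nn.Module):
    def __init__(self, dim=1152, mlp_dim=4304, num_heads=16):  # head count assumed
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)               # norm1.{weight,bias} [1152]
        # MultiheadAttention folds attn_q/k/v into one in-projection and
        # attn_out into out_proj; each piece is 1152x1152 + bias, as listed.
        self.attn = nn.MultiheadAttention(dim, num_heads, batch_first=True)
        self.norm2 = nn.LayerNorm(dim)               # norm2.{weight,bias} [1152]
        self.fc1 = nn.Linear(dim, mlp_dim)           # mlp.linear_fc1: 1152 -> 4304
        self.act = nn.GELU()                         # activation is an assumption
        self.fc2 = nn.Linear(mlp_dim, dim)           # mlp.linear_fc2: 4304 -> 1152

    def forward(self, x):                            # x: [batch, patches, 1152]
        h = self.norm1(x)
        attn_out, _ = self.attn(h, h, h, need_weights=False)
        x = x + attn_out                             # residual around attention
        x = x + self.fc2(self.act(self.fc1(self.norm2(x))))  # residual around MLP
        return x
```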
  • v.merger.linear_fc1.bias
    F32
    [4608]
  • v.merger.linear_fc1.weight
    F16
    [4608, 4608]
  • v.merger.linear_fc2.bias
    F32
    [2048]
  • v.merger.linear_fc2.weight
    F16
    [4608, 2048]
  • v.merger.norm.bias
    F32
    [1152]
  • v.merger.norm.weight
    F32
    [1152]
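
The v.merger.* shapes suggest how patch embeddings leave the vision tower: each 1152-dim embedding is normalized, groups of four are concatenated (4 × 1152 = 4608, matching linear_fc1's input), and linear_fc2 projects the result to the 2048-dim width of the text stack. A hedged sketch, assuming a GELU between the two projections and that every four consecutive embeddings form one 2×2 spatial window:

```python
# Hypothetical reconstruction of the v.merger.* tensors from the shapes above.
import torch
import torch.nn as nn
import torch.nn.functional as F

class PatchMerger(nn.Module):
    def __init__(self, dim=1152, merged=4 * 1152, out_dim=2048):
        super().__init__()
        self.norm = nn.LayerNorm(dim)         # v.merger.norm.{weight,bias} [1152]
        self.fc1 = nn.Linear(merged, merged)  # v.merger.linear_fc1: 4608 -> 4608
        self.fc2 = nn.Linear(merged, out_dim) # v.merger.linear_fc2: 4608 -> 2048

    def forward(self, x):                     # x: [num_patches, 1152], divisible by 4
        x = self.norm(x)
        # Concatenate every 4 consecutive embeddings (2x2 window order assumed).
        x = x.reshape(-1, 4 * x.shape[-1])    # -> [num_patches / 4, 4608]
        return self.fc2(F.gelu(self.fc1(x)))  # -> [num_patches / 4, 2048]
```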
  • v.patch_embed.bias
    F32
    [1152]
  • v.patch_embed.weight
    F16
    [16, 16, 2, 3456]
  • v.pos_embed.weight
    F16
    [1152, 2304]
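
v.patch_embed.weight [16, 16, 2, 3456] holds 1,769,472 elements — exactly (3 channels × 2 frames × 16 × 16) × 1152 — so it is equivalent to a linear projection of each flattened spatio-temporal patch into the 1152-dim vision width, with v.pos_embed supplying 2304 learned positions (plausibly a 48 × 48 grid). The sketch below assumes that layout; the axis ordering inside the GGUF tensor is not confirmed by this listing:

```python
# Hypothetical patchify path implied by v.patch_embed and v.pos_embed shapes.
import torch
import torch.nn as nn

patch_embed = nn.Linear(3 * 2 * 16 * 16, 1152)  # 1536 -> 1152, matches element count
pos_embed = nn.Embedding(2304, 1152)            # v.pos_embed.weight [1152, 2304]

frames = torch.randn(2, 3, 64, 64)              # 2 frames, 3 channels, 64x64 pixels
patches = (frames.unfold(2, 16, 16).unfold(3, 16, 16)  # cut 16x16 spatial patches
                 .permute(2, 3, 0, 1, 4, 5)             # -> [4, 4, 2, 3, 16, 16]
                 .reshape(16, -1))                      # 16 patches x 1536 values
tokens = patch_embed(patches) + pos_embed(torch.arange(16))  # -> [16, 1152]
```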
  • output_norm.weight
    F32
    [2048]
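
output_norm.weight [2048] is the final norm of the 2048-dim text stack and closes the listing. For scale, the vision-tower shapes above imply roughly 15.2M parameters per block and about 0.45B in total, assuming 27 identical blocks (v.blk.0 through v.blk.26, the last one listed here); a back-of-envelope check:

```python
# Parameter count derived only from the tensor shapes listed above.
d, m = 1152, 4304
per_block = (4 * d * d + 4 * d         # attn q/k/v/out weights + biases
             + d * m + m + m * d + d   # mlp fc1/fc2 weights + biases
             + 4 * d)                  # norm1/norm2 weights + biases
merger = 4608 * 4608 + 4608 + 4608 * 2048 + 2048 + 2 * d
patch_and_pos = 16 * 16 * 2 * 3456 + d + d * 2304
total = 27 * per_block + merger + patch_and_pos    # block count assumed
print(f"{per_block:,} per block, {total:,} total") # 15,239,504 / 446,571,248
```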