
GLM 4.6V Flash 9B model with vision, tools, and hybrid thinking enabled. It uses a custom template to align the model with Ollama, applies the recommended sampling settings by default, and uses Unsloth quants at Q4_K_M.
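
A minimal sketch of how the vision, tools, and thinking capabilities might be exercised through Ollama's HTTP chat API. The model tag, image path, tool definition, and sampling numbers below are placeholders (the page does not list the exact recommended values), and the think flag and tools field need a reasonably recent Ollama build.

```python
import base64

import requests

OLLAMA_URL = "http://localhost:11434/api/chat"
MODEL = "glm-4.6v-flash"  # placeholder: use the tag you actually pulled

# Encode a local image for the vision input (path is a placeholder).
with open("screenshot.png", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode()

# A toy tool definition in the OpenAI-style format Ollama accepts.
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

payload = {
    "model": MODEL,
    "messages": [{
        "role": "user",
        "content": "Describe this screenshot, then check the weather in Berlin.",
        "images": [image_b64],
    }],
    "tools": tools,
    "think": True,   # hybrid thinking; requires a recent Ollama version
    "stream": False,
    "options": {     # explicit sampling; placeholder values, not this model's defaults
        "temperature": 0.8,
        "top_p": 0.95,
        "top_k": 40,
    },
}

resp = requests.post(OLLAMA_URL, json=payload, timeout=300)
resp.raise_for_status()
msg = resp.json()["message"]
print(msg.get("thinking", ""))    # reasoning trace, if the server returns one
print(msg.get("content", ""))
print(msg.get("tool_calls", []))  # any tool calls the model chose to make
```

Since the custom template and recommended sampling settings are already baked in, the options block is only needed when you want to override the defaults per request.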

vision · tools · thinking
cbdf92acdfd2 · 1.8GB
    Metadata
  • general.architecture  clip
  • general.file_type  F16
  • clip.has_vision_encoder  true
  • clip.projector_type  glm4v
  • clip.use_silu  true
  • clip.vision.attention.head_count  12
  • clip.vision.attention.layer_norm_epsilon  1e-05
  • clip.vision.block_count  24
  • clip.vision.embedding_length  1536
  • clip.vision.feed_forward_length  13696
  • clip.vision.image_mean  [0.48145467, 0.4578275, 0.40821072]
  • clip.vision.image_size  336
  • clip.vision.image_std  [0.26862955, 0.2613026, 0.2757771]
  • clip.vision.patch_size  14
  • clip.vision.projection_dim  4096
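
A quick sanity check on the vision metadata above, assuming a standard ViT-style patch embedding: image_size 336 and patch_size 14 give a 24×24 grid of 576 patches, which matches the 576 positions in v.position_embd.weight ([1536, 576]) in the tensor list below.

```python
# Derive the patch grid from the clip.vision.* metadata above.
image_size = 336          # clip.vision.image_size
patch_size = 14           # clip.vision.patch_size
embedding_length = 1536   # clip.vision.embedding_length

patches_per_side = image_size // patch_size   # 336 / 14 = 24
num_patches = patches_per_side ** 2           # 24 * 24 = 576

# One 1536-dim positional embedding per patch position, i.e. the
# [1536, 576] shape of v.position_embd.weight below.
assert (embedding_length, num_patches) == (1536, 576)
print(patches_per_side, num_patches)          # 24 576
```
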
  • Tensor
  • mm.down.weight  F16  [13696, 4096]
  • mm.gate.weight  F16  [4096, 13696]
  • mm.model.fc.weight  F16  [4096, 4096]
  • mm.patch_merger.bias  F32  [4096]
  • mm.patch_merger.weight  F16  [2, 2, 1536, 4096]
  • mm.post_norm.bias  F32  [4096]
  • mm.post_norm.weight  F32  [4096]
  • mm.up.weight  F16  [4096, 13696]
  • v.blk.0
  • v.blk.0.attn_out.weight  F16  [1536, 1536]
  • v.blk.0.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.0.ffn_down.weight  F16  [4096, 1536]
  • v.blk.0.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.0.ffn_up.weight  F16  [1536, 4096]
  • v.blk.0.ln1.weight  F32  [1536]
  • v.blk.0.ln2.weight  F32  [1536]
  • v.blk.1
  • v.blk.1.attn_out.weight  F16  [1536, 1536]
  • v.blk.1.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.1.ffn_down.weight  F16  [4096, 1536]
  • v.blk.1.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.1.ffn_up.weight  F16  [1536, 4096]
  • v.blk.1.ln1.weight  F32  [1536]
  • v.blk.1.ln2.weight  F32  [1536]
  • v.blk.2
  • v.blk.2.attn_out.weight  F16  [1536, 1536]
  • v.blk.2.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.2.ffn_down.weight  F16  [4096, 1536]
  • v.blk.2.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.2.ffn_up.weight  F16  [1536, 4096]
  • v.blk.2.ln1.weight  F32  [1536]
  • v.blk.2.ln2.weight  F32  [1536]
  • v.blk.3
  • v.blk.3.attn_out.weight  F16  [1536, 1536]
  • v.blk.3.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.3.ffn_down.weight  F16  [4096, 1536]
  • v.blk.3.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.3.ffn_up.weight  F16  [1536, 4096]
  • v.blk.3.ln1.weight  F32  [1536]
  • v.blk.3.ln2.weight  F32  [1536]
  • v.blk.4
  • v.blk.4.attn_out.weight  F16  [1536, 1536]
  • v.blk.4.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.4.ffn_down.weight  F16  [4096, 1536]
  • v.blk.4.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.4.ffn_up.weight  F16  [1536, 4096]
  • v.blk.4.ln1.weight  F32  [1536]
  • v.blk.4.ln2.weight  F32  [1536]
  • v.blk.5
  • v.blk.5.attn_out.weight  F16  [1536, 1536]
  • v.blk.5.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.5.ffn_down.weight  F16  [4096, 1536]
  • v.blk.5.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.5.ffn_up.weight  F16  [1536, 4096]
  • v.blk.5.ln1.weight  F32  [1536]
  • v.blk.5.ln2.weight  F32  [1536]
  • v.blk.6
  • v.blk.6.attn_out.weight  F16  [1536, 1536]
  • v.blk.6.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.6.ffn_down.weight  F16  [4096, 1536]
  • v.blk.6.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.6.ffn_up.weight  F16  [1536, 4096]
  • v.blk.6.ln1.weight  F32  [1536]
  • v.blk.6.ln2.weight  F32  [1536]
  • v.blk.7
  • v.blk.7.attn_out.weight  F16  [1536, 1536]
  • v.blk.7.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.7.ffn_down.weight  F16  [4096, 1536]
  • v.blk.7.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.7.ffn_up.weight  F16  [1536, 4096]
  • v.blk.7.ln1.weight  F32  [1536]
  • v.blk.7.ln2.weight  F32  [1536]
  • v.blk.8
  • v.blk.8.attn_out.weight  F16  [1536, 1536]
  • v.blk.8.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.8.ffn_down.weight  F16  [4096, 1536]
  • v.blk.8.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.8.ffn_up.weight  F16  [1536, 4096]
  • v.blk.8.ln1.weight  F32  [1536]
  • v.blk.8.ln2.weight  F32  [1536]
  • v.blk.9
  • v.blk.9.attn_out.weight  F16  [1536, 1536]
  • v.blk.9.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.9.ffn_down.weight  F16  [4096, 1536]
  • v.blk.9.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.9.ffn_up.weight  F16  [1536, 4096]
  • v.blk.9.ln1.weight  F32  [1536]
  • v.blk.9.ln2.weight  F32  [1536]
  • v.blk.10
  • v.blk.10.attn_out.weight  F16  [1536, 1536]
  • v.blk.10.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.10.ffn_down.weight  F16  [4096, 1536]
  • v.blk.10.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.10.ffn_up.weight  F16  [1536, 4096]
  • v.blk.10.ln1.weight  F32  [1536]
  • v.blk.10.ln2.weight  F32  [1536]
  • v.blk.11
  • v.blk.11.attn_out.weight  F16  [1536, 1536]
  • v.blk.11.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.11.ffn_down.weight  F16  [4096, 1536]
  • v.blk.11.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.11.ffn_up.weight  F16  [1536, 4096]
  • v.blk.11.ln1.weight  F32  [1536]
  • v.blk.11.ln2.weight  F32  [1536]
  • v.blk.12
  • v.blk.12.attn_out.weight  F16  [1536, 1536]
  • v.blk.12.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.12.ffn_down.weight  F16  [4096, 1536]
  • v.blk.12.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.12.ffn_up.weight  F16  [1536, 4096]
  • v.blk.12.ln1.weight  F32  [1536]
  • v.blk.12.ln2.weight  F32  [1536]
  • v.blk.13
  • v.blk.13.attn_out.weight  F16  [1536, 1536]
  • v.blk.13.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.13.ffn_down.weight  F16  [4096, 1536]
  • v.blk.13.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.13.ffn_up.weight  F16  [1536, 4096]
  • v.blk.13.ln1.weight  F32  [1536]
  • v.blk.13.ln2.weight  F32  [1536]
  • v.blk.14
  • v.blk.14.attn_out.weight  F16  [1536, 1536]
  • v.blk.14.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.14.ffn_down.weight  F16  [4096, 1536]
  • v.blk.14.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.14.ffn_up.weight  F16  [1536, 4096]
  • v.blk.14.ln1.weight  F32  [1536]
  • v.blk.14.ln2.weight  F32  [1536]
  • v.blk.15
  • v.blk.15.attn_out.weight  F16  [1536, 1536]
  • v.blk.15.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.15.ffn_down.weight  F16  [4096, 1536]
  • v.blk.15.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.15.ffn_up.weight  F16  [1536, 4096]
  • v.blk.15.ln1.weight  F32  [1536]
  • v.blk.15.ln2.weight  F32  [1536]
  • v.blk.16
  • v.blk.16.attn_out.weight  F16  [1536, 1536]
  • v.blk.16.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.16.ffn_down.weight  F16  [4096, 1536]
  • v.blk.16.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.16.ffn_up.weight  F16  [1536, 4096]
  • v.blk.16.ln1.weight  F32  [1536]
  • v.blk.16.ln2.weight  F32  [1536]
  • v.blk.17
  • v.blk.17.attn_out.weight  F16  [1536, 1536]
  • v.blk.17.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.17.ffn_down.weight  F16  [4096, 1536]
  • v.blk.17.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.17.ffn_up.weight  F16  [1536, 4096]
  • v.blk.17.ln1.weight  F32  [1536]
  • v.blk.17.ln2.weight  F32  [1536]
  • v.blk.18
  • v.blk.18.attn_out.weight  F16  [1536, 1536]
  • v.blk.18.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.18.ffn_down.weight  F16  [4096, 1536]
  • v.blk.18.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.18.ffn_up.weight  F16  [1536, 4096]
  • v.blk.18.ln1.weight  F32  [1536]
  • v.blk.18.ln2.weight  F32  [1536]
  • v.blk.19
  • v.blk.19.attn_out.weight  F16  [1536, 1536]
  • v.blk.19.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.19.ffn_down.weight  F16  [4096, 1536]
  • v.blk.19.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.19.ffn_up.weight  F16  [1536, 4096]
  • v.blk.19.ln1.weight  F32  [1536]
  • v.blk.19.ln2.weight  F32  [1536]
  • v.blk.20
  • v.blk.20.attn_out.weight  F16  [1536, 1536]
  • v.blk.20.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.20.ffn_down.weight  F16  [4096, 1536]
  • v.blk.20.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.20.ffn_up.weight  F16  [1536, 4096]
  • v.blk.20.ln1.weight  F32  [1536]
  • v.blk.20.ln2.weight  F32  [1536]
  • v.blk.21
  • v.blk.21.attn_out.weight  F16  [1536, 1536]
  • v.blk.21.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.21.ffn_down.weight  F16  [4096, 1536]
  • v.blk.21.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.21.ffn_up.weight  F16  [1536, 4096]
  • v.blk.21.ln1.weight  F32  [1536]
  • v.blk.21.ln2.weight  F32  [1536]
  • v.blk.22
  • v.blk.22.attn_out.weight  F16  [1536, 1536]
  • v.blk.22.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.22.ffn_down.weight  F16  [4096, 1536]
  • v.blk.22.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.22.ffn_up.weight  F16  [1536, 4096]
  • v.blk.22.ln1.weight  F32  [1536]
  • v.blk.22.ln2.weight  F32  [1536]
  • v.blk.23
  • v.blk.23.attn_out.weight  F16  [1536, 1536]
  • v.blk.23.attn_qkv.weight  F16  [1536, 4608]
  • v.blk.23.ffn_down.weight  F16  [4096, 1536]
  • v.blk.23.ffn_gate.weight  F16  [1536, 4096]
  • v.blk.23.ffn_up.weight  F16  [1536, 4096]
  • v.blk.23.ln1.weight  F32  [1536]
  • v.blk.23.ln2.weight  F32  [1536]
  • v.norm_embd.weight  F32  [1536]
  • v.patch_embd.bias  F32  [1536]
  • v.patch_embd.weight  F16  [14, 14, 3, 1536]
  • v.patch_embd.weight.1  F16  [14, 14, 3, 1536]
  • v.position_embd.weight  F32  [1536, 576]
  • v.post_ln.weight  F32  [1536]
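
To reproduce a listing like the one above locally, the blob can be inspected with the gguf Python package (pip install gguf). This is a rough sketch assuming that package's GGUFReader API; the path is a placeholder for the actual blob file under Ollama's models/blobs directory.

```python
# Dump GGUF metadata keys and tensor shapes from a local blob.
from gguf import GGUFReader

reader = GGUFReader("sha256-cbdf92acdfd2...")  # placeholder blob path

print("metadata keys:")
for key in reader.fields:
    print(" ", key)

print("tensors:")
for t in reader.tensors:
    # t.shape holds the dimensions, t.tensor_type the GGML dtype enum (F16, F32, ...)
    print(f"  {t.name}  {t.tensor_type.name}  {list(t.shape)}")
```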