The IBM Granite 1B and 3B models are the first mixture-of-experts (MoE) Granite models from IBM, designed for low-latency usage.

Tools

1,917 Pulls · Updated 16 hours ago

fb4ca1fbef80 · 1.9GB
    Metadata
  • general.architecture
    granitemoe
  • general.file_type
    Q4_K_S
  • granitemoe.attention.head_count
    24
  • granitemoe.attention.head_count_kv
    8
  • granitemoe.attention.layer_norm_rms_epsilon
    1e-06
  • granitemoe.attention.scale
    0.015625
  • granitemoe.block_count
    32
  • granitemoe.context_length
    4096
  • granitemoe.embedding_length
    1536
  • granitemoe.embedding_scale
    12
  • granitemoe.expert_count
    40
  • granitemoe.expert_used_count
    8
  • granitemoe.feed_forward_length
    512
  • granitemoe.logit_scale
    6
  • granitemoe.residual_scale
    0.22
  • granitemoe.rope.dimension_count
    64
  • granitemoe.rope.freq_base
    10000
  • granitemoe.vocab_size
    49155
  • tokenizer.ggml.add_bos_token
    false
  • tokenizer.ggml.add_space_prefix
    false
  • tokenizer.ggml.bos_token_id
    0
  • tokenizer.ggml.eos_token_id
    0
  • tokenizer.ggml.merges
    [Ġ Ġ ĠĠ ĠĠ ĠĠĠĠ ĠĠĠĠ ĠĠ Ġ e r ...]
  • tokenizer.ggml.model
    gpt2
  • tokenizer.ggml.padding_token_id
    0
  • tokenizer.ggml.pre
    refact
  • tokenizer.ggml.token_type
    [3 3 3 3 3 ...]
  • tokenizer.ggml.tokens
    [<|end_of_text|> <fim_prefix> <fim_middle> <fim_suffix> <fim_pad> ...]
  • Tensors
  • Name
    Type
    Shape
  • token_embd.weight
    Q4_K
    [1536 49155]
  • blk.0
  • blk.0.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.0.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.0.ffn_down_exps.weight
    Q5_K
    [512 1536 40]
  • blk.0.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.0.attn_norm.weight
    F32
    [1536]
  • blk.0.ffn_norm.weight
    F32
    [1536]
  • blk.0.attn_k.weight
    Q4_K
    [1536 512]
  • blk.0.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.0.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.0.attn_v.weight
    Q5_K
    [1536 512]
  • blk.1
  • blk.1.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.1.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.1.ffn_down_exps.weight
    Q5_K
    [512 1536 40]
  • blk.1.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.1.attn_norm.weight
    F32
    [1536]
  • blk.1.ffn_norm.weight
    F32
    [1536]
  • blk.1.attn_k.weight
    Q4_K
    [1536 512]
  • blk.1.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.1.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.1.attn_v.weight
    Q5_K
    [1536 512]
  • blk.2
  • blk.2.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.2.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.2.ffn_down_exps.weight
    Q5_K
    [512 1536 40]
  • blk.2.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.2.attn_norm.weight
    F32
    [1536]
  • blk.2.ffn_norm.weight
    F32
    [1536]
  • blk.2.attn_k.weight
    Q4_K
    [1536 512]
  • blk.2.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.2.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.2.attn_v.weight
    Q4_K
    [1536 512]
  • blk.3
  • blk.3.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.3.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.3.ffn_down_exps.weight
    Q5_K
    [512 1536 40]
  • blk.3.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.3.attn_norm.weight
    F32
    [1536]
  • blk.3.ffn_norm.weight
    F32
    [1536]
  • blk.3.attn_k.weight
    Q4_K
    [1536 512]
  • blk.3.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.3.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.3.attn_v.weight
    Q4_K
    [1536 512]
  • blk.4
  • blk.4.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.4.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.4.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.4.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.4.attn_norm.weight
    F32
    [1536]
  • blk.4.ffn_norm.weight
    F32
    [1536]
  • blk.4.attn_k.weight
    Q4_K
    [1536 512]
  • blk.4.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.4.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.4.attn_v.weight
    Q4_K
    [1536 512]
  • blk.5
  • blk.5.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.5.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.5.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.5.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.5.attn_norm.weight
    F32
    [1536]
  • blk.5.ffn_norm.weight
    F32
    [1536]
  • blk.5.attn_k.weight
    Q4_K
    [1536 512]
  • blk.5.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.5.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.5.attn_v.weight
    Q4_K
    [1536 512]
  • blk.6
  • blk.6.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.6.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.6.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.6.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.6.attn_norm.weight
    F32
    [1536]
  • blk.6.ffn_norm.weight
    F32
    [1536]
  • blk.6.attn_k.weight
    Q4_K
    [1536 512]
  • blk.6.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.6.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.6.attn_v.weight
    Q4_K
    [1536 512]
  • blk.7
  • blk.7.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.7.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.7.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.7.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.7.attn_norm.weight
    F32
    [1536]
  • blk.7.ffn_norm.weight
    F32
    [1536]
  • blk.7.attn_k.weight
    Q4_K
    [1536 512]
  • blk.7.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.7.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.7.attn_v.weight
    Q4_K
    [1536 512]
  • blk.8
  • blk.8.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.8.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.8.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.8.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.8.attn_norm.weight
    F32
    [1536]
  • blk.8.ffn_norm.weight
    F32
    [1536]
  • blk.8.attn_k.weight
    Q4_K
    [1536 512]
  • blk.8.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.8.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.8.attn_v.weight
    Q4_K
    [1536 512]
  • blk.9
  • blk.9.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.9.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.9.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.9.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.9.attn_norm.weight
    F32
    [1536]
  • blk.9.ffn_norm.weight
    F32
    [1536]
  • blk.9.attn_k.weight
    Q4_K
    [1536 512]
  • blk.9.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.9.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.9.attn_v.weight
    Q4_K
    [1536 512]
  • blk.10
  • blk.10.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.10.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.10.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.10.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.10.attn_norm.weight
    F32
    [1536]
  • blk.10.ffn_norm.weight
    F32
    [1536]
  • blk.10.attn_k.weight
    Q4_K
    [1536 512]
  • blk.10.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.10.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.10.attn_v.weight
    Q5_K
    [1536 512]
  • blk.11
  • blk.11.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.11.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.11.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.11.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.11.attn_norm.weight
    F32
    [1536]
  • blk.11.ffn_norm.weight
    F32
    [1536]
  • blk.11.attn_k.weight
    Q4_K
    [1536 512]
  • blk.11.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.11.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.11.attn_v.weight
    Q5_K
    [1536 512]
  • blk.12
  • blk.12.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.12.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.12.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.12.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.12.attn_norm.weight
    F32
    [1536]
  • blk.12.ffn_norm.weight
    F32
    [1536]
  • blk.12.attn_k.weight
    Q4_K
    [1536 512]
  • blk.12.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.12.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.12.attn_v.weight
    Q4_K
    [1536 512]
  • blk.13
  • blk.13.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.13.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.13.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.13.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.13.attn_norm.weight
    F32
    [1536]
  • blk.13.ffn_norm.weight
    F32
    [1536]
  • blk.13.attn_k.weight
    Q4_K
    [1536 512]
  • blk.13.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.13.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.13.attn_v.weight
    Q4_K
    [1536 512]
  • blk.14
  • blk.14.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.14.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.14.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.14.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.14.attn_norm.weight
    F32
    [1536]
  • blk.14.ffn_norm.weight
    F32
    [1536]
  • blk.14.attn_k.weight
    Q4_K
    [1536 512]
  • blk.14.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.14.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.14.attn_v.weight
    Q4_K
    [1536 512]
  • blk.15
  • blk.15.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.15.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.15.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.15.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.15.attn_norm.weight
    F32
    [1536]
  • blk.15.ffn_norm.weight
    F32
    [1536]
  • blk.15.attn_k.weight
    Q4_K
    [1536 512]
  • blk.15.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.15.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.15.attn_v.weight
    Q4_K
    [1536 512]
  • blk.16
  • blk.16.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.16.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.16.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.16.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.16.attn_norm.weight
    F32
    [1536]
  • blk.16.ffn_norm.weight
    F32
    [1536]
  • blk.16.attn_k.weight
    Q4_K
    [1536 512]
  • blk.16.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.16.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.16.attn_v.weight
    Q4_K
    [1536 512]
  • blk.17
  • blk.17.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.17.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.17.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.17.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.17.attn_norm.weight
    F32
    [1536]
  • blk.17.ffn_norm.weight
    F32
    [1536]
  • blk.17.attn_k.weight
    Q4_K
    [1536 512]
  • blk.17.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.17.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.17.attn_v.weight
    Q4_K
    [1536 512]
  • blk.18
  • blk.18.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.18.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.18.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.18.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.18.attn_norm.weight
    F32
    [1536]
  • blk.18.ffn_norm.weight
    F32
    [1536]
  • blk.18.attn_k.weight
    Q4_K
    [1536 512]
  • blk.18.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.18.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.18.attn_v.weight
    Q4_K
    [1536 512]
  • blk.19
  • blk.19.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.19.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.19.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.19.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.19.attn_norm.weight
    F32
    [1536]
  • blk.19.ffn_norm.weight
    F32
    [1536]
  • blk.19.attn_k.weight
    Q4_K
    [1536 512]
  • blk.19.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.19.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.19.attn_v.weight
    Q4_K
    [1536 512]
  • blk.20
  • blk.20.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.20.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.20.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.20.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.20.attn_norm.weight
    F32
    [1536]
  • blk.20.ffn_norm.weight
    F32
    [1536]
  • blk.20.attn_k.weight
    Q4_K
    [1536 512]
  • blk.20.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.20.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.20.attn_v.weight
    Q4_K
    [1536 512]
  • blk.21
  • blk.21.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.21.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.21.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.21.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.21.attn_norm.weight
    F32
    [1536]
  • blk.21.ffn_norm.weight
    F32
    [1536]
  • blk.21.attn_k.weight
    Q4_K
    [1536 512]
  • blk.21.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.21.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.21.attn_v.weight
    Q4_K
    [1536 512]
  • blk.22
  • blk.22.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.22.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.22.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.22.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.22.attn_norm.weight
    F32
    [1536]
  • blk.22.ffn_norm.weight
    F32
    [1536]
  • blk.22.attn_k.weight
    Q4_K
    [1536 512]
  • blk.22.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.22.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.22.attn_v.weight
    Q4_K
    [1536 512]
  • blk.23
  • blk.23.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.23.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.23.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.23.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.23.attn_norm.weight
    F32
    [1536]
  • blk.23.ffn_norm.weight
    F32
    [1536]
  • blk.23.attn_k.weight
    Q4_K
    [1536 512]
  • blk.23.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.23.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.23.attn_v.weight
    Q4_K
    [1536 512]
  • blk.24
  • blk.24.attn_k.weight
    Q4_K
    [1536 512]
  • blk.24.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.24.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.24.attn_v.weight
    Q4_K
    [1536 512]
  • blk.24.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.24.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.24.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.24.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.24.attn_norm.weight
    F32
    [1536]
  • blk.24.ffn_norm.weight
    F32
    [1536]
  • blk.25
  • blk.25.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.25.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.25.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.25.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.25.attn_norm.weight
    F32
    [1536]
  • blk.25.ffn_norm.weight
    F32
    [1536]
  • blk.25.attn_k.weight
    Q4_K
    [1536 512]
  • blk.25.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.25.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.25.attn_v.weight
    Q4_K
    [1536 512]
  • blk.26
  • blk.26.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.26.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.26.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.26.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.26.attn_norm.weight
    F32
    [1536]
  • blk.26.ffn_norm.weight
    F32
    [1536]
  • blk.26.attn_k.weight
    Q4_K
    [1536 512]
  • blk.26.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.26.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.26.attn_v.weight
    Q4_K
    [1536 512]
  • blk.27
  • blk.27.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.27.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.27.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.27.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.27.attn_norm.weight
    F32
    [1536]
  • blk.27.ffn_norm.weight
    F32
    [1536]
  • blk.27.attn_k.weight
    Q4_K
    [1536 512]
  • blk.27.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.27.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.27.attn_v.weight
    Q4_K
    [1536 512]
  • blk.28
  • blk.28.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.28.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.28.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.28.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.28.attn_norm.weight
    F32
    [1536]
  • blk.28.ffn_norm.weight
    F32
    [1536]
  • blk.28.attn_k.weight
    Q4_K
    [1536 512]
  • blk.28.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.28.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.28.attn_v.weight
    Q4_K
    [1536 512]
  • blk.29
  • blk.29.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.29.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.29.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.29.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.29.attn_norm.weight
    F32
    [1536]
  • blk.29.ffn_norm.weight
    F32
    [1536]
  • blk.29.attn_k.weight
    Q4_K
    [1536 512]
  • blk.29.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.29.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.29.attn_v.weight
    Q4_K
    [1536 512]
  • blk.30
  • blk.30.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.30.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.30.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.30.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.30.attn_norm.weight
    F32
    [1536]
  • blk.30.ffn_norm.weight
    F32
    [1536]
  • blk.30.attn_k.weight
    Q4_K
    [1536 512]
  • blk.30.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.30.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.30.attn_v.weight
    Q4_K
    [1536 512]
  • blk.31
  • blk.31.ffn_gate_exps.weight
    Q4_K
    [1536 512 40]
  • blk.31.ffn_up_exps.weight
    Q4_K
    [1536 512 40]
  • blk.31.ffn_down_exps.weight
    Q4_K
    [512 1536 40]
  • blk.31.ffn_gate_inp.weight
    F32
    [1536 40]
  • blk.31.attn_norm.weight
    F32
    [1536]
  • blk.31.ffn_norm.weight
    F32
    [1536]
  • blk.31.attn_k.weight
    Q4_K
    [1536 512]
  • blk.31.attn_output.weight
    Q4_K
    [1536 1536]
  • blk.31.attn_q.weight
    Q4_K
    [1536 1536]
  • blk.31.attn_v.weight
    Q4_K
    [1536 512]
  • output.weight
    Q6_K
    [1536 49155]
  • output_norm.weight
    F32
    [1536]