27 4 months ago

This is our first full preview version, moving us out of the research-preview stage. It brings the lowest hallucination rates, state-of-the-art (SOTA) performance across different evals, and an efficient architecture.

tools
77bcee066a76 · 4.9GB
    Metadata
  • general.architecture
    granite
  • general.file_type
    Q4_K_M
  • granite.attention.head_count
    32
  • granite.attention.head_count_kv
    8
  • granite.attention.layer_norm_rms_epsilon
    1e-05
  • granite.attention.scale
    0.0078125
  • granite.block_count
    40
  • granite.context_length
    131072
  • granite.embedding_length
    4096
  • granite.embedding_scale
    12
  • granite.feed_forward_length
    12800
  • granite.logit_scale
    16
  • granite.residual_scale
    0.22
  • granite.rope.dimension_count
    128
  • granite.rope.freq_base
    1e+07
  • granite.vocab_size
    49159
  • tokenizer.ggml.add_bos_token
    false
  • tokenizer.ggml.add_space_prefix
    false
  • tokenizer.ggml.bos_token_id
    0
  • tokenizer.ggml.eos_token_id
    0
  • tokenizer.ggml.merges
    [Ġ Ġ, ĠĠ ĠĠ, ĠĠĠĠ ĠĠĠĠ, ĠĠ Ġ, e r, ...]
  • tokenizer.ggml.model
    gpt2
  • tokenizer.ggml.padding_token_id
    0
  • tokenizer.ggml.pre
    refact
  • tokenizer.ggml.token_type
    [3, 3, 3, 3, 3, ...]
  • tokenizer.ggml.tokens
    [<|end_of_text|>, <fim_prefix>, <fim_middle>, <fim_suffix>, <fim_pad>, ...]
  • tokenizer.ggml.unknown_token_id
    0
  • Tensor
  • token_embd.weight
    Q6_K
    [4096, 49159]
  • blk.0
  • blk.0.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.0.attn_norm.weight
    F32
    [4096]
  • blk.0.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.0.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.0.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.0.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.0.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.0.ffn_norm.weight
    F32
    [4096]
  • blk.0.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.1
  • blk.1.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.1.attn_norm.weight
    F32
    [4096]
  • blk.1.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.1.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.1.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.1.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.1.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.1.ffn_norm.weight
    F32
    [4096]
  • blk.1.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.2
  • blk.2.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.2.attn_norm.weight
    F32
    [4096]
  • blk.2.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.2.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.2.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.2.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.2.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.2.ffn_norm.weight
    F32
    [4096]
  • blk.2.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.3
  • blk.3.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.3.attn_norm.weight
    F32
    [4096]
  • blk.3.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.3.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.3.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.3.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.3.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.3.ffn_norm.weight
    F32
    [4096]
  • blk.3.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.4
  • blk.4.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.4.attn_norm.weight
    F32
    [4096]
  • blk.4.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.4.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.4.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.4.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.4.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.4.ffn_norm.weight
    F32
    [4096]
  • blk.4.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.5
  • blk.5.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.5.attn_norm.weight
    F32
    [4096]
  • blk.5.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.5.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.5.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.5.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.5.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.5.ffn_norm.weight
    F32
    [4096]
  • blk.5.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.6
  • blk.6.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.6.attn_norm.weight
    F32
    [4096]
  • blk.6.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.6.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.6.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.6.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.6.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.6.ffn_norm.weight
    F32
    [4096]
  • blk.6.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.7
  • blk.7.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.7.attn_norm.weight
    F32
    [4096]
  • blk.7.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.7.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.7.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.7.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.7.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.7.ffn_norm.weight
    F32
    [4096]
  • blk.7.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.8
  • blk.8.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.8.attn_norm.weight
    F32
    [4096]
  • blk.8.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.8.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.8.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.8.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.8.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.8.ffn_norm.weight
    F32
    [4096]
  • blk.8.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.9
  • blk.9.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.9.attn_norm.weight
    F32
    [4096]
  • blk.9.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.9.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.9.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.9.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.9.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.9.ffn_norm.weight
    F32
    [4096]
  • blk.9.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.10
  • blk.10.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.10.attn_norm.weight
    F32
    [4096]
  • blk.10.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.10.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.10.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.10.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.10.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.10.ffn_norm.weight
    F32
    [4096]
  • blk.10.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.11
  • blk.11.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.11.attn_norm.weight
    F32
    [4096]
  • blk.11.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.11.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.11.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.11.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.11.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.11.ffn_norm.weight
    F32
    [4096]
  • blk.11.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.12
  • blk.12.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.12.attn_norm.weight
    F32
    [4096]
  • blk.12.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.12.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.12.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.12.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.12.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.12.ffn_norm.weight
    F32
    [4096]
  • blk.12.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.13
  • blk.13.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.13.attn_norm.weight
    F32
    [4096]
  • blk.13.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.13.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.13.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.13.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.13.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.13.ffn_norm.weight
    F32
    [4096]
  • blk.13.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.14
  • blk.14.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.14.attn_norm.weight
    F32
    [4096]
  • blk.14.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.14.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.14.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.14.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.14.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.14.ffn_norm.weight
    F32
    [4096]
  • blk.14.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.15
  • blk.15.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.15.attn_norm.weight
    F32
    [4096]
  • blk.15.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.15.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.15.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.15.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.15.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.15.ffn_norm.weight
    F32
    [4096]
  • blk.15.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.16
  • blk.16.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.16.attn_norm.weight
    F32
    [4096]
  • blk.16.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.16.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.16.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.16.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.16.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.16.ffn_norm.weight
    F32
    [4096]
  • blk.16.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.17
  • blk.17.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.17.attn_norm.weight
    F32
    [4096]
  • blk.17.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.17.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.17.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.17.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.17.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.17.ffn_norm.weight
    F32
    [4096]
  • blk.17.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.18
  • blk.18.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.18.attn_norm.weight
    F32
    [4096]
  • blk.18.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.18.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.18.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.18.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.18.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.18.ffn_norm.weight
    F32
    [4096]
  • blk.18.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.19
  • blk.19.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.19.attn_norm.weight
    F32
    [4096]
  • blk.19.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.19.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.19.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.19.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.19.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.19.ffn_norm.weight
    F32
    [4096]
  • blk.19.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.20
  • blk.20.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.20.attn_norm.weight
    F32
    [4096]
  • blk.20.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.20.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.20.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.20.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.20.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.20.ffn_norm.weight
    F32
    [4096]
  • blk.20.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.21
  • blk.21.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.21.attn_norm.weight
    F32
    [4096]
  • blk.21.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.21.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.21.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.21.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.21.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.21.ffn_norm.weight
    F32
    [4096]
  • blk.21.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.22
  • blk.22.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.22.attn_norm.weight
    F32
    [4096]
  • blk.22.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.22.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.22.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.22.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.22.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.22.ffn_norm.weight
    F32
    [4096]
  • blk.22.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.23
  • blk.23.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.23.attn_norm.weight
    F32
    [4096]
  • blk.23.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.23.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.23.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.23.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.23.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.23.ffn_norm.weight
    F32
    [4096]
  • blk.23.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.24
  • blk.24.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.24.attn_norm.weight
    F32
    [4096]
  • blk.24.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.24.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.24.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.24.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.24.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.24.ffn_norm.weight
    F32
    [4096]
  • blk.24.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.25
  • blk.25.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.25.attn_norm.weight
    F32
    [4096]
  • blk.25.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.25.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.25.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.25.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.25.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.25.ffn_norm.weight
    F32
    [4096]
  • blk.25.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.26
  • blk.26.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.26.attn_norm.weight
    F32
    [4096]
  • blk.26.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.26.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.26.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.26.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.26.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.26.ffn_norm.weight
    F32
    [4096]
  • blk.26.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.27
  • blk.27.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.27.attn_norm.weight
    F32
    [4096]
  • blk.27.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.27.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.27.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.27.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.27.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.27.ffn_norm.weight
    F32
    [4096]
  • blk.27.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.28
  • blk.28.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.28.attn_norm.weight
    F32
    [4096]
  • blk.28.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.28.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.28.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.28.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.28.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.28.ffn_norm.weight
    F32
    [4096]
  • blk.28.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.29
  • blk.29.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.29.attn_norm.weight
    F32
    [4096]
  • blk.29.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.29.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.29.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.29.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.29.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.29.ffn_norm.weight
    F32
    [4096]
  • blk.29.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.30
  • blk.30.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.30.attn_norm.weight
    F32
    [4096]
  • blk.30.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.30.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.30.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.30.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.30.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.30.ffn_norm.weight
    F32
    [4096]
  • blk.30.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.31
  • blk.31.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.31.attn_norm.weight
    F32
    [4096]
  • blk.31.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.31.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.31.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.31.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.31.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.31.ffn_norm.weight
    F32
    [4096]
  • blk.31.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.32
  • blk.32.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.32.attn_norm.weight
    F32
    [4096]
  • blk.32.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.32.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.32.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.32.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.32.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.32.ffn_norm.weight
    F32
    [4096]
  • blk.32.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.33
  • blk.33.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.33.attn_norm.weight
    F32
    [4096]
  • blk.33.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.33.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.33.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.33.ffn_down.weight
    Q4_K
    [12800, 4096]
  • blk.33.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.33.ffn_norm.weight
    F32
    [4096]
  • blk.33.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.34
  • blk.34.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.34.attn_norm.weight
    F32
    [4096]
  • blk.34.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.34.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.34.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.34.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.34.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.34.ffn_norm.weight
    F32
    [4096]
  • blk.34.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.35
  • blk.35.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.35.attn_norm.weight
    F32
    [4096]
  • blk.35.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.35.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.35.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.35.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.35.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.35.ffn_norm.weight
    F32
    [4096]
  • blk.35.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.36
  • blk.36.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.36.attn_norm.weight
    F32
    [4096]
  • blk.36.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.36.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.36.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.36.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.36.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.36.ffn_norm.weight
    F32
    [4096]
  • blk.36.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.37
  • blk.37.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.37.attn_norm.weight
    F32
    [4096]
  • blk.37.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.37.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.37.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.37.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.37.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.37.ffn_norm.weight
    F32
    [4096]
  • blk.37.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.38
  • blk.38.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.38.attn_norm.weight
    F32
    [4096]
  • blk.38.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.38.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.38.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.38.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.38.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.38.ffn_norm.weight
    F32
    [4096]
  • blk.38.ffn_up.weight
    Q4_K
    [4096, 12800]
  • blk.39
  • blk.39.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.39.attn_norm.weight
    F32
    [4096]
  • blk.39.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.39.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.39.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.39.ffn_down.weight
    Q6_K
    [12800, 4096]
  • blk.39.ffn_gate.weight
    Q4_K
    [4096, 12800]
  • blk.39.ffn_norm.weight
    F32
    [4096]
  • blk.39.ffn_up.weight
    Q4_K
    [4096, 12800]
  • output_norm.weight
    F32
    [4096]