OLMo 2 is a family of fully open 7B and 13B models trained on up to 5T tokens. They are on par with or better than equivalently sized fully open models, and competitive with open-weight models such as Llama 3.1 on English academic benchmarks.

Sizes: 7b · 13b

428.1K pulls · Updated 2 months ago

dc55b66a2732 · 27GB
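To try this model locally, pull and run it with the Ollama CLI. The tags match the sizes listed above; this page describes the 13b F16 build:

    # interactive chat with the 13B variant shown on this page
    ollama run olmo2:13b

    # or the smaller 7B variant
    ollama run olmo2:7b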
    Metadata
  • general.architecture: olmo2
  • general.file_type: F16
  • olmo2.attention.head_count: 40
  • olmo2.attention.head_count_kv: 40
  • olmo2.attention.layer_norm_rms_epsilon: 1e-06
  • olmo2.block_count: 40
  • olmo2.context_length: 4096
  • olmo2.embedding_length: 5120
  • olmo2.feed_forward_length: 13824
  • olmo2.rope.freq_base: 500000
  • tokenizer.ggml.bos_token_id: 100257
  • tokenizer.ggml.eos_token_id: 100257
  • tokenizer.ggml.merges: [Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
  • tokenizer.ggml.model: gpt2
  • tokenizer.ggml.padding_token_id: 100277
  • tokenizer.ggml.pre: dbrx
  • tokenizer.ggml.token_type: [1, 1, 1, 1, 1, ...]
  • tokenizer.ggml.tokens: [!, ", #, $, %, ...]
  • tokenizer.ggml.unknown_token_id: 100257
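These key/value pairs live in the GGUF file header and can be read programmatically. Below is a minimal sketch using the gguf Python package that ships with llama.cpp (pip install gguf); the file path is a placeholder for wherever the downloaded blob lives on disk:

    from gguf import GGUFReader  # pip install gguf

    # Placeholder path: point this at the downloaded GGUF blob.
    reader = GGUFReader("olmo2-13b-f16.gguf")

    # Header metadata keys (general.architecture, olmo2.block_count, ...).
    for name in reader.fields:
        print(name)

    # Tensor directory: one entry per weight, matching the listing below.
    for tensor in reader.tensors:
        print(tensor.name, tensor.tensor_type.name, list(tensor.shape))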
    Tensor
  • token_embd.weight: F16 [5120, 100352]
  • blk.0 through blk.39 (40 blocks with an identical tensor layout; N = 0 ... 39):
      • blk.N.attn_k.weight: F16 [5120, 5120]
      • blk.N.attn_k_norm.weight: F32 [5120]
      • blk.N.attn_output.weight: F16 [5120, 5120]
      • blk.N.attn_q.weight: F16 [5120, 5120]
      • blk.N.attn_q_norm.weight: F32 [5120]
      • blk.N.attn_v.weight: F16 [5120, 5120]
      • blk.N.ffn_down.weight: F16 [13824, 5120]
      • blk.N.ffn_gate.weight: F16 [5120, 13824]
      • blk.N.ffn_up.weight: F16 [5120, 13824]
      • blk.N.post_attention_norm.weight: F32 [5120]
      • blk.N.post_ffw_norm.weight: F32 [5120]
  • output.weight: F16 [5120, 100352]
  • output_norm.weight: F32 [5120]
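The shapes above are enough to sanity-check the 27GB size of this F16 build. A back-of-the-envelope parameter count, derived only from the dimensions listed in this file:

    # Dimensions from the metadata above (13B variant).
    d, ffn, layers, vocab = 5120, 13824, 40, 100352

    attn  = 4 * d * d      # attn_q, attn_k, attn_v, attn_output
    mlp   = 3 * d * ffn    # ffn_gate, ffn_up, ffn_down
    norms = 4 * d          # q/k norms + post_attention/post_ffw norms
    per_block = attn + mlp + norms

    embed = 2 * d * vocab  # token_embd plus the untied output head
    total = layers * per_block + embed + d  # trailing d is output_norm

    print(f"{total / 1e9:.1f}B parameters")  # -> 13.7B
    # Two bytes per F16 weight (the F32 norm vectors are negligible):
    print(f"{total * 2 / 1e9:.1f} GB")       # -> 27.4 GB, matching the 27GB above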