youtu/ youtu:latest

759 1 month ago

Youtu-LLM is a new, small yet powerful LLM that contains only 1.96B parameters, supports 128k long context, and has native agentic talents. Not yet runnable. Requires Ollama with the latest llama.cpp changes integrated.

tools thinking 2b
621a3484fa5d · 2.1GB
    Metadata
  • general.architecture
    deepseek2
  • general.file_type
    Q8_0
  • deepseek2.attention.head_count
    16
  • deepseek2.attention.head_count_kv
    1
  • deepseek2.attention.key_length
    576
  • deepseek2.attention.key_length_mla
    192
  • deepseek2.attention.kv_lora_rank
    512
  • deepseek2.attention.layer_norm_rms_epsilon
    1e-06
  • deepseek2.attention.q_lora_rank
    1536
  • deepseek2.attention.value_length
    512
  • deepseek2.attention.value_length_mla
    128
  • deepseek2.block_count
    32
  • deepseek2.context_length
    131072
  • deepseek2.embedding_length
    2048
  • deepseek2.expert_feed_forward_length
    6144
  • deepseek2.expert_shared_count
    0
  • deepseek2.feed_forward_length
    6144
  • deepseek2.leading_dense_block_count
    32
  • deepseek2.rope.dimension_count
    64
  • deepseek2.rope.freq_base
    1.6e+06
  • deepseek2.vocab_size
    128256
  • tokenizer.ggml.bos_token_id
    128000
  • tokenizer.ggml.eos_token_id
    128001
  • tokenizer.ggml.merges
    [Ġ Ġ, ĠĠ ĠĠ, i n, e r, Ġ t, ...]
  • tokenizer.ggml.model
    gpt2
  • tokenizer.ggml.padding_token_id
    128001
  • tokenizer.ggml.pre
    youtu
  • tokenizer.ggml.token_type
    [1, 1, 1, 1, 1, ...]
  • tokenizer.ggml.tokens
    [!, ", #, $, %, ...]
    Tensor
  • token_embd.weight
    Q8_0
    [2048, 128256]
  • blk.0
  • blk.0.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.0.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.0.attn_kv_a_norm.weight
    F32
    [512]
  • blk.0.attn_norm.weight
    F32
    [2048]
  • blk.0.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.0.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.0.attn_q_a_norm.weight
    F32
    [1536]
  • blk.0.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.0.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.0.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.0.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.0.ffn_norm.weight
    F32
    [2048]
  • blk.0.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.1
  • blk.1.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.1.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.1.attn_kv_a_norm.weight
    F32
    [512]
  • blk.1.attn_norm.weight
    F32
    [2048]
  • blk.1.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.1.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.1.attn_q_a_norm.weight
    F32
    [1536]
  • blk.1.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.1.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.1.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.1.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.1.ffn_norm.weight
    F32
    [2048]
  • blk.1.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.2
  • blk.2.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.2.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.2.attn_kv_a_norm.weight
    F32
    [512]
  • blk.2.attn_norm.weight
    F32
    [2048]
  • blk.2.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.2.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.2.attn_q_a_norm.weight
    F32
    [1536]
  • blk.2.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.2.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.2.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.2.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.2.ffn_norm.weight
    F32
    [2048]
  • blk.2.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.3
  • blk.3.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.3.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.3.attn_kv_a_norm.weight
    F32
    [512]
  • blk.3.attn_norm.weight
    F32
    [2048]
  • blk.3.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.3.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.3.attn_q_a_norm.weight
    F32
    [1536]
  • blk.3.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.3.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.3.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.3.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.3.ffn_norm.weight
    F32
    [2048]
  • blk.3.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.4
  • blk.4.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.4.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.4.attn_kv_a_norm.weight
    F32
    [512]
  • blk.4.attn_norm.weight
    F32
    [2048]
  • blk.4.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.4.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.4.attn_q_a_norm.weight
    F32
    [1536]
  • blk.4.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.4.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.4.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.4.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.4.ffn_norm.weight
    F32
    [2048]
  • blk.4.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.5
  • blk.5.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.5.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.5.attn_kv_a_norm.weight
    F32
    [512]
  • blk.5.attn_norm.weight
    F32
    [2048]
  • blk.5.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.5.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.5.attn_q_a_norm.weight
    F32
    [1536]
  • blk.5.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.5.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.5.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.5.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.5.ffn_norm.weight
    F32
    [2048]
  • blk.5.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.6
  • blk.6.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.6.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.6.attn_kv_a_norm.weight
    F32
    [512]
  • blk.6.attn_norm.weight
    F32
    [2048]
  • blk.6.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.6.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.6.attn_q_a_norm.weight
    F32
    [1536]
  • blk.6.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.6.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.6.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.6.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.6.ffn_norm.weight
    F32
    [2048]
  • blk.6.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.7
  • blk.7.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.7.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.7.attn_kv_a_norm.weight
    F32
    [512]
  • blk.7.attn_norm.weight
    F32
    [2048]
  • blk.7.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.7.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.7.attn_q_a_norm.weight
    F32
    [1536]
  • blk.7.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.7.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.7.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.7.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.7.ffn_norm.weight
    F32
    [2048]
  • blk.7.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.8
  • blk.8.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.8.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.8.attn_kv_a_norm.weight
    F32
    [512]
  • blk.8.attn_norm.weight
    F32
    [2048]
  • blk.8.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.8.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.8.attn_q_a_norm.weight
    F32
    [1536]
  • blk.8.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.8.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.8.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.8.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.8.ffn_norm.weight
    F32
    [2048]
  • blk.8.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.9
  • blk.9.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.9.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.9.attn_kv_a_norm.weight
    F32
    [512]
  • blk.9.attn_norm.weight
    F32
    [2048]
  • blk.9.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.9.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.9.attn_q_a_norm.weight
    F32
    [1536]
  • blk.9.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.9.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.9.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.9.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.9.ffn_norm.weight
    F32
    [2048]
  • blk.9.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.10
  • blk.10.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.10.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.10.attn_kv_a_norm.weight
    F32
    [512]
  • blk.10.attn_norm.weight
    F32
    [2048]
  • blk.10.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.10.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.10.attn_q_a_norm.weight
    F32
    [1536]
  • blk.10.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.10.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.10.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.10.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.10.ffn_norm.weight
    F32
    [2048]
  • blk.10.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.11
  • blk.11.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.11.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.11.attn_kv_a_norm.weight
    F32
    [512]
  • blk.11.attn_norm.weight
    F32
    [2048]
  • blk.11.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.11.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.11.attn_q_a_norm.weight
    F32
    [1536]
  • blk.11.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.11.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.11.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.11.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.11.ffn_norm.weight
    F32
    [2048]
  • blk.11.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.12
  • blk.12.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.12.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.12.attn_kv_a_norm.weight
    F32
    [512]
  • blk.12.attn_norm.weight
    F32
    [2048]
  • blk.12.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.12.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.12.attn_q_a_norm.weight
    F32
    [1536]
  • blk.12.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.12.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.12.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.12.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.12.ffn_norm.weight
    F32
    [2048]
  • blk.12.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.13
  • blk.13.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.13.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.13.attn_kv_a_norm.weight
    F32
    [512]
  • blk.13.attn_norm.weight
    F32
    [2048]
  • blk.13.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.13.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.13.attn_q_a_norm.weight
    F32
    [1536]
  • blk.13.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.13.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.13.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.13.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.13.ffn_norm.weight
    F32
    [2048]
  • blk.13.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.14
  • blk.14.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.14.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.14.attn_kv_a_norm.weight
    F32
    [512]
  • blk.14.attn_norm.weight
    F32
    [2048]
  • blk.14.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.14.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.14.attn_q_a_norm.weight
    F32
    [1536]
  • blk.14.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.14.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.14.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.14.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.14.ffn_norm.weight
    F32
    [2048]
  • blk.14.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.15
  • blk.15.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.15.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.15.attn_kv_a_norm.weight
    F32
    [512]
  • blk.15.attn_norm.weight
    F32
    [2048]
  • blk.15.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.15.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.15.attn_q_a_norm.weight
    F32
    [1536]
  • blk.15.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.15.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.15.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.15.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.15.ffn_norm.weight
    F32
    [2048]
  • blk.15.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.16
  • blk.16.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.16.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.16.attn_kv_a_norm.weight
    F32
    [512]
  • blk.16.attn_norm.weight
    F32
    [2048]
  • blk.16.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.16.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.16.attn_q_a_norm.weight
    F32
    [1536]
  • blk.16.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.16.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.16.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.16.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.16.ffn_norm.weight
    F32
    [2048]
  • blk.16.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.17
  • blk.17.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.17.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.17.attn_kv_a_norm.weight
    F32
    [512]
  • blk.17.attn_norm.weight
    F32
    [2048]
  • blk.17.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.17.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.17.attn_q_a_norm.weight
    F32
    [1536]
  • blk.17.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.17.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.17.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.17.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.17.ffn_norm.weight
    F32
    [2048]
  • blk.17.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.18
  • blk.18.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.18.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.18.attn_kv_a_norm.weight
    F32
    [512]
  • blk.18.attn_norm.weight
    F32
    [2048]
  • blk.18.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.18.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.18.attn_q_a_norm.weight
    F32
    [1536]
  • blk.18.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.18.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.18.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.18.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.18.ffn_norm.weight
    F32
    [2048]
  • blk.18.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.19
  • blk.19.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.19.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.19.attn_kv_a_norm.weight
    F32
    [512]
  • blk.19.attn_norm.weight
    F32
    [2048]
  • blk.19.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.19.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.19.attn_q_a_norm.weight
    F32
    [1536]
  • blk.19.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.19.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.19.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.19.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.19.ffn_norm.weight
    F32
    [2048]
  • blk.19.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.20
  • blk.20.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.20.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.20.attn_kv_a_norm.weight
    F32
    [512]
  • blk.20.attn_norm.weight
    F32
    [2048]
  • blk.20.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.20.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.20.attn_q_a_norm.weight
    F32
    [1536]
  • blk.20.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.20.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.20.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.20.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.20.ffn_norm.weight
    F32
    [2048]
  • blk.20.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.21
  • blk.21.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.21.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.21.attn_kv_a_norm.weight
    F32
    [512]
  • blk.21.attn_norm.weight
    F32
    [2048]
  • blk.21.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.21.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.21.attn_q_a_norm.weight
    F32
    [1536]
  • blk.21.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.21.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.21.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.21.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.21.ffn_norm.weight
    F32
    [2048]
  • blk.21.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.22
  • blk.22.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.22.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.22.attn_kv_a_norm.weight
    F32
    [512]
  • blk.22.attn_norm.weight
    F32
    [2048]
  • blk.22.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.22.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.22.attn_q_a_norm.weight
    F32
    [1536]
  • blk.22.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.22.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.22.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.22.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.22.ffn_norm.weight
    F32
    [2048]
  • blk.22.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.23
  • blk.23.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.23.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.23.attn_kv_a_norm.weight
    F32
    [512]
  • blk.23.attn_norm.weight
    F32
    [2048]
  • blk.23.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.23.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.23.attn_q_a_norm.weight
    F32
    [1536]
  • blk.23.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.23.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.23.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.23.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.23.ffn_norm.weight
    F32
    [2048]
  • blk.23.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.24
  • blk.24.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.24.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.24.attn_kv_a_norm.weight
    F32
    [512]
  • blk.24.attn_norm.weight
    F32
    [2048]
  • blk.24.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.24.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.24.attn_q_a_norm.weight
    F32
    [1536]
  • blk.24.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.24.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.24.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.24.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.24.ffn_norm.weight
    F32
    [2048]
  • blk.24.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.25
  • blk.25.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.25.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.25.attn_kv_a_norm.weight
    F32
    [512]
  • blk.25.attn_norm.weight
    F32
    [2048]
  • blk.25.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.25.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.25.attn_q_a_norm.weight
    F32
    [1536]
  • blk.25.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.25.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.25.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.25.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.25.ffn_norm.weight
    F32
    [2048]
  • blk.25.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.26
  • blk.26.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.26.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.26.attn_kv_a_norm.weight
    F32
    [512]
  • blk.26.attn_norm.weight
    F32
    [2048]
  • blk.26.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.26.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.26.attn_q_a_norm.weight
    F32
    [1536]
  • blk.26.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.26.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.26.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.26.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.26.ffn_norm.weight
    F32
    [2048]
  • blk.26.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.27
  • blk.27.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.27.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.27.attn_kv_a_norm.weight
    F32
    [512]
  • blk.27.attn_norm.weight
    F32
    [2048]
  • blk.27.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.27.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.27.attn_q_a_norm.weight
    F32
    [1536]
  • blk.27.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.27.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.27.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.27.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.27.ffn_norm.weight
    F32
    [2048]
  • blk.27.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.28
  • blk.28.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.28.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.28.attn_kv_a_norm.weight
    F32
    [512]
  • blk.28.attn_norm.weight
    F32
    [2048]
  • blk.28.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.28.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.28.attn_q_a_norm.weight
    F32
    [1536]
  • blk.28.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.28.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.28.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.28.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.28.ffn_norm.weight
    F32
    [2048]
  • blk.28.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.29
  • blk.29.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.29.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.29.attn_kv_a_norm.weight
    F32
    [512]
  • blk.29.attn_norm.weight
    F32
    [2048]
  • blk.29.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.29.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.29.attn_q_a_norm.weight
    F32
    [1536]
  • blk.29.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.29.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.29.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.29.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.29.ffn_norm.weight
    F32
    [2048]
  • blk.29.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.30
  • blk.30.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.30.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.30.attn_kv_a_norm.weight
    F32
    [512]
  • blk.30.attn_norm.weight
    F32
    [2048]
  • blk.30.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.30.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.30.attn_q_a_norm.weight
    F32
    [1536]
  • blk.30.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.30.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.30.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.30.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.30.ffn_norm.weight
    F32
    [2048]
  • blk.30.ffn_up.weight
    Q8_0
    [2048, 6144]
  • blk.31
  • blk.31.attn_k_b.weight
    Q8_0
    [128, 512, 16]
  • blk.31.attn_kv_a_mqa.weight
    Q8_0
    [2048, 576]
  • blk.31.attn_kv_a_norm.weight
    F32
    [512]
  • blk.31.attn_norm.weight
    F32
    [2048]
  • blk.31.attn_output.weight
    Q8_0
    [2048, 2048]
  • blk.31.attn_q_a.weight
    Q8_0
    [2048, 1536]
  • blk.31.attn_q_a_norm.weight
    F32
    [1536]
  • blk.31.attn_q_b.weight
    Q8_0
    [1536, 3072]
  • blk.31.attn_v_b.weight
    Q8_0
    [512, 128, 16]
  • blk.31.ffn_down.weight
    Q8_0
    [6144, 2048]
  • blk.31.ffn_gate.weight
    Q8_0
    [2048, 6144]
  • blk.31.ffn_norm.weight
    F32
    [2048]
  • blk.31.ffn_up.weight
    Q8_0
    [2048, 6144]
  • output_norm.weight
    F32
    [2048]