Models
Docs
Pricing
Sign in
Download
Models
Download
Docs
Pricing
Sign in
youtu
/
youtu
:latest
759
Downloads
Updated
1 month ago
Youtu-LLM is a new, small, yet powerful LLM that contains only 1.96B parameters, supports a 128k long context, and has native agentic talents. Not yet runnable: it requires Ollama with the latest llama.cpp changes integrated.
Youtu-LLM is a new, small, yet powerful LLM that contains only 1.96B parameters, supports a 128k long context, and has native agentic talents. Not yet runnable: it requires Ollama with the latest llama.cpp changes integrated.
Cancel
tools
thinking
2b
youtu:latest
...
/
model
621a3484fa5d · 2.1GB
Metadata
general.architecture
deepseek2
deepseek2
general.file_type
Q8_0
Q8_0
deepseek2.attention.head_count
16
16
deepseek2.attention.head_count_kv
1
1
deepseek2.attention.key_length
576
576
deepseek2.attention.key_length_mla
192
192
deepseek2.attention.kv_lora_rank
512
512
deepseek2.attention.layer_norm_rms_epsilon
1e-06
1e-06
deepseek2.attention.q_lora_rank
1536
1536
deepseek2.attention.value_length
512
512
deepseek2.attention.value_length_mla
128
128
deepseek2.block_count
32
32
deepseek2.context_length
131072
131072
deepseek2.embedding_length
2048
2048
deepseek2.expert_feed_forward_length
6144
6144
deepseek2.expert_shared_count
0
0
deepseek2.feed_forward_length
6144
6144
deepseek2.leading_dense_block_count
32
32
deepseek2.rope.dimension_count
64
64
deepseek2.rope.freq_base
1.6e+06
1.6e+06
deepseek2.vocab_size
128256
128256
tokenizer.ggml.bos_token_id
128000
128000
tokenizer.ggml.eos_token_id
128001
128001
tokenizer.ggml.merges
[Ġ Ġ, ĠĠ ĠĠ, i n, e r, Ġ t, ...]
[Ġ Ġ, ĠĠ ĠĠ, i n, e r, Ġ t, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
128001
128001
tokenizer.ggml.pre
youtu
youtu
tokenizer.ggml.token_type
[1, 1, 1, 1, 1, ...]
[1, 1, 1, 1, 1, ...]
tokenizer.ggml.tokens
[!, ", #, $, %, ...]
[!, ", #, $, %, ...]
Tensor
Name
Type
Shape
token_embd.weight
Q8_0
Q8_0
[2048, 128256]
blk.0
blk.0.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.0.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.0.attn_kv_a_norm.weight
F32
F32
[512]
blk.0.attn_norm.weight
F32
F32
[2048]
blk.0.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.0.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.0.attn_q_a_norm.weight
F32
F32
[1536]
blk.0.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.0.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.0.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.0.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.0.ffn_norm.weight
F32
F32
[2048]
blk.0.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.1
blk.1.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.1.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.1.attn_kv_a_norm.weight
F32
F32
[512]
blk.1.attn_norm.weight
F32
F32
[2048]
blk.1.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.1.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.1.attn_q_a_norm.weight
F32
F32
[1536]
blk.1.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.1.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.1.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.1.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.1.ffn_norm.weight
F32
F32
[2048]
blk.1.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.2
blk.2.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.2.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.2.attn_kv_a_norm.weight
F32
F32
[512]
blk.2.attn_norm.weight
F32
F32
[2048]
blk.2.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.2.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.2.attn_q_a_norm.weight
F32
F32
[1536]
blk.2.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.2.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.2.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.2.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.2.ffn_norm.weight
F32
F32
[2048]
blk.2.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.3
blk.3.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.3.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.3.attn_kv_a_norm.weight
F32
F32
[512]
blk.3.attn_norm.weight
F32
F32
[2048]
blk.3.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.3.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.3.attn_q_a_norm.weight
F32
F32
[1536]
blk.3.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.3.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.3.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.3.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.3.ffn_norm.weight
F32
F32
[2048]
blk.3.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.4
blk.4.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.4.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.4.attn_kv_a_norm.weight
F32
F32
[512]
blk.4.attn_norm.weight
F32
F32
[2048]
blk.4.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.4.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.4.attn_q_a_norm.weight
F32
F32
[1536]
blk.4.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.4.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.4.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.4.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.4.ffn_norm.weight
F32
F32
[2048]
blk.4.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.5
blk.5.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.5.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.5.attn_kv_a_norm.weight
F32
F32
[512]
blk.5.attn_norm.weight
F32
F32
[2048]
blk.5.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.5.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.5.attn_q_a_norm.weight
F32
F32
[1536]
blk.5.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.5.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.5.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.5.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.5.ffn_norm.weight
F32
F32
[2048]
blk.5.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.6
blk.6.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.6.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.6.attn_kv_a_norm.weight
F32
F32
[512]
blk.6.attn_norm.weight
F32
F32
[2048]
blk.6.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.6.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.6.attn_q_a_norm.weight
F32
F32
[1536]
blk.6.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.6.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.6.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.6.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.6.ffn_norm.weight
F32
F32
[2048]
blk.6.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.7
blk.7.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.7.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.7.attn_kv_a_norm.weight
F32
F32
[512]
blk.7.attn_norm.weight
F32
F32
[2048]
blk.7.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.7.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.7.attn_q_a_norm.weight
F32
F32
[1536]
blk.7.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.7.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.7.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.7.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.7.ffn_norm.weight
F32
F32
[2048]
blk.7.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.8
blk.8.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.8.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.8.attn_kv_a_norm.weight
F32
F32
[512]
blk.8.attn_norm.weight
F32
F32
[2048]
blk.8.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.8.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.8.attn_q_a_norm.weight
F32
F32
[1536]
blk.8.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.8.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.8.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.8.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.8.ffn_norm.weight
F32
F32
[2048]
blk.8.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.9
blk.9.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.9.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.9.attn_kv_a_norm.weight
F32
F32
[512]
blk.9.attn_norm.weight
F32
F32
[2048]
blk.9.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.9.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.9.attn_q_a_norm.weight
F32
F32
[1536]
blk.9.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.9.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.9.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.9.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.9.ffn_norm.weight
F32
F32
[2048]
blk.9.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.10
blk.10.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.10.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.10.attn_kv_a_norm.weight
F32
F32
[512]
blk.10.attn_norm.weight
F32
F32
[2048]
blk.10.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.10.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.10.attn_q_a_norm.weight
F32
F32
[1536]
blk.10.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.10.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.10.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.10.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.10.ffn_norm.weight
F32
F32
[2048]
blk.10.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.11
blk.11.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.11.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.11.attn_kv_a_norm.weight
F32
F32
[512]
blk.11.attn_norm.weight
F32
F32
[2048]
blk.11.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.11.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.11.attn_q_a_norm.weight
F32
F32
[1536]
blk.11.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.11.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.11.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.11.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.11.ffn_norm.weight
F32
F32
[2048]
blk.11.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.12
blk.12.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.12.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.12.attn_kv_a_norm.weight
F32
F32
[512]
blk.12.attn_norm.weight
F32
F32
[2048]
blk.12.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.12.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.12.attn_q_a_norm.weight
F32
F32
[1536]
blk.12.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.12.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.12.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.12.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.12.ffn_norm.weight
F32
F32
[2048]
blk.12.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.13
blk.13.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.13.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.13.attn_kv_a_norm.weight
F32
F32
[512]
blk.13.attn_norm.weight
F32
F32
[2048]
blk.13.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.13.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.13.attn_q_a_norm.weight
F32
F32
[1536]
blk.13.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.13.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.13.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.13.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.13.ffn_norm.weight
F32
F32
[2048]
blk.13.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.14
blk.14.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.14.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.14.attn_kv_a_norm.weight
F32
F32
[512]
blk.14.attn_norm.weight
F32
F32
[2048]
blk.14.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.14.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.14.attn_q_a_norm.weight
F32
F32
[1536]
blk.14.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.14.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.14.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.14.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.14.ffn_norm.weight
F32
F32
[2048]
blk.14.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.15
blk.15.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.15.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.15.attn_kv_a_norm.weight
F32
F32
[512]
blk.15.attn_norm.weight
F32
F32
[2048]
blk.15.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.15.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.15.attn_q_a_norm.weight
F32
F32
[1536]
blk.15.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.15.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.15.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.15.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.15.ffn_norm.weight
F32
F32
[2048]
blk.15.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.16
blk.16.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.16.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.16.attn_kv_a_norm.weight
F32
F32
[512]
blk.16.attn_norm.weight
F32
F32
[2048]
blk.16.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.16.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.16.attn_q_a_norm.weight
F32
F32
[1536]
blk.16.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.16.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.16.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.16.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.16.ffn_norm.weight
F32
F32
[2048]
blk.16.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.17
blk.17.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.17.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.17.attn_kv_a_norm.weight
F32
F32
[512]
blk.17.attn_norm.weight
F32
F32
[2048]
blk.17.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.17.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.17.attn_q_a_norm.weight
F32
F32
[1536]
blk.17.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.17.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.17.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.17.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.17.ffn_norm.weight
F32
F32
[2048]
blk.17.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.18
blk.18.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.18.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.18.attn_kv_a_norm.weight
F32
F32
[512]
blk.18.attn_norm.weight
F32
F32
[2048]
blk.18.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.18.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.18.attn_q_a_norm.weight
F32
F32
[1536]
blk.18.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.18.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.18.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.18.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.18.ffn_norm.weight
F32
F32
[2048]
blk.18.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.19
blk.19.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.19.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.19.attn_kv_a_norm.weight
F32
F32
[512]
blk.19.attn_norm.weight
F32
F32
[2048]
blk.19.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.19.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.19.attn_q_a_norm.weight
F32
F32
[1536]
blk.19.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.19.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.19.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.19.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.19.ffn_norm.weight
F32
F32
[2048]
blk.19.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.20
blk.20.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.20.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.20.attn_kv_a_norm.weight
F32
F32
[512]
blk.20.attn_norm.weight
F32
F32
[2048]
blk.20.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.20.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.20.attn_q_a_norm.weight
F32
F32
[1536]
blk.20.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.20.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.20.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.20.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.20.ffn_norm.weight
F32
F32
[2048]
blk.20.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.21
blk.21.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.21.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.21.attn_kv_a_norm.weight
F32
F32
[512]
blk.21.attn_norm.weight
F32
F32
[2048]
blk.21.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.21.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.21.attn_q_a_norm.weight
F32
F32
[1536]
blk.21.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.21.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.21.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.21.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.21.ffn_norm.weight
F32
F32
[2048]
blk.21.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.22
blk.22.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.22.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.22.attn_kv_a_norm.weight
F32
F32
[512]
blk.22.attn_norm.weight
F32
F32
[2048]
blk.22.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.22.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.22.attn_q_a_norm.weight
F32
F32
[1536]
blk.22.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.22.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.22.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.22.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.22.ffn_norm.weight
F32
F32
[2048]
blk.22.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.23
blk.23.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.23.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.23.attn_kv_a_norm.weight
F32
F32
[512]
blk.23.attn_norm.weight
F32
F32
[2048]
blk.23.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.23.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.23.attn_q_a_norm.weight
F32
F32
[1536]
blk.23.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.23.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.23.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.23.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.23.ffn_norm.weight
F32
F32
[2048]
blk.23.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.24
blk.24.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.24.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.24.attn_kv_a_norm.weight
F32
F32
[512]
blk.24.attn_norm.weight
F32
F32
[2048]
blk.24.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.24.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.24.attn_q_a_norm.weight
F32
F32
[1536]
blk.24.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.24.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.24.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.24.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.24.ffn_norm.weight
F32
F32
[2048]
blk.24.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.25
blk.25.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.25.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.25.attn_kv_a_norm.weight
F32
F32
[512]
blk.25.attn_norm.weight
F32
F32
[2048]
blk.25.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.25.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.25.attn_q_a_norm.weight
F32
F32
[1536]
blk.25.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.25.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.25.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.25.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.25.ffn_norm.weight
F32
F32
[2048]
blk.25.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.26
blk.26.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.26.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.26.attn_kv_a_norm.weight
F32
F32
[512]
blk.26.attn_norm.weight
F32
F32
[2048]
blk.26.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.26.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.26.attn_q_a_norm.weight
F32
F32
[1536]
blk.26.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.26.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.26.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.26.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.26.ffn_norm.weight
F32
F32
[2048]
blk.26.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.27
blk.27.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.27.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.27.attn_kv_a_norm.weight
F32
F32
[512]
blk.27.attn_norm.weight
F32
F32
[2048]
blk.27.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.27.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.27.attn_q_a_norm.weight
F32
F32
[1536]
blk.27.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.27.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.27.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.27.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.27.ffn_norm.weight
F32
F32
[2048]
blk.27.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.28
blk.28.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.28.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.28.attn_kv_a_norm.weight
F32
F32
[512]
blk.28.attn_norm.weight
F32
F32
[2048]
blk.28.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.28.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.28.attn_q_a_norm.weight
F32
F32
[1536]
blk.28.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.28.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.28.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.28.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.28.ffn_norm.weight
F32
F32
[2048]
blk.28.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.29
blk.29.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.29.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.29.attn_kv_a_norm.weight
F32
F32
[512]
blk.29.attn_norm.weight
F32
F32
[2048]
blk.29.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.29.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.29.attn_q_a_norm.weight
F32
F32
[1536]
blk.29.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.29.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.29.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.29.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.29.ffn_norm.weight
F32
F32
[2048]
blk.29.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.30
blk.30.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.30.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.30.attn_kv_a_norm.weight
F32
F32
[512]
blk.30.attn_norm.weight
F32
F32
[2048]
blk.30.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.30.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.30.attn_q_a_norm.weight
F32
F32
[1536]
blk.30.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.30.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.30.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.30.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.30.ffn_norm.weight
F32
F32
[2048]
blk.30.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
blk.31
blk.31.attn_k_b.weight
Q8_0
Q8_0
[128, 512, 16]
blk.31.attn_kv_a_mqa.weight
Q8_0
Q8_0
[2048, 576]
blk.31.attn_kv_a_norm.weight
F32
F32
[512]
blk.31.attn_norm.weight
F32
F32
[2048]
blk.31.attn_output.weight
Q8_0
Q8_0
[2048, 2048]
blk.31.attn_q_a.weight
Q8_0
Q8_0
[2048, 1536]
blk.31.attn_q_a_norm.weight
F32
F32
[1536]
blk.31.attn_q_b.weight
Q8_0
Q8_0
[1536, 3072]
blk.31.attn_v_b.weight
Q8_0
Q8_0
[512, 128, 16]
blk.31.ffn_down.weight
Q8_0
Q8_0
[6144, 2048]
blk.31.ffn_gate.weight
Q8_0
Q8_0
[2048, 6144]
blk.31.ffn_norm.weight
F32
F32
[2048]
blk.31.ffn_up.weight
Q8_0
Q8_0
[2048, 6144]
output_norm.weight
F32
F32
[2048]