Qwen 2.5 with Unsloth
0.5b
1.5b
3b
7b
14b
146 Pulls Updated 7 days ago
fc4bc5fff654 · 8.6GB
-
general.architectureqwen2
-
general.base_model.0.nameQwen2.5 14B
-
general.base_model.0.organizationQwen
-
general.base_model.0.repo_urlhttps://huggingface.co/Qwen/Qwen2.5-14B
-
general.base_model.count1
-
general.basenameQwen2.5
-
general.file_type25
-
general.languages[en]
-
general.licenseapache-2.0
-
general.nameQwen2.5 14B
-
general.organizationQwen
-
general.quantization_version2
-
general.size_label14B
-
general.tags[unsloth, transformers]
-
general.typemodel
-
qwen2.attention.head_count40
-
qwen2.attention.head_count_kv8
-
qwen2.attention.layer_norm_rms_epsilon1e-05
-
qwen2.block_count48
-
qwen2.context_length131072
-
qwen2.embedding_length5120
-
qwen2.feed_forward_length13824
-
qwen2.rope.freq_base1e+06
-
tokenizer.ggml.add_bos_tokenfalse
-
tokenizer.ggml.bos_token_id151643
-
tokenizer.ggml.eos_token_id151643
-
tokenizer.ggml.merges[Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id151665
-
tokenizer.ggml.preqwen2
-
tokenizer.ggml.token_type[1, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[!, ", #, $, %, ...]
-
NameTypeShape
-
token_embd.weight(!unknown_type 20!)[5120, 152064]
-
blk.0.attn_k.biasF32[1024]
-
blk.0.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.0.attn_norm.weightF32[5120]
-
blk.0.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.0.attn_q.biasF32[5120]
-
blk.0.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.0.attn_v.biasF32[1024]
-
blk.0.attn_v.weightQ5_K[5120, 1024]
-
blk.0.ffn_down.weightQ5_K[13824, 5120]
-
blk.0.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.0.ffn_norm.weightF32[5120]
-
blk.0.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.1.attn_k.biasF32[1024]
-
blk.1.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.1.attn_norm.weightF32[5120]
-
blk.1.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.1.attn_q.biasF32[5120]
-
blk.1.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.1.attn_v.biasF32[1024]
-
blk.1.attn_v.weightQ5_K[5120, 1024]
-
blk.1.ffn_down.weightQ5_K[13824, 5120]
-
blk.1.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.1.ffn_norm.weightF32[5120]
-
blk.1.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.2.attn_k.biasF32[1024]
-
blk.2.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.2.attn_norm.weightF32[5120]
-
blk.2.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.2.attn_q.biasF32[5120]
-
blk.2.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.2.attn_v.biasF32[1024]
-
blk.2.attn_v.weightQ5_K[5120, 1024]
-
blk.2.ffn_down.weightQ5_K[13824, 5120]
-
blk.2.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.2.ffn_norm.weightF32[5120]
-
blk.2.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.3.attn_k.biasF32[1024]
-
blk.3.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.3.attn_norm.weightF32[5120]
-
blk.3.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.3.attn_q.biasF32[5120]
-
blk.3.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.3.attn_v.biasF32[1024]
-
blk.3.attn_v.weightQ5_K[5120, 1024]
-
blk.3.ffn_down.weightQ5_K[13824, 5120]
-
blk.3.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.3.ffn_norm.weightF32[5120]
-
blk.3.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.4.attn_k.biasF32[1024]
-
blk.4.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.4.attn_norm.weightF32[5120]
-
blk.4.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.4.attn_q.biasF32[5120]
-
blk.4.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.4.attn_v.biasF32[1024]
-
blk.4.attn_v.weightQ5_K[5120, 1024]
-
blk.4.ffn_down.weightQ5_K[13824, 5120]
-
blk.4.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.4.ffn_norm.weightF32[5120]
-
blk.4.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.5.attn_k.biasF32[1024]
-
blk.5.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.5.attn_norm.weightF32[5120]
-
blk.5.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.5.attn_q.biasF32[5120]
-
blk.5.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.5.attn_v.biasF32[1024]
-
blk.5.attn_v.weightQ5_K[5120, 1024]
-
blk.5.ffn_down.weightQ5_K[13824, 5120]
-
blk.5.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.5.ffn_norm.weightF32[5120]
-
blk.5.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.6.attn_k.biasF32[1024]
-
blk.6.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.6.attn_norm.weightF32[5120]
-
blk.6.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.6.attn_q.biasF32[5120]
-
blk.6.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.6.attn_v.biasF32[1024]
-
blk.6.attn_v.weightQ5_K[5120, 1024]
-
blk.6.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.6.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.6.ffn_norm.weightF32[5120]
-
blk.6.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.7.attn_k.biasF32[1024]
-
blk.7.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.7.attn_norm.weightF32[5120]
-
blk.7.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.7.attn_q.biasF32[5120]
-
blk.7.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.7.attn_v.biasF32[1024]
-
blk.7.attn_v.weightQ5_K[5120, 1024]
-
blk.7.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.7.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.7.ffn_norm.weightF32[5120]
-
blk.7.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.8.attn_k.biasF32[1024]
-
blk.8.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.8.attn_norm.weightF32[5120]
-
blk.8.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.8.attn_q.biasF32[5120]
-
blk.8.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.8.attn_v.biasF32[1024]
-
blk.8.attn_v.weightQ5_K[5120, 1024]
-
blk.8.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.8.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.8.ffn_norm.weightF32[5120]
-
blk.8.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.9.attn_k.biasF32[1024]
-
blk.9.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.9.attn_norm.weightF32[5120]
-
blk.9.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.9.attn_q.biasF32[5120]
-
blk.9.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.9.attn_v.biasF32[1024]
-
blk.9.attn_v.weightQ5_K[5120, 1024]
-
blk.9.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.9.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.9.ffn_norm.weightF32[5120]
-
blk.9.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.10.attn_k.biasF32[1024]
-
blk.10.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.10.attn_norm.weightF32[5120]
-
blk.10.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.10.attn_q.biasF32[5120]
-
blk.10.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.10.attn_v.biasF32[1024]
-
blk.10.attn_v.weightQ5_K[5120, 1024]
-
blk.10.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.10.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.10.ffn_norm.weightF32[5120]
-
blk.10.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.11.attn_k.biasF32[1024]
-
blk.11.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.11.attn_norm.weightF32[5120]
-
blk.11.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.11.attn_q.biasF32[5120]
-
blk.11.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.11.attn_v.biasF32[1024]
-
blk.11.attn_v.weightQ5_K[5120, 1024]
-
blk.11.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.11.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.11.ffn_norm.weightF32[5120]
-
blk.11.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.12.attn_k.biasF32[1024]
-
blk.12.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.12.attn_norm.weightF32[5120]
-
blk.12.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.12.attn_q.biasF32[5120]
-
blk.12.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.12.attn_v.biasF32[1024]
-
blk.12.attn_v.weightQ5_K[5120, 1024]
-
blk.12.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.12.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.12.ffn_norm.weightF32[5120]
-
blk.12.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.13.attn_k.biasF32[1024]
-
blk.13.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.13.attn_norm.weightF32[5120]
-
blk.13.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.13.attn_q.biasF32[5120]
-
blk.13.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.13.attn_v.biasF32[1024]
-
blk.13.attn_v.weightQ5_K[5120, 1024]
-
blk.13.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.13.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.13.ffn_norm.weightF32[5120]
-
blk.13.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.14.attn_k.biasF32[1024]
-
blk.14.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.14.attn_norm.weightF32[5120]
-
blk.14.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.14.attn_q.biasF32[5120]
-
blk.14.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.14.attn_v.biasF32[1024]
-
blk.14.attn_v.weightQ5_K[5120, 1024]
-
blk.14.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.14.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.14.ffn_norm.weightF32[5120]
-
blk.14.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.15.attn_k.biasF32[1024]
-
blk.15.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.15.attn_norm.weightF32[5120]
-
blk.15.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.15.attn_q.biasF32[5120]
-
blk.15.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.15.attn_v.biasF32[1024]
-
blk.15.attn_v.weightQ5_K[5120, 1024]
-
blk.15.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.15.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.15.ffn_norm.weightF32[5120]
-
blk.15.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.16.attn_k.biasF32[1024]
-
blk.16.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.16.attn_norm.weightF32[5120]
-
blk.16.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.16.attn_q.biasF32[5120]
-
blk.16.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.16.attn_v.biasF32[1024]
-
blk.16.attn_v.weightQ5_K[5120, 1024]
-
blk.16.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.16.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.16.ffn_norm.weightF32[5120]
-
blk.16.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.17.attn_k.biasF32[1024]
-
blk.17.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.17.attn_norm.weightF32[5120]
-
blk.17.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.17.attn_q.biasF32[5120]
-
blk.17.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.17.attn_v.biasF32[1024]
-
blk.17.attn_v.weightQ5_K[5120, 1024]
-
blk.17.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.17.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.17.ffn_norm.weightF32[5120]
-
blk.17.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.18.attn_k.biasF32[1024]
-
blk.18.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.18.attn_norm.weightF32[5120]
-
blk.18.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.18.attn_q.biasF32[5120]
-
blk.18.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.18.attn_v.biasF32[1024]
-
blk.18.attn_v.weightQ5_K[5120, 1024]
-
blk.18.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.18.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.18.ffn_norm.weightF32[5120]
-
blk.18.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.19.attn_k.biasF32[1024]
-
blk.19.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.19.attn_norm.weightF32[5120]
-
blk.19.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.19.attn_q.biasF32[5120]
-
blk.19.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.19.attn_v.biasF32[1024]
-
blk.19.attn_v.weightQ5_K[5120, 1024]
-
blk.19.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.19.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.19.ffn_norm.weightF32[5120]
-
blk.19.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.20.attn_k.biasF32[1024]
-
blk.20.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.20.attn_norm.weightF32[5120]
-
blk.20.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.20.attn_q.biasF32[5120]
-
blk.20.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.20.attn_v.biasF32[1024]
-
blk.20.attn_v.weightQ5_K[5120, 1024]
-
blk.20.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.20.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.20.ffn_norm.weightF32[5120]
-
blk.20.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.21.attn_k.biasF32[1024]
-
blk.21.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.21.attn_norm.weightF32[5120]
-
blk.21.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.21.attn_q.biasF32[5120]
-
blk.21.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.21.attn_v.biasF32[1024]
-
blk.21.attn_v.weightQ5_K[5120, 1024]
-
blk.21.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.21.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.21.ffn_norm.weightF32[5120]
-
blk.21.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.22.attn_k.biasF32[1024]
-
blk.22.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.22.attn_norm.weightF32[5120]
-
blk.22.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.22.attn_q.biasF32[5120]
-
blk.22.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.22.attn_v.biasF32[1024]
-
blk.22.attn_v.weightQ5_K[5120, 1024]
-
blk.22.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.22.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.22.ffn_norm.weightF32[5120]
-
blk.22.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.23.attn_k.biasF32[1024]
-
blk.23.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.23.attn_norm.weightF32[5120]
-
blk.23.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.23.attn_q.biasF32[5120]
-
blk.23.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.23.attn_v.biasF32[1024]
-
blk.23.attn_v.weightQ5_K[5120, 1024]
-
blk.23.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.23.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.23.ffn_norm.weightF32[5120]
-
blk.23.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.24.attn_k.biasF32[1024]
-
blk.24.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.24.attn_norm.weightF32[5120]
-
blk.24.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.24.attn_q.biasF32[5120]
-
blk.24.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.24.attn_v.biasF32[1024]
-
blk.24.attn_v.weightQ5_K[5120, 1024]
-
blk.24.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.24.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.24.ffn_norm.weightF32[5120]
-
blk.24.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.25.attn_k.biasF32[1024]
-
blk.25.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.25.attn_norm.weightF32[5120]
-
blk.25.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.25.attn_q.biasF32[5120]
-
blk.25.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.25.attn_v.biasF32[1024]
-
blk.25.attn_v.weightQ5_K[5120, 1024]
-
blk.25.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.25.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.25.ffn_norm.weightF32[5120]
-
blk.25.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.26.attn_k.biasF32[1024]
-
blk.26.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.26.attn_norm.weightF32[5120]
-
blk.26.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.26.attn_q.biasF32[5120]
-
blk.26.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.26.attn_v.biasF32[1024]
-
blk.26.attn_v.weightQ5_K[5120, 1024]
-
blk.26.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.26.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.26.ffn_norm.weightF32[5120]
-
blk.26.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.27.attn_k.biasF32[1024]
-
blk.27.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.27.attn_norm.weightF32[5120]
-
blk.27.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.27.attn_q.biasF32[5120]
-
blk.27.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.27.attn_v.biasF32[1024]
-
blk.27.attn_v.weightQ5_K[5120, 1024]
-
blk.27.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.27.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.27.ffn_norm.weightF32[5120]
-
blk.27.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.28.attn_k.biasF32[1024]
-
blk.28.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.28.attn_norm.weightF32[5120]
-
blk.28.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.28.attn_q.biasF32[5120]
-
blk.28.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.28.attn_v.biasF32[1024]
-
blk.28.attn_v.weightQ5_K[5120, 1024]
-
blk.28.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.28.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.28.ffn_norm.weightF32[5120]
-
blk.28.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.29.attn_k.biasF32[1024]
-
blk.29.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.29.attn_norm.weightF32[5120]
-
blk.29.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.29.attn_q.biasF32[5120]
-
blk.29.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.29.attn_v.biasF32[1024]
-
blk.29.attn_v.weightQ5_K[5120, 1024]
-
blk.29.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.29.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.29.ffn_norm.weightF32[5120]
-
blk.29.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.30.attn_k.biasF32[1024]
-
blk.30.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.30.attn_norm.weightF32[5120]
-
blk.30.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.30.attn_q.biasF32[5120]
-
blk.30.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.30.attn_v.biasF32[1024]
-
blk.30.attn_v.weightQ5_K[5120, 1024]
-
blk.30.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.30.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.30.ffn_norm.weightF32[5120]
-
blk.30.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.31.attn_k.biasF32[1024]
-
blk.31.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.31.attn_norm.weightF32[5120]
-
blk.31.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.31.attn_q.biasF32[5120]
-
blk.31.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.31.attn_v.biasF32[1024]
-
blk.31.attn_v.weightQ5_K[5120, 1024]
-
blk.31.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.31.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.31.ffn_norm.weightF32[5120]
-
blk.31.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.32.attn_k.biasF32[1024]
-
blk.32.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.32.attn_norm.weightF32[5120]
-
blk.32.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.32.attn_q.biasF32[5120]
-
blk.32.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.32.attn_v.biasF32[1024]
-
blk.32.attn_v.weightQ5_K[5120, 1024]
-
blk.32.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.32.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.32.ffn_norm.weightF32[5120]
-
blk.32.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.33.attn_k.biasF32[1024]
-
blk.33.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.33.attn_norm.weightF32[5120]
-
blk.33.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.33.attn_q.biasF32[5120]
-
blk.33.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.33.attn_v.biasF32[1024]
-
blk.33.attn_v.weightQ5_K[5120, 1024]
-
blk.33.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.33.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.33.ffn_norm.weightF32[5120]
-
blk.33.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.34.attn_k.biasF32[1024]
-
blk.34.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.34.attn_norm.weightF32[5120]
-
blk.34.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.34.attn_q.biasF32[5120]
-
blk.34.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.34.attn_v.biasF32[1024]
-
blk.34.attn_v.weightQ5_K[5120, 1024]
-
blk.34.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.34.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.34.ffn_norm.weightF32[5120]
-
blk.34.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.35.attn_k.biasF32[1024]
-
blk.35.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.35.attn_norm.weightF32[5120]
-
blk.35.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.35.attn_q.biasF32[5120]
-
blk.35.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.35.attn_v.biasF32[1024]
-
blk.35.attn_v.weightQ5_K[5120, 1024]
-
blk.35.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.35.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.35.ffn_norm.weightF32[5120]
-
blk.35.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.36.attn_k.biasF32[1024]
-
blk.36.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.36.attn_norm.weightF32[5120]
-
blk.36.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.36.attn_q.biasF32[5120]
-
blk.36.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.36.attn_v.biasF32[1024]
-
blk.36.attn_v.weightQ5_K[5120, 1024]
-
blk.36.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.36.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.36.ffn_norm.weightF32[5120]
-
blk.36.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.37.attn_k.biasF32[1024]
-
blk.37.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.37.attn_norm.weightF32[5120]
-
blk.37.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.37.attn_q.biasF32[5120]
-
blk.37.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.37.attn_v.biasF32[1024]
-
blk.37.attn_v.weightQ5_K[5120, 1024]
-
blk.37.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.37.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.37.ffn_norm.weightF32[5120]
-
blk.37.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.38.attn_k.biasF32[1024]
-
blk.38.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.38.attn_norm.weightF32[5120]
-
blk.38.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.38.attn_q.biasF32[5120]
-
blk.38.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.38.attn_v.biasF32[1024]
-
blk.38.attn_v.weightQ5_K[5120, 1024]
-
blk.38.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.38.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.38.ffn_norm.weightF32[5120]
-
blk.38.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.39.attn_k.biasF32[1024]
-
blk.39.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.39.attn_norm.weightF32[5120]
-
blk.39.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.39.attn_q.biasF32[5120]
-
blk.39.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.39.attn_v.biasF32[1024]
-
blk.39.attn_v.weightQ5_K[5120, 1024]
-
blk.39.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.39.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.39.ffn_norm.weightF32[5120]
-
blk.39.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.40.attn_k.biasF32[1024]
-
blk.40.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.40.attn_norm.weightF32[5120]
-
blk.40.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.40.attn_q.biasF32[5120]
-
blk.40.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.40.attn_v.biasF32[1024]
-
blk.40.attn_v.weightQ5_K[5120, 1024]
-
blk.40.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.40.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.40.ffn_norm.weightF32[5120]
-
blk.40.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.41.attn_k.biasF32[1024]
-
blk.41.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.41.attn_norm.weightF32[5120]
-
blk.41.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.41.attn_q.biasF32[5120]
-
blk.41.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.41.attn_v.biasF32[1024]
-
blk.41.attn_v.weightQ5_K[5120, 1024]
-
blk.41.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.41.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.41.ffn_norm.weightF32[5120]
-
blk.41.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.42.attn_k.biasF32[1024]
-
blk.42.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.42.attn_norm.weightF32[5120]
-
blk.42.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.42.attn_q.biasF32[5120]
-
blk.42.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.42.attn_v.biasF32[1024]
-
blk.42.attn_v.weightQ5_K[5120, 1024]
-
blk.42.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.42.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.42.ffn_norm.weightF32[5120]
-
blk.42.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.43.attn_k.biasF32[1024]
-
blk.43.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.43.attn_norm.weightF32[5120]
-
blk.43.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.43.attn_q.biasF32[5120]
-
blk.43.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.43.attn_v.biasF32[1024]
-
blk.43.attn_v.weightQ5_K[5120, 1024]
-
blk.43.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.43.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.43.ffn_norm.weightF32[5120]
-
blk.43.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.44.attn_k.biasF32[1024]
-
blk.44.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.44.attn_norm.weightF32[5120]
-
blk.44.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.44.attn_q.biasF32[5120]
-
blk.44.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.44.attn_v.biasF32[1024]
-
blk.44.attn_v.weightQ5_K[5120, 1024]
-
blk.44.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.44.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.44.ffn_norm.weightF32[5120]
-
blk.44.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.45.attn_k.biasF32[1024]
-
blk.45.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.45.attn_norm.weightF32[5120]
-
blk.45.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.45.attn_q.biasF32[5120]
-
blk.45.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.45.attn_v.biasF32[1024]
-
blk.45.attn_v.weightQ5_K[5120, 1024]
-
blk.45.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.45.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.45.ffn_norm.weightF32[5120]
-
blk.45.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.46.attn_k.biasF32[1024]
-
blk.46.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.46.attn_norm.weightF32[5120]
-
blk.46.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.46.attn_q.biasF32[5120]
-
blk.46.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.46.attn_v.biasF32[1024]
-
blk.46.attn_v.weightQ5_K[5120, 1024]
-
blk.46.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.46.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.46.ffn_norm.weightF32[5120]
-
blk.46.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
blk.47.attn_k.biasF32[1024]
-
blk.47.attn_k.weight(!unknown_type 20!)[5120, 1024]
-
blk.47.attn_norm.weightF32[5120]
-
blk.47.attn_output.weight(!unknown_type 20!)[5120, 5120]
-
blk.47.attn_q.biasF32[5120]
-
blk.47.attn_q.weight(!unknown_type 20!)[5120, 5120]
-
blk.47.attn_v.biasF32[1024]
-
blk.47.attn_v.weightQ5_K[5120, 1024]
-
blk.47.ffn_down.weight(!unknown_type 20!)[13824, 5120]
-
blk.47.ffn_gate.weight(!unknown_type 20!)[5120, 13824]
-
blk.47.ffn_norm.weightF32[5120]
-
blk.47.ffn_up.weight(!unknown_type 20!)[5120, 13824]
-
output.weightQ6_K[5120, 152064]
-
output_norm.weightF32[5120]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41
blk.42
blk.43
blk.44
blk.45
blk.46
blk.47