Llama 3.2 with Unsloth
1b
3b
84 Pulls Updated 5 days ago
2edba47be4bf · 1.5GB
-
general.architectureqwen2
-
general.base_model.0.nameQwen2.5 3B Instruct
-
general.base_model.0.organizationQwen
-
general.base_model.0.repo_urlhttps://huggingface.co/Qwen/Qwen2.5-3B-Instruct
-
general.base_model.count1
-
general.basenameQwen2.5
-
general.file_type27
-
general.finetuneInstruct
-
general.languages[en]
-
general.licenseother
-
general.nameQwen2.5 3B Instruct
-
general.organizationQwen
-
general.quantization_version2
-
general.size_label3B
-
general.tags[unsloth, transformers]
-
general.typemodel
-
qwen2.attention.head_count16
-
qwen2.attention.head_count_kv2
-
qwen2.attention.layer_norm_rms_epsilon1e-06
-
qwen2.block_count36
-
qwen2.context_length32768
-
qwen2.embedding_length2048
-
qwen2.feed_forward_length11008
-
qwen2.rope.freq_base1e+06
-
tokenizer.ggml.add_bos_tokenfalse
-
tokenizer.ggml.bos_token_id151643
-
tokenizer.ggml.eos_token_id151645
-
tokenizer.ggml.merges[Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id151665
-
tokenizer.ggml.preqwen2
-
tokenizer.ggml.token_type[1, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[!, ", #, $, %, ...]
-
NameTypeShape
-
token_embd.weightQ6_K[2048, 151936]
-
blk.0.attn_k.biasF32[256]
-
blk.0.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.0.attn_norm.weightF32[2048]
-
blk.0.attn_output.weightQ4_K[2048, 2048]
-
blk.0.attn_q.biasF32[2048]
-
blk.0.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.0.attn_v.biasF32[256]
-
blk.0.attn_v.weightQ4_K[2048, 256]
-
blk.0.ffn_down.weightQ4_K[11008, 2048]
-
blk.0.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.0.ffn_norm.weightF32[2048]
-
blk.0.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.1.attn_k.biasF32[256]
-
blk.1.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.1.attn_norm.weightF32[2048]
-
blk.1.attn_output.weightQ4_K[2048, 2048]
-
blk.1.attn_q.biasF32[2048]
-
blk.1.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.1.attn_v.biasF32[256]
-
blk.1.attn_v.weightQ4_K[2048, 256]
-
blk.1.ffn_down.weightQ4_K[11008, 2048]
-
blk.1.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.1.ffn_norm.weightF32[2048]
-
blk.1.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.2.attn_k.biasF32[256]
-
blk.2.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.2.attn_norm.weightF32[2048]
-
blk.2.attn_output.weightQ4_K[2048, 2048]
-
blk.2.attn_q.biasF32[2048]
-
blk.2.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.2.attn_v.biasF32[256]
-
blk.2.attn_v.weightQ4_K[2048, 256]
-
blk.2.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.2.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.2.ffn_norm.weightF32[2048]
-
blk.2.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.3.attn_k.biasF32[256]
-
blk.3.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.3.attn_norm.weightF32[2048]
-
blk.3.attn_output.weightQ4_K[2048, 2048]
-
blk.3.attn_q.biasF32[2048]
-
blk.3.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.3.attn_v.biasF32[256]
-
blk.3.attn_v.weightQ4_K[2048, 256]
-
blk.3.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.3.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.3.ffn_norm.weightF32[2048]
-
blk.3.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.4.attn_k.biasF32[256]
-
blk.4.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.4.attn_norm.weightF32[2048]
-
blk.4.attn_output.weightQ4_K[2048, 2048]
-
blk.4.attn_q.biasF32[2048]
-
blk.4.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.4.attn_v.biasF32[256]
-
blk.4.attn_v.weightQ4_K[2048, 256]
-
blk.4.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.4.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.4.ffn_norm.weightF32[2048]
-
blk.4.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.5.attn_k.biasF32[256]
-
blk.5.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.5.attn_norm.weightF32[2048]
-
blk.5.attn_output.weightQ4_K[2048, 2048]
-
blk.5.attn_q.biasF32[2048]
-
blk.5.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.5.attn_v.biasF32[256]
-
blk.5.attn_v.weightQ4_K[2048, 256]
-
blk.5.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.5.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.5.ffn_norm.weightF32[2048]
-
blk.5.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.6.attn_k.biasF32[256]
-
blk.6.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.6.attn_norm.weightF32[2048]
-
blk.6.attn_output.weightQ4_K[2048, 2048]
-
blk.6.attn_q.biasF32[2048]
-
blk.6.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.6.attn_v.biasF32[256]
-
blk.6.attn_v.weightQ4_K[2048, 256]
-
blk.6.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.6.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.6.ffn_norm.weightF32[2048]
-
blk.6.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.7.attn_k.biasF32[256]
-
blk.7.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.7.attn_norm.weightF32[2048]
-
blk.7.attn_output.weightQ4_K[2048, 2048]
-
blk.7.attn_q.biasF32[2048]
-
blk.7.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.7.attn_v.biasF32[256]
-
blk.7.attn_v.weightQ4_K[2048, 256]
-
blk.7.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.7.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.7.ffn_norm.weightF32[2048]
-
blk.7.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.8.attn_k.biasF32[256]
-
blk.8.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.8.attn_norm.weightF32[2048]
-
blk.8.attn_output.weightQ4_K[2048, 2048]
-
blk.8.attn_q.biasF32[2048]
-
blk.8.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.8.attn_v.biasF32[256]
-
blk.8.attn_v.weightQ4_K[2048, 256]
-
blk.8.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.8.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.8.ffn_norm.weightF32[2048]
-
blk.8.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.9.attn_k.biasF32[256]
-
blk.9.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.9.attn_norm.weightF32[2048]
-
blk.9.attn_output.weightQ4_K[2048, 2048]
-
blk.9.attn_q.biasF32[2048]
-
blk.9.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.9.attn_v.biasF32[256]
-
blk.9.attn_v.weightQ4_K[2048, 256]
-
blk.9.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.9.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.9.ffn_norm.weightF32[2048]
-
blk.9.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.10.attn_k.biasF32[256]
-
blk.10.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.10.attn_norm.weightF32[2048]
-
blk.10.attn_output.weightQ4_K[2048, 2048]
-
blk.10.attn_q.biasF32[2048]
-
blk.10.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.10.attn_v.biasF32[256]
-
blk.10.attn_v.weightQ4_K[2048, 256]
-
blk.10.ffn_down.weightQ4_K[11008, 2048]
-
blk.10.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.10.ffn_norm.weightF32[2048]
-
blk.10.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.11.attn_k.biasF32[256]
-
blk.11.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.11.attn_norm.weightF32[2048]
-
blk.11.attn_output.weightQ4_K[2048, 2048]
-
blk.11.attn_q.biasF32[2048]
-
blk.11.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.11.attn_v.biasF32[256]
-
blk.11.attn_v.weightQ4_K[2048, 256]
-
blk.11.ffn_down.weightQ4_K[11008, 2048]
-
blk.11.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.11.ffn_norm.weightF32[2048]
-
blk.11.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.12.attn_k.biasF32[256]
-
blk.12.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.12.attn_norm.weightF32[2048]
-
blk.12.attn_output.weightQ4_K[2048, 2048]
-
blk.12.attn_q.biasF32[2048]
-
blk.12.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.12.attn_v.biasF32[256]
-
blk.12.attn_v.weightQ4_K[2048, 256]
-
blk.12.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.12.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.12.ffn_norm.weightF32[2048]
-
blk.12.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.13.attn_k.biasF32[256]
-
blk.13.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.13.attn_norm.weightF32[2048]
-
blk.13.attn_output.weightQ4_K[2048, 2048]
-
blk.13.attn_q.biasF32[2048]
-
blk.13.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.13.attn_v.biasF32[256]
-
blk.13.attn_v.weightQ4_K[2048, 256]
-
blk.13.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.13.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.13.ffn_norm.weightF32[2048]
-
blk.13.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.14.attn_k.biasF32[256]
-
blk.14.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.14.attn_norm.weightF32[2048]
-
blk.14.attn_output.weightQ4_K[2048, 2048]
-
blk.14.attn_q.biasF32[2048]
-
blk.14.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.14.attn_v.biasF32[256]
-
blk.14.attn_v.weightQ4_K[2048, 256]
-
blk.14.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.14.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.14.ffn_norm.weightF32[2048]
-
blk.14.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.15.attn_k.biasF32[256]
-
blk.15.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.15.attn_norm.weightF32[2048]
-
blk.15.attn_output.weightQ4_K[2048, 2048]
-
blk.15.attn_q.biasF32[2048]
-
blk.15.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.15.attn_v.biasF32[256]
-
blk.15.attn_v.weightQ4_K[2048, 256]
-
blk.15.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.15.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.15.ffn_norm.weightF32[2048]
-
blk.15.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.16.attn_k.biasF32[256]
-
blk.16.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.16.attn_norm.weightF32[2048]
-
blk.16.attn_output.weightQ4_K[2048, 2048]
-
blk.16.attn_q.biasF32[2048]
-
blk.16.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.16.attn_v.biasF32[256]
-
blk.16.attn_v.weightQ4_K[2048, 256]
-
blk.16.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.16.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.16.ffn_norm.weightF32[2048]
-
blk.16.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.17.attn_k.biasF32[256]
-
blk.17.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.17.attn_norm.weightF32[2048]
-
blk.17.attn_output.weightQ4_K[2048, 2048]
-
blk.17.attn_q.biasF32[2048]
-
blk.17.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.17.attn_v.biasF32[256]
-
blk.17.attn_v.weightQ4_K[2048, 256]
-
blk.17.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.17.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.17.ffn_norm.weightF32[2048]
-
blk.17.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.18.attn_k.biasF32[256]
-
blk.18.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.18.attn_norm.weightF32[2048]
-
blk.18.attn_output.weightQ4_K[2048, 2048]
-
blk.18.attn_q.biasF32[2048]
-
blk.18.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.18.attn_v.biasF32[256]
-
blk.18.attn_v.weightQ4_K[2048, 256]
-
blk.18.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.18.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.18.ffn_norm.weightF32[2048]
-
blk.18.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.19.attn_k.biasF32[256]
-
blk.19.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.19.attn_norm.weightF32[2048]
-
blk.19.attn_output.weightQ4_K[2048, 2048]
-
blk.19.attn_q.biasF32[2048]
-
blk.19.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.19.attn_v.biasF32[256]
-
blk.19.attn_v.weightQ4_K[2048, 256]
-
blk.19.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.19.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.19.ffn_norm.weightF32[2048]
-
blk.19.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.20.attn_k.biasF32[256]
-
blk.20.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.20.attn_norm.weightF32[2048]
-
blk.20.attn_output.weightQ4_K[2048, 2048]
-
blk.20.attn_q.biasF32[2048]
-
blk.20.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.20.attn_v.biasF32[256]
-
blk.20.attn_v.weightQ4_K[2048, 256]
-
blk.20.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.20.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.20.ffn_norm.weightF32[2048]
-
blk.20.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.21.attn_k.biasF32[256]
-
blk.21.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.21.attn_norm.weightF32[2048]
-
blk.21.attn_output.weightQ4_K[2048, 2048]
-
blk.21.attn_q.biasF32[2048]
-
blk.21.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.21.attn_v.biasF32[256]
-
blk.21.attn_v.weightQ4_K[2048, 256]
-
blk.21.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.21.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.21.ffn_norm.weightF32[2048]
-
blk.21.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.22.attn_k.biasF32[256]
-
blk.22.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.22.attn_norm.weightF32[2048]
-
blk.22.attn_output.weightQ4_K[2048, 2048]
-
blk.22.attn_q.biasF32[2048]
-
blk.22.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.22.attn_v.biasF32[256]
-
blk.22.attn_v.weightQ4_K[2048, 256]
-
blk.22.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.22.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.22.ffn_norm.weightF32[2048]
-
blk.22.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.23.attn_k.biasF32[256]
-
blk.23.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.23.attn_norm.weightF32[2048]
-
blk.23.attn_output.weightQ4_K[2048, 2048]
-
blk.23.attn_q.biasF32[2048]
-
blk.23.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.23.attn_v.biasF32[256]
-
blk.23.attn_v.weightQ4_K[2048, 256]
-
blk.23.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.23.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.23.ffn_norm.weightF32[2048]
-
blk.23.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.24.attn_k.biasF32[256]
-
blk.24.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.24.attn_norm.weightF32[2048]
-
blk.24.attn_output.weightQ4_K[2048, 2048]
-
blk.24.attn_q.biasF32[2048]
-
blk.24.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.24.attn_v.biasF32[256]
-
blk.24.attn_v.weightQ4_K[2048, 256]
-
blk.24.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.24.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.24.ffn_norm.weightF32[2048]
-
blk.24.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.25.attn_k.biasF32[256]
-
blk.25.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.25.attn_norm.weightF32[2048]
-
blk.25.attn_output.weightQ4_K[2048, 2048]
-
blk.25.attn_q.biasF32[2048]
-
blk.25.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.25.attn_v.biasF32[256]
-
blk.25.attn_v.weightQ4_K[2048, 256]
-
blk.25.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.25.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.25.ffn_norm.weightF32[2048]
-
blk.25.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.26.attn_k.biasF32[256]
-
blk.26.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.26.attn_norm.weightF32[2048]
-
blk.26.attn_output.weightQ4_K[2048, 2048]
-
blk.26.attn_q.biasF32[2048]
-
blk.26.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.26.attn_v.biasF32[256]
-
blk.26.attn_v.weightQ4_K[2048, 256]
-
blk.26.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.26.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.26.ffn_norm.weightF32[2048]
-
blk.26.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.27.attn_k.biasF32[256]
-
blk.27.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.27.attn_norm.weightF32[2048]
-
blk.27.attn_output.weightQ4_K[2048, 2048]
-
blk.27.attn_q.biasF32[2048]
-
blk.27.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.27.attn_v.biasF32[256]
-
blk.27.attn_v.weightQ4_K[2048, 256]
-
blk.27.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.27.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.27.ffn_norm.weightF32[2048]
-
blk.27.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.28.attn_k.biasF32[256]
-
blk.28.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.28.attn_norm.weightF32[2048]
-
blk.28.attn_output.weightQ4_K[2048, 2048]
-
blk.28.attn_q.biasF32[2048]
-
blk.28.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.28.attn_v.biasF32[256]
-
blk.28.attn_v.weightQ4_K[2048, 256]
-
blk.28.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.28.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.28.ffn_norm.weightF32[2048]
-
blk.28.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.29.attn_k.biasF32[256]
-
blk.29.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.29.attn_norm.weightF32[2048]
-
blk.29.attn_output.weightQ4_K[2048, 2048]
-
blk.29.attn_q.biasF32[2048]
-
blk.29.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.29.attn_v.biasF32[256]
-
blk.29.attn_v.weightQ4_K[2048, 256]
-
blk.29.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.29.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.29.ffn_norm.weightF32[2048]
-
blk.29.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.30.attn_k.biasF32[256]
-
blk.30.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.30.attn_norm.weightF32[2048]
-
blk.30.attn_output.weightQ4_K[2048, 2048]
-
blk.30.attn_q.biasF32[2048]
-
blk.30.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.30.attn_v.biasF32[256]
-
blk.30.attn_v.weightQ4_K[2048, 256]
-
blk.30.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.30.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.30.ffn_norm.weightF32[2048]
-
blk.30.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.31.attn_k.biasF32[256]
-
blk.31.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.31.attn_norm.weightF32[2048]
-
blk.31.attn_output.weightQ4_K[2048, 2048]
-
blk.31.attn_q.biasF32[2048]
-
blk.31.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.31.attn_v.biasF32[256]
-
blk.31.attn_v.weightQ4_K[2048, 256]
-
blk.31.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.31.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.31.ffn_norm.weightF32[2048]
-
blk.31.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.32.attn_k.biasF32[256]
-
blk.32.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.32.attn_norm.weightF32[2048]
-
blk.32.attn_output.weightQ4_K[2048, 2048]
-
blk.32.attn_q.biasF32[2048]
-
blk.32.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.32.attn_v.biasF32[256]
-
blk.32.attn_v.weightQ4_K[2048, 256]
-
blk.32.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.32.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.32.ffn_norm.weightF32[2048]
-
blk.32.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.33.attn_k.biasF32[256]
-
blk.33.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.33.attn_norm.weightF32[2048]
-
blk.33.attn_output.weightQ4_K[2048, 2048]
-
blk.33.attn_q.biasF32[2048]
-
blk.33.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.33.attn_v.biasF32[256]
-
blk.33.attn_v.weightQ4_K[2048, 256]
-
blk.33.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.33.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.33.ffn_norm.weightF32[2048]
-
blk.33.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.34.attn_k.biasF32[256]
-
blk.34.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.34.attn_norm.weightF32[2048]
-
blk.34.attn_output.weightQ4_K[2048, 2048]
-
blk.34.attn_q.biasF32[2048]
-
blk.34.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.34.attn_v.biasF32[256]
-
blk.34.attn_v.weightQ4_K[2048, 256]
-
blk.34.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.34.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.34.ffn_norm.weightF32[2048]
-
blk.34.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.35.attn_k.biasF32[256]
-
blk.35.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.35.attn_norm.weightF32[2048]
-
blk.35.attn_output.weightQ4_K[2048, 2048]
-
blk.35.attn_q.biasF32[2048]
-
blk.35.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.35.attn_v.biasF32[256]
-
blk.35.attn_v.weightQ4_K[2048, 256]
-
blk.35.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.35.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.35.ffn_norm.weightF32[2048]
-
blk.35.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
output_norm.weightF32[2048]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35