Llama 3.2 with Unsloth
1b
3b
84 Pulls Updated 5 days ago
8bd65f28fc23 · 1.5GB
-
general.architectureqwen2
-
general.base_model.0.nameQwen2.5 3B Instruct
-
general.base_model.0.organizationQwen
-
general.base_model.0.repo_urlhttps://huggingface.co/Qwen/Qwen2.5-3B-Instruct
-
general.base_model.count1
-
general.basenameQwen2.5
-
general.file_type26
-
general.finetuneInstruct
-
general.languages[en]
-
general.licenseother
-
general.nameQwen2.5 3B Instruct
-
general.organizationQwen
-
general.quantization_version2
-
general.size_label3B
-
general.tags[unsloth, transformers]
-
general.typemodel
-
qwen2.attention.head_count16
-
qwen2.attention.head_count_kv2
-
qwen2.attention.layer_norm_rms_epsilon1e-06
-
qwen2.block_count36
-
qwen2.context_length32768
-
qwen2.embedding_length2048
-
qwen2.feed_forward_length11008
-
qwen2.rope.freq_base1e+06
-
tokenizer.ggml.add_bos_tokenfalse
-
tokenizer.ggml.bos_token_id151643
-
tokenizer.ggml.eos_token_id151645
-
tokenizer.ggml.merges[Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id151665
-
tokenizer.ggml.preqwen2
-
tokenizer.ggml.token_type[1, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[!, ", #, $, %, ...]
-
NameTypeShape
-
token_embd.weightQ6_K[2048, 151936]
-
blk.0.attn_k.biasF32[256]
-
blk.0.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.0.attn_norm.weightF32[2048]
-
blk.0.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.0.attn_q.biasF32[2048]
-
blk.0.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.0.attn_v.biasF32[256]
-
blk.0.attn_v.weightQ4_K[2048, 256]
-
blk.0.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.0.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.0.ffn_norm.weightF32[2048]
-
blk.0.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.1.attn_k.biasF32[256]
-
blk.1.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.1.attn_norm.weightF32[2048]
-
blk.1.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.1.attn_q.biasF32[2048]
-
blk.1.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.1.attn_v.biasF32[256]
-
blk.1.attn_v.weightQ4_K[2048, 256]
-
blk.1.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.1.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.1.ffn_norm.weightF32[2048]
-
blk.1.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.2.attn_k.biasF32[256]
-
blk.2.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.2.attn_norm.weightF32[2048]
-
blk.2.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.2.attn_q.biasF32[2048]
-
blk.2.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.2.attn_v.biasF32[256]
-
blk.2.attn_v.weightQ4_K[2048, 256]
-
blk.2.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.2.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.2.ffn_norm.weightF32[2048]
-
blk.2.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.3.attn_k.biasF32[256]
-
blk.3.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.3.attn_norm.weightF32[2048]
-
blk.3.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.3.attn_q.biasF32[2048]
-
blk.3.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.3.attn_v.biasF32[256]
-
blk.3.attn_v.weightQ4_K[2048, 256]
-
blk.3.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.3.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.3.ffn_norm.weightF32[2048]
-
blk.3.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.4.attn_k.biasF32[256]
-
blk.4.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.4.attn_norm.weightF32[2048]
-
blk.4.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.4.attn_q.biasF32[2048]
-
blk.4.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.4.attn_v.biasF32[256]
-
blk.4.attn_v.weightQ4_K[2048, 256]
-
blk.4.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.4.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.4.ffn_norm.weightF32[2048]
-
blk.4.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.5.attn_k.biasF32[256]
-
blk.5.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.5.attn_norm.weightF32[2048]
-
blk.5.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.5.attn_q.biasF32[2048]
-
blk.5.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.5.attn_v.biasF32[256]
-
blk.5.attn_v.weightQ4_K[2048, 256]
-
blk.5.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.5.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.5.ffn_norm.weightF32[2048]
-
blk.5.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.6.attn_k.biasF32[256]
-
blk.6.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.6.attn_norm.weightF32[2048]
-
blk.6.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.6.attn_q.biasF32[2048]
-
blk.6.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.6.attn_v.biasF32[256]
-
blk.6.attn_v.weightQ4_K[2048, 256]
-
blk.6.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.6.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.6.ffn_norm.weightF32[2048]
-
blk.6.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.7.attn_k.biasF32[256]
-
blk.7.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.7.attn_norm.weightF32[2048]
-
blk.7.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.7.attn_q.biasF32[2048]
-
blk.7.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.7.attn_v.biasF32[256]
-
blk.7.attn_v.weightQ4_K[2048, 256]
-
blk.7.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.7.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.7.ffn_norm.weightF32[2048]
-
blk.7.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.8.attn_k.biasF32[256]
-
blk.8.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.8.attn_norm.weightF32[2048]
-
blk.8.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.8.attn_q.biasF32[2048]
-
blk.8.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.8.attn_v.biasF32[256]
-
blk.8.attn_v.weightQ4_K[2048, 256]
-
blk.8.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.8.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.8.ffn_norm.weightF32[2048]
-
blk.8.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.9.attn_k.biasF32[256]
-
blk.9.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.9.attn_norm.weightF32[2048]
-
blk.9.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.9.attn_q.biasF32[2048]
-
blk.9.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.9.attn_v.biasF32[256]
-
blk.9.attn_v.weightQ4_K[2048, 256]
-
blk.9.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.9.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.9.ffn_norm.weightF32[2048]
-
blk.9.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.10.attn_k.biasF32[256]
-
blk.10.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.10.attn_norm.weightF32[2048]
-
blk.10.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.10.attn_q.biasF32[2048]
-
blk.10.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.10.attn_v.biasF32[256]
-
blk.10.attn_v.weightQ4_K[2048, 256]
-
blk.10.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.10.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.10.ffn_norm.weightF32[2048]
-
blk.10.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.11.attn_k.biasF32[256]
-
blk.11.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.11.attn_norm.weightF32[2048]
-
blk.11.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.11.attn_q.biasF32[2048]
-
blk.11.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.11.attn_v.biasF32[256]
-
blk.11.attn_v.weightQ4_K[2048, 256]
-
blk.11.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.11.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.11.ffn_norm.weightF32[2048]
-
blk.11.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.12.attn_k.biasF32[256]
-
blk.12.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.12.attn_norm.weightF32[2048]
-
blk.12.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.12.attn_q.biasF32[2048]
-
blk.12.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.12.attn_v.biasF32[256]
-
blk.12.attn_v.weightQ4_K[2048, 256]
-
blk.12.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.12.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.12.ffn_norm.weightF32[2048]
-
blk.12.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.13.attn_k.biasF32[256]
-
blk.13.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.13.attn_norm.weightF32[2048]
-
blk.13.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.13.attn_q.biasF32[2048]
-
blk.13.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.13.attn_v.biasF32[256]
-
blk.13.attn_v.weightQ4_K[2048, 256]
-
blk.13.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.13.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.13.ffn_norm.weightF32[2048]
-
blk.13.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.14.attn_k.biasF32[256]
-
blk.14.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.14.attn_norm.weightF32[2048]
-
blk.14.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.14.attn_q.biasF32[2048]
-
blk.14.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.14.attn_v.biasF32[256]
-
blk.14.attn_v.weightQ4_K[2048, 256]
-
blk.14.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.14.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.14.ffn_norm.weightF32[2048]
-
blk.14.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.15.attn_k.biasF32[256]
-
blk.15.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.15.attn_norm.weightF32[2048]
-
blk.15.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.15.attn_q.biasF32[2048]
-
blk.15.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.15.attn_v.biasF32[256]
-
blk.15.attn_v.weightQ4_K[2048, 256]
-
blk.15.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.15.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.15.ffn_norm.weightF32[2048]
-
blk.15.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.16.attn_k.biasF32[256]
-
blk.16.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.16.attn_norm.weightF32[2048]
-
blk.16.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.16.attn_q.biasF32[2048]
-
blk.16.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.16.attn_v.biasF32[256]
-
blk.16.attn_v.weightQ4_K[2048, 256]
-
blk.16.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.16.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.16.ffn_norm.weightF32[2048]
-
blk.16.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.17.attn_k.biasF32[256]
-
blk.17.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.17.attn_norm.weightF32[2048]
-
blk.17.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.17.attn_q.biasF32[2048]
-
blk.17.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.17.attn_v.biasF32[256]
-
blk.17.attn_v.weightQ4_K[2048, 256]
-
blk.17.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.17.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.17.ffn_norm.weightF32[2048]
-
blk.17.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.18.attn_k.biasF32[256]
-
blk.18.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.18.attn_norm.weightF32[2048]
-
blk.18.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.18.attn_q.biasF32[2048]
-
blk.18.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.18.attn_v.biasF32[256]
-
blk.18.attn_v.weightQ4_K[2048, 256]
-
blk.18.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.18.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.18.ffn_norm.weightF32[2048]
-
blk.18.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.19.attn_k.biasF32[256]
-
blk.19.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.19.attn_norm.weightF32[2048]
-
blk.19.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.19.attn_q.biasF32[2048]
-
blk.19.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.19.attn_v.biasF32[256]
-
blk.19.attn_v.weightQ4_K[2048, 256]
-
blk.19.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.19.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.19.ffn_norm.weightF32[2048]
-
blk.19.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.20.attn_k.biasF32[256]
-
blk.20.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.20.attn_norm.weightF32[2048]
-
blk.20.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.20.attn_q.biasF32[2048]
-
blk.20.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.20.attn_v.biasF32[256]
-
blk.20.attn_v.weightQ4_K[2048, 256]
-
blk.20.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.20.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.20.ffn_norm.weightF32[2048]
-
blk.20.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.21.attn_k.biasF32[256]
-
blk.21.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.21.attn_norm.weightF32[2048]
-
blk.21.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.21.attn_q.biasF32[2048]
-
blk.21.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.21.attn_v.biasF32[256]
-
blk.21.attn_v.weightQ4_K[2048, 256]
-
blk.21.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.21.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.21.ffn_norm.weightF32[2048]
-
blk.21.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.22.attn_k.biasF32[256]
-
blk.22.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.22.attn_norm.weightF32[2048]
-
blk.22.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.22.attn_q.biasF32[2048]
-
blk.22.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.22.attn_v.biasF32[256]
-
blk.22.attn_v.weightQ4_K[2048, 256]
-
blk.22.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.22.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.22.ffn_norm.weightF32[2048]
-
blk.22.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.23.attn_k.biasF32[256]
-
blk.23.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.23.attn_norm.weightF32[2048]
-
blk.23.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.23.attn_q.biasF32[2048]
-
blk.23.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.23.attn_v.biasF32[256]
-
blk.23.attn_v.weightQ4_K[2048, 256]
-
blk.23.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.23.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.23.ffn_norm.weightF32[2048]
-
blk.23.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.24.attn_k.biasF32[256]
-
blk.24.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.24.attn_norm.weightF32[2048]
-
blk.24.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.24.attn_q.biasF32[2048]
-
blk.24.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.24.attn_v.biasF32[256]
-
blk.24.attn_v.weightQ4_K[2048, 256]
-
blk.24.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.24.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.24.ffn_norm.weightF32[2048]
-
blk.24.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.25.attn_k.biasF32[256]
-
blk.25.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.25.attn_norm.weightF32[2048]
-
blk.25.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.25.attn_q.biasF32[2048]
-
blk.25.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.25.attn_v.biasF32[256]
-
blk.25.attn_v.weightQ4_K[2048, 256]
-
blk.25.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.25.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.25.ffn_norm.weightF32[2048]
-
blk.25.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.26.attn_k.biasF32[256]
-
blk.26.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.26.attn_norm.weightF32[2048]
-
blk.26.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.26.attn_q.biasF32[2048]
-
blk.26.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.26.attn_v.biasF32[256]
-
blk.26.attn_v.weightQ4_K[2048, 256]
-
blk.26.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.26.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.26.ffn_norm.weightF32[2048]
-
blk.26.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.27.attn_k.biasF32[256]
-
blk.27.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.27.attn_norm.weightF32[2048]
-
blk.27.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.27.attn_q.biasF32[2048]
-
blk.27.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.27.attn_v.biasF32[256]
-
blk.27.attn_v.weightQ4_K[2048, 256]
-
blk.27.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.27.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.27.ffn_norm.weightF32[2048]
-
blk.27.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.28.attn_k.biasF32[256]
-
blk.28.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.28.attn_norm.weightF32[2048]
-
blk.28.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.28.attn_q.biasF32[2048]
-
blk.28.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.28.attn_v.biasF32[256]
-
blk.28.attn_v.weightQ4_K[2048, 256]
-
blk.28.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.28.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.28.ffn_norm.weightF32[2048]
-
blk.28.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.29.attn_k.biasF32[256]
-
blk.29.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.29.attn_norm.weightF32[2048]
-
blk.29.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.29.attn_q.biasF32[2048]
-
blk.29.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.29.attn_v.biasF32[256]
-
blk.29.attn_v.weightQ4_K[2048, 256]
-
blk.29.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.29.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.29.ffn_norm.weightF32[2048]
-
blk.29.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.30.attn_k.biasF32[256]
-
blk.30.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.30.attn_norm.weightF32[2048]
-
blk.30.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.30.attn_q.biasF32[2048]
-
blk.30.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.30.attn_v.biasF32[256]
-
blk.30.attn_v.weightQ4_K[2048, 256]
-
blk.30.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.30.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.30.ffn_norm.weightF32[2048]
-
blk.30.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.31.attn_k.biasF32[256]
-
blk.31.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.31.attn_norm.weightF32[2048]
-
blk.31.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.31.attn_q.biasF32[2048]
-
blk.31.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.31.attn_v.biasF32[256]
-
blk.31.attn_v.weightQ4_K[2048, 256]
-
blk.31.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.31.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.31.ffn_norm.weightF32[2048]
-
blk.31.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.32.attn_k.biasF32[256]
-
blk.32.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.32.attn_norm.weightF32[2048]
-
blk.32.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.32.attn_q.biasF32[2048]
-
blk.32.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.32.attn_v.biasF32[256]
-
blk.32.attn_v.weightQ4_K[2048, 256]
-
blk.32.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.32.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.32.ffn_norm.weightF32[2048]
-
blk.32.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.33.attn_k.biasF32[256]
-
blk.33.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.33.attn_norm.weightF32[2048]
-
blk.33.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.33.attn_q.biasF32[2048]
-
blk.33.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.33.attn_v.biasF32[256]
-
blk.33.attn_v.weightQ4_K[2048, 256]
-
blk.33.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.33.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.33.ffn_norm.weightF32[2048]
-
blk.33.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.34.attn_k.biasF32[256]
-
blk.34.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.34.attn_norm.weightF32[2048]
-
blk.34.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.34.attn_q.biasF32[2048]
-
blk.34.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.34.attn_v.biasF32[256]
-
blk.34.attn_v.weightQ4_K[2048, 256]
-
blk.34.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.34.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.34.ffn_norm.weightF32[2048]
-
blk.34.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
blk.35.attn_k.biasF32[256]
-
blk.35.attn_k.weight(!unknown_type 21!)[2048, 256]
-
blk.35.attn_norm.weightF32[2048]
-
blk.35.attn_output.weight(!unknown_type 21!)[2048, 2048]
-
blk.35.attn_q.biasF32[2048]
-
blk.35.attn_q.weight(!unknown_type 21!)[2048, 2048]
-
blk.35.attn_v.biasF32[256]
-
blk.35.attn_v.weightQ4_K[2048, 256]
-
blk.35.ffn_down.weight(!unknown_type 21!)[11008, 2048]
-
blk.35.ffn_gate.weight(!unknown_type 21!)[2048, 11008]
-
blk.35.ffn_norm.weightF32[2048]
-
blk.35.ffn_up.weight(!unknown_type 21!)[2048, 11008]
-
output_norm.weightF32[2048]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35