Made from Gemma 2 9B SPPO iter3 and SimPO
249 Pulls Updated 2 months ago
6d5fdfe3dcd1 · 4.5GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.base_model.0.nameGemma 2 9b It SimPO
-
general.base_model.0.organizationPrinceton Nlp
-
general.base_model.0.repo_urlhttps://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO
-
general.base_model.1.nameGemma2 Gutenberg 9B
-
general.base_model.1.organizationNbeerbower
-
general.base_model.1.repo_urlhttps://huggingface.co/nbeerbower/gemma2-gutenberg-9B
-
general.base_model.count2
-
general.basenamegemma2-gutenberg
-
general.file_type27
-
general.licensegemma
-
general.nameGemma2 Gutenberg 9B
-
general.organizationNbeerbower
-
general.quantization_version2
-
general.size_label9B
-
general.tags[mergekit, merge]
-
general.typemodel
-
quantize.imatrix.chunks_count128
-
quantize.imatrix.dataset/training_dir/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.file/models_out/Gemma-2-Ataraxy-9B-GGUF/Gemma-2-Ataraxy-9B.imatrix
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weightQ4_K[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.0.attn_v.weightQ4_K[3584, 2048]
-
blk.0.ffn_down.weightQ4_K[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weightQ4_K[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.1.attn_v.weightQ4_K[3584, 2048]
-
blk.1.ffn_down.weightQ4_K[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weightQ4_K[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.2.attn_v.weightQ4_K[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weightQ4_K[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.3.attn_v.weightQ4_K[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weightQ4_K[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.4.attn_v.weightQ4_K[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weightQ4_K[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.5.attn_v.weightQ4_K[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weightQ4_K[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.6.attn_v.weightQ4_K[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weightQ4_K[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.7.attn_v.weightQ4_K[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weightQ4_K[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.8.attn_v.weightQ4_K[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weightQ4_K[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.9.attn_v.weightQ4_K[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weightQ4_K[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.10.attn_v.weightQ4_K[3584, 2048]
-
blk.10.ffn_down.weightQ4_K[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weightQ4_K[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.11.attn_v.weightQ4_K[3584, 2048]
-
blk.11.ffn_down.weightQ4_K[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weightQ4_K[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.12.attn_v.weightQ4_K[3584, 2048]
-
blk.12.ffn_down.weightQ4_K[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weightQ4_K[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.13.attn_v.weightQ4_K[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.13.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weightQ4_K[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.14.attn_v.weightQ4_K[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.14.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weightQ4_K[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.15.attn_v.weightQ4_K[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.15.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weightQ4_K[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.16.attn_v.weightQ4_K[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.16.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weightQ4_K[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.17.attn_v.weightQ4_K[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.17.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weightQ4_K[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.18.attn_v.weightQ4_K[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.18.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weightQ4_K[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.19.attn_v.weightQ4_K[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.19.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weightQ4_K[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.20.attn_v.weightQ4_K[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.20.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weightQ4_K[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.21.attn_v.weightQ4_K[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.21.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weightQ4_K[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.22.attn_v.weightQ4_K[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.22.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weightQ4_K[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.23.attn_v.weightQ4_K[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.23.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weightQ4_K[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.24.attn_v.weightQ4_K[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.24.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weightQ4_K[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.25.attn_v.weightQ4_K[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.25.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weightQ4_K[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.26.attn_v.weightQ4_K[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.26.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weightQ4_K[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.27.attn_v.weightQ4_K[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.27.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weightQ4_K[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.28.attn_v.weightQ4_K[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.28.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weightQ4_K[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.29.attn_v.weightQ4_K[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.29.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weightQ4_K[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.30.attn_v.weightQ4_K[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.30.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weightQ4_K[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.31.attn_v.weightQ4_K[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.31.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weightQ4_K[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.32.attn_v.weightQ4_K[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.32.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weightQ4_K[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.33.attn_v.weightQ4_K[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.33.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weightQ4_K[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.34.attn_v.weightQ4_K[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.34.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weightQ4_K[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.35.attn_v.weightQ4_K[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.35.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weightQ4_K[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.36.attn_v.weightQ4_K[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weightQ4_K[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.37.attn_v.weightQ4_K[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weightQ4_K[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.38.attn_v.weightQ4_K[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weightQ4_K[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.39.attn_v.weightQ4_K[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weightQ4_K[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.40.attn_v.weightQ4_K[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weightQ4_K[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.41.attn_v.weightQ4_K[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41