Made from Gemma 2 9B SPPO iter3 and SimPO
249 Pulls Updated 2 months ago
0b2e5a2c9615 · 3.4GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.base_model.0.nameGemma 2 9b It SimPO
-
general.base_model.0.organizationPrinceton Nlp
-
general.base_model.0.repo_urlhttps://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO
-
general.base_model.1.nameGemma2 Gutenberg 9B
-
general.base_model.1.organizationNbeerbower
-
general.base_model.1.repo_urlhttps://huggingface.co/nbeerbower/gemma2-gutenberg-9B
-
general.base_model.count2
-
general.basenamegemma2-gutenberg
-
general.file_type29
-
general.licensegemma
-
general.nameGemma2 Gutenberg 9B
-
general.organizationNbeerbower
-
general.quantization_version2
-
general.size_label9B
-
general.tags[mergekit, merge]
-
general.typemodel
-
quantize.imatrix.chunks_count128
-
quantize.imatrix.dataset/training_dir/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.file/models_out/Gemma-2-Ataraxy-9B-GGUF/Gemma-2-Ataraxy-9B.imatrix
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ5_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.13.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.14.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.15.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.16.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.17.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.18.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.19.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.20.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.21.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.22.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.23.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.24.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.25.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.26.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.27.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.28.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.29.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.30.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.31.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.32.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.33.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.34.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.35.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 22!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 22!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 22!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41