Made from Gemma 2 9B SPPO iter3 and SimPO
249 Pulls Updated 2 months ago
49da72b2292f · 4.1GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.base_model.0.nameGemma 2 9b It SimPO
-
general.base_model.0.organizationPrinceton Nlp
-
general.base_model.0.repo_urlhttps://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO
-
general.base_model.1.nameGemma2 Gutenberg 9B
-
general.base_model.1.organizationNbeerbower
-
general.base_model.1.repo_urlhttps://huggingface.co/nbeerbower/gemma2-gutenberg-9B
-
general.base_model.count2
-
general.basenamegemma2-gutenberg
-
general.file_type22
-
general.licensegemma
-
general.nameGemma2 Gutenberg 9B
-
general.organizationNbeerbower
-
general.quantization_version2
-
general.size_label9B
-
general.tags[mergekit, merge]
-
general.typemodel
-
quantize.imatrix.chunks_count128
-
quantize.imatrix.dataset/training_dir/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.file/models_out/Gemma-2-Ataraxy-9B-GGUF/Gemma-2-Ataraxy-9B.imatrix
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weightI32[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.0.attn_q.weightI32[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weightI32[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.1.attn_q.weightI32[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weightI32[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.2.attn_q.weightI32[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.2.ffn_gate.weightI32[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weightI32[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weightI32[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.3.attn_q.weightI32[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.3.ffn_gate.weightI32[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weightI32[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weightI32[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.4.attn_q.weightI32[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.4.ffn_gate.weightI32[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weightI32[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weightI32[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.5.attn_q.weightI32[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weightI32[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.6.attn_q.weightI32[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weightI32[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.7.attn_q.weightI32[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weightI32[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.8.attn_q.weightI32[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weightI32[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.9.attn_q.weightI32[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weightI32[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.10.attn_q.weightI32[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weightI32[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.11.attn_q.weightI32[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weightI32[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.12.attn_q.weightI32[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weightI32[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.13.attn_q.weightI32[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.13.ffn_gate.weightI32[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weightI32[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weightI32[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.14.attn_q.weightI32[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.14.ffn_gate.weightI32[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weightI32[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weightI32[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.15.attn_q.weightI32[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.15.ffn_gate.weightI32[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weightI32[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weightI32[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.16.attn_q.weightI32[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.16.ffn_gate.weightI32[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weightI32[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weightI32[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.17.attn_q.weightI32[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.17.ffn_gate.weightI32[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weightI32[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weightI32[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.18.attn_q.weightI32[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.18.ffn_gate.weightI32[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weightI32[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weightI32[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.19.attn_q.weightI32[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.19.ffn_gate.weightI32[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weightI32[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weightI32[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.20.attn_q.weightI32[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.20.ffn_gate.weightI32[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weightI32[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weightI32[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.21.attn_q.weightI32[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.21.ffn_gate.weightI32[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weightI32[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weightI32[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.22.attn_q.weightI32[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.22.ffn_gate.weightI32[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weightI32[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weightI32[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.23.attn_q.weightI32[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.23.ffn_gate.weightI32[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weightI32[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weightI32[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.24.attn_q.weightI32[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.24.ffn_gate.weightI32[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weightI32[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weightI32[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.25.attn_q.weightI32[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.25.ffn_gate.weightI32[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weightI32[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weightI32[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.26.attn_q.weightI32[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.26.ffn_gate.weightI32[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weightI32[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weightI32[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.27.attn_q.weightI32[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.27.ffn_gate.weightI32[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weightI32[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weightI32[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.28.attn_q.weightI32[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.28.ffn_gate.weightI32[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weightI32[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weightI32[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.29.attn_q.weightI32[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.29.ffn_gate.weightI32[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weightI32[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weightI32[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.30.attn_q.weightI32[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.30.ffn_gate.weightI32[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weightI32[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weightI32[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.31.attn_q.weightI32[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.31.ffn_gate.weightI32[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weightI32[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weightI32[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.32.attn_q.weightI32[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.32.ffn_gate.weightI32[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weightI32[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weightI32[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.33.attn_q.weightI32[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.33.ffn_gate.weightI32[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weightI32[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weightI32[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.34.attn_q.weightI32[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.34.ffn_gate.weightI32[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weightI32[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weightI32[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.35.attn_q.weightI32[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.35.ffn_gate.weightI32[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weightI32[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weightI32[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.36.attn_q.weightI32[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.36.ffn_gate.weightI32[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weightI32[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weightI32[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.37.attn_q.weightI32[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.37.ffn_gate.weightI32[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weightI32[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weightI32[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.38.attn_q.weightI32[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.38.ffn_gate.weightI32[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weightI32[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weightI32[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.39.attn_q.weightI32[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.39.ffn_gate.weightI32[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weightI32[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weightI32[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.40.attn_q.weightI32[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.40.ffn_gate.weightI32[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weightI32[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weightI32[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.41.attn_q.weightI32[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41