Fine-tuned google/gemma-2-9b-it on princeton-nlp/gemma2-ultrafeedback-armorm with the SimPO objective.
1,021 Pulls Updated 3 months ago
b0c20d70b821 · 4.1GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.base_model.0.nameGemma 2 9b It
-
general.base_model.0.organizationGoogle
-
general.base_model.0.repo_urlhttps://huggingface.co/google/gemma-2-9b-it
-
general.base_model.count1
-
general.basenamegemma-2
-
general.datasets[princeton-nlp/gemma2-ultrafeedback-armorm]
-
general.file_type22
-
general.finetuneit-SimPO
-
general.licensemit
-
general.nameGemma 2 9b It SimPO
-
general.organizationPrinceton Nlp
-
general.quantization_version2
-
general.size_label9B
-
general.tags[alignment-handbook, generated_from_trainer]
-
general.typemodel
-
quantize.imatrix.chunks_count64
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weightI32[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.0.attn_q.weightI32[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weightI32[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.1.attn_q.weightI32[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weightI32[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.2.attn_q.weightI32[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weightI32[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.3.attn_q.weightI32[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weightI32[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.4.attn_q.weightI32[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weightI32[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.5.attn_q.weightI32[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.5.ffn_gate.weightI32[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weightI32[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weightI32[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.6.attn_q.weightI32[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.6.ffn_gate.weightI32[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weightI32[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weightI32[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.7.attn_q.weightI32[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.7.ffn_gate.weightI32[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weightI32[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weightI32[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.8.attn_q.weightI32[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.8.ffn_gate.weightI32[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weightI32[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weightI32[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.9.attn_q.weightI32[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.9.ffn_gate.weightI32[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weightI32[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weightI32[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.10.attn_q.weightI32[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.10.ffn_gate.weightI32[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weightI32[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weightI32[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.11.attn_q.weightI32[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.11.ffn_gate.weightI32[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weightI32[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weightI32[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.12.attn_q.weightI32[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.12.ffn_gate.weightI32[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weightI32[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weightI32[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.13.attn_q.weightI32[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.13.ffn_gate.weightI32[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weightI32[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weightI32[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.14.attn_q.weightI32[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.14.ffn_gate.weightI32[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weightI32[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weightI32[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.15.attn_q.weightI32[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.15.ffn_gate.weightI32[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weightI32[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weightI32[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.16.attn_q.weightI32[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.16.ffn_gate.weightI32[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weightI32[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weightI32[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.17.attn_q.weightI32[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.17.ffn_gate.weightI32[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weightI32[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weightI32[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.18.attn_q.weightI32[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.18.ffn_gate.weightI32[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weightI32[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weightI32[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.19.attn_q.weightI32[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.19.ffn_gate.weightI32[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weightI32[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weightI32[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.20.attn_q.weightI32[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.20.ffn_gate.weightI32[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weightI32[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weightI32[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.21.attn_q.weightI32[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.21.ffn_gate.weightI32[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weightI32[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weightI32[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.22.attn_q.weightI32[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.22.ffn_gate.weightI32[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weightI32[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weightI32[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.23.attn_q.weightI32[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.23.ffn_gate.weightI32[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weightI32[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weightI32[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.24.attn_q.weightI32[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.24.ffn_gate.weightI32[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weightI32[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weightI32[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.25.attn_q.weightI32[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.25.ffn_gate.weightI32[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weightI32[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weightI32[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.26.attn_q.weightI32[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.26.ffn_gate.weightI32[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weightI32[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weightI32[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.27.attn_q.weightI32[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.27.ffn_gate.weightI32[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weightI32[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weightI32[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.28.attn_q.weightI32[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.28.ffn_gate.weightI32[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weightI32[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weightI32[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.29.attn_q.weightI32[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.29.ffn_gate.weightI32[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weightI32[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weightI32[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.30.attn_q.weightI32[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.30.ffn_gate.weightI32[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weightI32[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weightI32[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.31.attn_q.weightI32[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.31.ffn_gate.weightI32[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weightI32[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weightI32[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.32.attn_q.weightI32[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.32.ffn_gate.weightI32[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weightI32[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weightI32[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.33.attn_q.weightI32[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.33.ffn_gate.weightI32[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weightI32[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weightI32[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.34.attn_q.weightI32[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.34.ffn_gate.weightI32[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weightI32[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weightI32[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.35.attn_q.weightI32[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.35.ffn_gate.weightI32[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weightI32[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weightI32[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.36.attn_q.weightI32[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weightI32[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.37.attn_q.weightI32[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weightI32[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.38.attn_q.weightI32[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weightI32[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.39.attn_q.weightI32[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weightI32[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.40.attn_q.weightI32[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weightI32[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.41.attn_q.weightI32[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41