Fine-tuned google/gemma-2-9b-it on princeton-nlp/gemma2-ultrafeedback-armorm with the SimPO objective.
1,021 Pulls Updated 3 months ago
9db5f2794f53 · 5.2GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.base_model.0.nameGemma 2 9b It
-
general.base_model.0.organizationGoogle
-
general.base_model.0.repo_urlhttps://huggingface.co/google/gemma-2-9b-it
-
general.base_model.count1
-
general.basenamegemma-2
-
general.datasets[princeton-nlp/gemma2-ultrafeedback-armorm]
-
general.file_type30
-
general.finetuneit-SimPO
-
general.licensemit
-
general.nameGemma 2 9b It SimPO
-
general.organizationPrinceton Nlp
-
general.quantization_version2
-
general.size_label9B
-
general.tags[alignment-handbook, generated_from_trainer]
-
general.typemodel
-
quantize.imatrix.chunks_count64
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.13.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.14.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.15.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.16.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.17.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.18.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.19.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.20.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.21.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.22.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.23.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.24.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.25.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.26.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.27.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.28.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.29.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.30.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.31.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.32.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.33.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.34.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.35.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41