Fine-tuned google/gemma-2-9b-it on princeton-nlp/gemma2-ultrafeedback-armorm with the SimPO objective.
1,218 Pulls Updated 3 months ago
9db5f2794f53 · 5.2GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.base_model.0.nameGemma 2 9b It
-
general.base_model.0.organizationGoogle
-
general.base_model.0.repo_urlhttps://huggingface.co/google/gemma-2-9b-it
-
general.base_model.count1
-
general.basenamegemma-2
-
general.datasets[princeton-nlp/gemma2-ultrafeedback-armorm]
-
general.file_type30
-
general.finetuneit-SimPO
-
general.licensemit
-
general.nameGemma 2 9b It SimPO
-
general.organizationPrinceton Nlp
-
general.quantization_version2
-
general.size_label9B
-
general.tags[alignment-handbook, generated_from_trainer]
-
general.typemodel
-
quantize.imatrix.chunks_count64
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.13.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.14.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.15.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.16.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.17.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.18.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.19.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.20.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.21.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.22.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.23.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.24.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.25.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.26.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.27.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.28.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.29.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.30.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.31.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.32.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.33.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.34.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.35.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41