Fine-tuned google/gemma-2-9b-it on princeton-nlp/gemma2-ultrafeedback-armorm with the SimPO objective.
1,218 Pulls Updated 3 months ago
dbeb81e40b84 · 5.4GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.base_model.0.nameGemma 2 9b It
-
general.base_model.0.organizationGoogle
-
general.base_model.0.repo_urlhttps://huggingface.co/google/gemma-2-9b-it
-
general.base_model.count1
-
general.basenamegemma-2
-
general.datasets[princeton-nlp/gemma2-ultrafeedback-armorm]
-
general.file_type25
-
general.finetuneit-SimPO
-
general.licensemit
-
general.nameGemma 2 9b It SimPO
-
general.organizationPrinceton Nlp
-
general.quantization_version2
-
general.size_label9B
-
general.tags[alignment-handbook, generated_from_trainer]
-
general.typemodel
-
quantize.imatrix.chunks_count64
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.13.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.14.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.15.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.16.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.17.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.18.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.19.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.20.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.21.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.22.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.23.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.24.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.25.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.26.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.27.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.28.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.29.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.30.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.31.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.32.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.33.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.34.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.35.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41