This model was developed using Self-Play Preference Optimization at iteration 3, with the google/gemma-2-9b-it architecture as the starting point.
804 Pulls Updated 4 months ago
65c95f30c57b · 4.3GB
-
gemma2.attention.head_count: 16
-
gemma2.attention.head_count_kv: 8
-
gemma2.attention.key_length: 256
-
gemma2.attention.layer_norm_rms_epsilon: 1e-06
-
gemma2.attention.sliding_window: 4096
-
gemma2.attention.value_length: 256
-
gemma2.attn_logit_softcapping: 50
-
gemma2.block_count: 42
-
gemma2.context_length: 8192
-
gemma2.embedding_length: 3584
-
gemma2.feed_forward_length: 14336
-
gemma2.final_logit_softcapping: 30
-
general.architecture: gemma2
-
general.datasets: [openbmb/UltraFeedback]
-
general.file_type: 26
-
general.finetune: ..
-
general.languages: [en]
-
general.license: apache-2.0
-
general.name: ..
-
general.quantization_version: 2
-
general.size_label: 9.2B
-
general.tags: [text-generation]
-
general.type: model
-
quantize.imatrix.chunks_count: 128
-
quantize.imatrix.dataset: /shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count: 294
-
quantize.imatrix.file: imatrix.dat
-
tokenizer.ggml.add_bos_token: true
-
tokenizer.ggml.add_eos_token: false
-
tokenizer.ggml.add_space_prefix: false
-
tokenizer.ggml.bos_token_id: 2
-
tokenizer.ggml.eos_token_id: 1
-
tokenizer.ggml.model: llama
-
tokenizer.ggml.padding_token_id: 0
-
tokenizer.ggml.pre: default
-
tokenizer.ggml.scores: [-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type: [3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens: [<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id: 3
-
Name | Type | Shape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.13.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.14.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.15.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.16.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.17.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.18.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.19.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.20.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.21.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.22.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.23.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.24.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.25.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.26.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.27.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.28.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.29.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.30.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.31.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.32.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.33.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.34.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.35.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 21!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 21!)[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 21!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 21!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 21!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 21!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41