This model was developed using Self-Play Preference Optimization (SPPO) at iteration 3, with the google/gemma-2-9b-it architecture as the starting point.
773 Pulls · Updated 3 months ago
53f5e4e62801 · 5.4GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.datasets[openbmb/UltraFeedback]
-
general.file_type25
-
general.finetune..
-
general.languages[en]
-
general.licenseapache-2.0
-
general.name..
-
general.quantization_version2
-
general.size_label9.2B
-
general.tags[text-generation]
-
general.typemodel
-
quantize.imatrix.chunks_count128
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
Name | Type | Shape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.13.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.14.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.15.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.16.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.17.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.18.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.19.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.20.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.21.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.22.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.23.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.24.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.25.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.26.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.27.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.28.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.29.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.30.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.31.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.32.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.33.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.34.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.35.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41