This model was developed using Self-Play Preference Optimization at iteration 3, based on the google/gemma-2-9b-it architecture as the starting point.
773 Pulls Updated 3 months ago
499a3f704b60 · 3.8GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.datasets[openbmb/UltraFeedback]
-
general.file_type23
-
general.finetune..
-
general.languages[en]
-
general.licenseapache-2.0
-
general.name..
-
general.quantization_version2
-
general.size_label9.2B
-
general.tags[text-generation]
-
general.typemodel
-
quantize.imatrix.chunks_count128
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ5_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.0.attn_v.weightI32[3584, 2048]
-
blk.0.ffn_down.weightI32[14336, 3584]
-
blk.0.ffn_gate.weightI32[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weightI32[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.1.attn_v.weightI32[3584, 2048]
-
blk.1.ffn_down.weightI32[14336, 3584]
-
blk.1.ffn_gate.weightI32[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weightI32[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.2.attn_v.weightI32[3584, 2048]
-
blk.2.ffn_down.weightI32[14336, 3584]
-
blk.2.ffn_gate.weightI32[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weightI32[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.3.attn_v.weightI32[3584, 2048]
-
blk.3.ffn_down.weightI32[14336, 3584]
-
blk.3.ffn_gate.weightI32[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weightI32[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.4.attn_v.weightI32[3584, 2048]
-
blk.4.ffn_down.weightI32[14336, 3584]
-
blk.4.ffn_gate.weightI32[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weightI32[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.5.attn_v.weightI32[3584, 2048]
-
blk.5.ffn_down.weightI32[14336, 3584]
-
blk.5.ffn_gate.weightI32[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weightI32[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.6.attn_v.weightI32[3584, 2048]
-
blk.6.ffn_down.weightI32[14336, 3584]
-
blk.6.ffn_gate.weightI32[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weightI32[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.7.attn_v.weightI32[3584, 2048]
-
blk.7.ffn_down.weightI32[14336, 3584]
-
blk.7.ffn_gate.weightI32[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weightI32[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.8.attn_v.weightI32[3584, 2048]
-
blk.8.ffn_down.weightI32[14336, 3584]
-
blk.8.ffn_gate.weightI32[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weightI32[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.9.attn_v.weightI32[3584, 2048]
-
blk.9.ffn_down.weightI32[14336, 3584]
-
blk.9.ffn_gate.weightI32[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weightI32[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.10.attn_v.weightI32[3584, 2048]
-
blk.10.ffn_down.weightI32[14336, 3584]
-
blk.10.ffn_gate.weightI32[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weightI32[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.11.attn_v.weightI32[3584, 2048]
-
blk.11.ffn_down.weightI32[14336, 3584]
-
blk.11.ffn_gate.weightI32[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weightI32[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.12.attn_v.weightI32[3584, 2048]
-
blk.12.ffn_down.weightI32[14336, 3584]
-
blk.12.ffn_gate.weightI32[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weightI32[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.13.attn_v.weightI32[3584, 2048]
-
blk.13.ffn_down.weightI32[14336, 3584]
-
blk.13.ffn_gate.weightI32[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weightI32[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.14.attn_v.weightI32[3584, 2048]
-
blk.14.ffn_down.weightI32[14336, 3584]
-
blk.14.ffn_gate.weightI32[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weightI32[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.15.attn_v.weightI32[3584, 2048]
-
blk.15.ffn_down.weightI32[14336, 3584]
-
blk.15.ffn_gate.weightI32[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weightI32[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.16.attn_v.weightI32[3584, 2048]
-
blk.16.ffn_down.weightI32[14336, 3584]
-
blk.16.ffn_gate.weightI32[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weightI32[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.17.attn_v.weightI32[3584, 2048]
-
blk.17.ffn_down.weightI32[14336, 3584]
-
blk.17.ffn_gate.weightI32[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weightI32[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.18.attn_v.weightI32[3584, 2048]
-
blk.18.ffn_down.weightI32[14336, 3584]
-
blk.18.ffn_gate.weightI32[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weightI32[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.19.attn_v.weightI32[3584, 2048]
-
blk.19.ffn_down.weightI32[14336, 3584]
-
blk.19.ffn_gate.weightI32[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weightI32[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.20.attn_v.weightI32[3584, 2048]
-
blk.20.ffn_down.weightI32[14336, 3584]
-
blk.20.ffn_gate.weightI32[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weightI32[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.21.attn_v.weightI32[3584, 2048]
-
blk.21.ffn_down.weightI32[14336, 3584]
-
blk.21.ffn_gate.weightI32[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weightI32[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.22.attn_v.weightI32[3584, 2048]
-
blk.22.ffn_down.weightI32[14336, 3584]
-
blk.22.ffn_gate.weightI32[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weightI32[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.23.attn_v.weightI32[3584, 2048]
-
blk.23.ffn_down.weightI32[14336, 3584]
-
blk.23.ffn_gate.weightI32[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weightI32[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.24.attn_v.weightI32[3584, 2048]
-
blk.24.ffn_down.weightI32[14336, 3584]
-
blk.24.ffn_gate.weightI32[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weightI32[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.25.attn_v.weightI32[3584, 2048]
-
blk.25.ffn_down.weightI32[14336, 3584]
-
blk.25.ffn_gate.weightI32[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weightI32[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.26.attn_v.weightI32[3584, 2048]
-
blk.26.ffn_down.weightI32[14336, 3584]
-
blk.26.ffn_gate.weightI32[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weightI32[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.27.attn_v.weightI32[3584, 2048]
-
blk.27.ffn_down.weightI32[14336, 3584]
-
blk.27.ffn_gate.weightI32[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weightI32[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.28.attn_v.weightI32[3584, 2048]
-
blk.28.ffn_down.weightI32[14336, 3584]
-
blk.28.ffn_gate.weightI32[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weightI32[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.29.attn_v.weightI32[3584, 2048]
-
blk.29.ffn_down.weightI32[14336, 3584]
-
blk.29.ffn_gate.weightI32[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weightI32[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.30.attn_v.weightI32[3584, 2048]
-
blk.30.ffn_down.weightI32[14336, 3584]
-
blk.30.ffn_gate.weightI32[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weightI32[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.31.attn_v.weightI32[3584, 2048]
-
blk.31.ffn_down.weightI32[14336, 3584]
-
blk.31.ffn_gate.weightI32[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weightI32[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.32.attn_v.weightI32[3584, 2048]
-
blk.32.ffn_down.weightI32[14336, 3584]
-
blk.32.ffn_gate.weightI32[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weightI32[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.33.attn_v.weightI32[3584, 2048]
-
blk.33.ffn_down.weightI32[14336, 3584]
-
blk.33.ffn_gate.weightI32[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weightI32[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.34.attn_v.weightI32[3584, 2048]
-
blk.34.ffn_down.weightI32[14336, 3584]
-
blk.34.ffn_gate.weightI32[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weightI32[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.35.attn_v.weightI32[3584, 2048]
-
blk.35.ffn_down.weightI32[14336, 3584]
-
blk.35.ffn_gate.weightI32[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weightI32[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.36.attn_v.weightI32[3584, 2048]
-
blk.36.ffn_down.weightI32[14336, 3584]
-
blk.36.ffn_gate.weightI32[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weightI32[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.37.attn_v.weightI32[3584, 2048]
-
blk.37.ffn_down.weightI32[14336, 3584]
-
blk.37.ffn_gate.weightI32[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weightI32[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.38.attn_v.weightI32[3584, 2048]
-
blk.38.ffn_down.weightI32[14336, 3584]
-
blk.38.ffn_gate.weightI32[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weightI32[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.39.attn_v.weightI32[3584, 2048]
-
blk.39.ffn_down.weightI32[14336, 3584]
-
blk.39.ffn_gate.weightI32[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weightI32[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.40.attn_v.weightI32[3584, 2048]
-
blk.40.ffn_down.weightI32[14336, 3584]
-
blk.40.ffn_gate.weightI32[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weightI32[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.41.attn_v.weightI32[3584, 2048]
-
blk.41.ffn_down.weightI32[14336, 3584]
-
blk.41.ffn_gate.weightI32[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weightI32[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41