Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models
364 Pulls Updated 4 months ago
d27b187ba2cb · 5.4GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.file_type25
-
general.finetune..
-
general.licensegemma
-
general.name..
-
general.quantization_version2
-
general.size_label9.2B
-
general.tags[conversational, text-generation]
-
general.typemodel
-
quantize.imatrix.chunks_count128
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.13.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.14.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.15.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.16.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.17.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.18.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.19.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.20.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.21.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.22.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.23.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.24.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.25.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.26.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.27.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.28.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.29.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.30.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.31.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.32.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.33.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.34.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.35.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41