Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models
352 Pulls Updated 3 months ago
2df127f0f32e · 5.2GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.file_type30
-
general.finetune..
-
general.licensegemma
-
general.name..
-
general.quantization_version2
-
general.size_label9.2B
-
general.tags[conversational, text-generation]
-
general.typemodel
-
quantize.imatrix.chunks_count128
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.13.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.14.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.15.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.16.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.17.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.18.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.19.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.20.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.21.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.22.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.23.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.24.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.25.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.26.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.27.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.28.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.29.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.30.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.31.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.32.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.33.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.34.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.35.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 23!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 23!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 23!)[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 23!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 23!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 23!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 23!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41