Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models
365 Pulls Updated 4 months ago
154da930a6da · 3.8GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.file_type23
-
general.finetune..
-
general.licensegemma
-
general.name..
-
general.quantization_version2
-
general.size_label9.2B
-
general.tags[conversational, text-generation]
-
general.typemodel
-
quantize.imatrix.chunks_count128
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ5_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.0.attn_v.weightI32[3584, 2048]
-
blk.0.ffn_down.weightI32[14336, 3584]
-
blk.0.ffn_gate.weightI32[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weightI32[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.1.attn_v.weightI32[3584, 2048]
-
blk.1.ffn_down.weightI32[14336, 3584]
-
blk.1.ffn_gate.weightI32[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weightI32[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.2.attn_v.weightI32[3584, 2048]
-
blk.2.ffn_down.weightI32[14336, 3584]
-
blk.2.ffn_gate.weightI32[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weightI32[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.3.attn_v.weightI32[3584, 2048]
-
blk.3.ffn_down.weightI32[14336, 3584]
-
blk.3.ffn_gate.weightI32[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weightI32[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.4.attn_v.weightI32[3584, 2048]
-
blk.4.ffn_down.weightI32[14336, 3584]
-
blk.4.ffn_gate.weightI32[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weightI32[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.5.attn_v.weightI32[3584, 2048]
-
blk.5.ffn_down.weightI32[14336, 3584]
-
blk.5.ffn_gate.weightI32[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weightI32[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.6.attn_v.weightI32[3584, 2048]
-
blk.6.ffn_down.weightI32[14336, 3584]
-
blk.6.ffn_gate.weightI32[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weightI32[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.7.attn_v.weightI32[3584, 2048]
-
blk.7.ffn_down.weightI32[14336, 3584]
-
blk.7.ffn_gate.weightI32[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weightI32[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.8.attn_v.weightI32[3584, 2048]
-
blk.8.ffn_down.weightI32[14336, 3584]
-
blk.8.ffn_gate.weightI32[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weightI32[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.9.attn_v.weightI32[3584, 2048]
-
blk.9.ffn_down.weightI32[14336, 3584]
-
blk.9.ffn_gate.weightI32[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weightI32[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.10.attn_v.weightI32[3584, 2048]
-
blk.10.ffn_down.weightI32[14336, 3584]
-
blk.10.ffn_gate.weightI32[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weightI32[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.11.attn_v.weightI32[3584, 2048]
-
blk.11.ffn_down.weightI32[14336, 3584]
-
blk.11.ffn_gate.weightI32[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weightI32[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.12.attn_v.weightI32[3584, 2048]
-
blk.12.ffn_down.weightI32[14336, 3584]
-
blk.12.ffn_gate.weightI32[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weightI32[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.13.attn_v.weightI32[3584, 2048]
-
blk.13.ffn_down.weightI32[14336, 3584]
-
blk.13.ffn_gate.weightI32[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weightI32[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.14.attn_v.weightI32[3584, 2048]
-
blk.14.ffn_down.weightI32[14336, 3584]
-
blk.14.ffn_gate.weightI32[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weightI32[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.15.attn_v.weightI32[3584, 2048]
-
blk.15.ffn_down.weightI32[14336, 3584]
-
blk.15.ffn_gate.weightI32[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weightI32[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.16.attn_v.weightI32[3584, 2048]
-
blk.16.ffn_down.weightI32[14336, 3584]
-
blk.16.ffn_gate.weightI32[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weightI32[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.17.attn_v.weightI32[3584, 2048]
-
blk.17.ffn_down.weightI32[14336, 3584]
-
blk.17.ffn_gate.weightI32[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weightI32[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.18.attn_v.weightI32[3584, 2048]
-
blk.18.ffn_down.weightI32[14336, 3584]
-
blk.18.ffn_gate.weightI32[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weightI32[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.19.attn_v.weightI32[3584, 2048]
-
blk.19.ffn_down.weightI32[14336, 3584]
-
blk.19.ffn_gate.weightI32[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weightI32[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.20.attn_v.weightI32[3584, 2048]
-
blk.20.ffn_down.weightI32[14336, 3584]
-
blk.20.ffn_gate.weightI32[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weightI32[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.21.attn_v.weightI32[3584, 2048]
-
blk.21.ffn_down.weightI32[14336, 3584]
-
blk.21.ffn_gate.weightI32[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weightI32[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.22.attn_v.weightI32[3584, 2048]
-
blk.22.ffn_down.weightI32[14336, 3584]
-
blk.22.ffn_gate.weightI32[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weightI32[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.23.attn_v.weightI32[3584, 2048]
-
blk.23.ffn_down.weightI32[14336, 3584]
-
blk.23.ffn_gate.weightI32[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weightI32[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.24.attn_v.weightI32[3584, 2048]
-
blk.24.ffn_down.weightI32[14336, 3584]
-
blk.24.ffn_gate.weightI32[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weightI32[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.25.attn_v.weightI32[3584, 2048]
-
blk.25.ffn_down.weightI32[14336, 3584]
-
blk.25.ffn_gate.weightI32[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weightI32[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.26.attn_v.weightI32[3584, 2048]
-
blk.26.ffn_down.weightI32[14336, 3584]
-
blk.26.ffn_gate.weightI32[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weightI32[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.27.attn_v.weightI32[3584, 2048]
-
blk.27.ffn_down.weightI32[14336, 3584]
-
blk.27.ffn_gate.weightI32[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weightI32[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.28.attn_v.weightI32[3584, 2048]
-
blk.28.ffn_down.weightI32[14336, 3584]
-
blk.28.ffn_gate.weightI32[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weightI32[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.29.attn_v.weightI32[3584, 2048]
-
blk.29.ffn_down.weightI32[14336, 3584]
-
blk.29.ffn_gate.weightI32[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weightI32[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.30.attn_v.weightI32[3584, 2048]
-
blk.30.ffn_down.weightI32[14336, 3584]
-
blk.30.ffn_gate.weightI32[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weightI32[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.31.attn_v.weightI32[3584, 2048]
-
blk.31.ffn_down.weightI32[14336, 3584]
-
blk.31.ffn_gate.weightI32[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weightI32[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.32.attn_v.weightI32[3584, 2048]
-
blk.32.ffn_down.weightI32[14336, 3584]
-
blk.32.ffn_gate.weightI32[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weightI32[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.33.attn_v.weightI32[3584, 2048]
-
blk.33.ffn_down.weightI32[14336, 3584]
-
blk.33.ffn_gate.weightI32[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weightI32[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.34.attn_v.weightI32[3584, 2048]
-
blk.34.ffn_down.weightI32[14336, 3584]
-
blk.34.ffn_gate.weightI32[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weightI32[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.35.attn_v.weightI32[3584, 2048]
-
blk.35.ffn_down.weightI32[14336, 3584]
-
blk.35.ffn_gate.weightI32[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weightI32[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.36.attn_v.weightI32[3584, 2048]
-
blk.36.ffn_down.weightI32[14336, 3584]
-
blk.36.ffn_gate.weightI32[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weightI32[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.37.attn_v.weightI32[3584, 2048]
-
blk.37.ffn_down.weightI32[14336, 3584]
-
blk.37.ffn_gate.weightI32[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weightI32[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.38.attn_v.weightI32[3584, 2048]
-
blk.38.ffn_down.weightI32[14336, 3584]
-
blk.38.ffn_gate.weightI32[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weightI32[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.39.attn_v.weightI32[3584, 2048]
-
blk.39.ffn_down.weightI32[14336, 3584]
-
blk.39.ffn_gate.weightI32[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weightI32[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.40.attn_v.weightI32[3584, 2048]
-
blk.40.ffn_down.weightI32[14336, 3584]
-
blk.40.ffn_gate.weightI32[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weightI32[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 22!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 21!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 22!)[3584, 4096]
-
blk.41.attn_v.weightI32[3584, 2048]
-
blk.41.ffn_down.weightI32[14336, 3584]
-
blk.41.ffn_gate.weightI32[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weightI32[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41