Meta Llama 3 SimPO : The most powerful <10B LLM to date on Chatbot leaderboards from Princeton-NLP
8b
286 Pulls Updated 5 months ago
1e5644fc8dbe · 3.8GB
-
general.architecturellama
-
general.file_type27
-
general.namemodel
-
general.quantization_version2
-
llama.attention.head_count32
-
llama.attention.head_count_kv8
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.block_count32
-
llama.context_length8192
-
llama.embedding_length4096
-
llama.feed_forward_length14336
-
llama.rope.dimension_count128
-
llama.rope.freq_base500000
-
llama.vocab_size128256
-
tokenizer.ggml.bos_token_id128000
-
tokenizer.ggml.eos_token_id128001
-
tokenizer.ggml.merges[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.prellama-bpe
-
tokenizer.ggml.token_type[1, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[!, ", #, $, %, ...]
-
NameTypeShape
-
token_embd.weight(!unknown_type 21!)[4096, 128256]
-
blk.0.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.0.attn_norm.weightF32[4096]
-
blk.0.attn_output.weightQ4_K[4096, 4096]
-
blk.0.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.0.attn_v.weightQ4_K[4096, 1024]
-
blk.0.ffn_down.weightQ4_K[14336, 4096]
-
blk.0.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.0.ffn_norm.weightF32[4096]
-
blk.0.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.1.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.1.attn_norm.weightF32[4096]
-
blk.1.attn_output.weightQ4_K[4096, 4096]
-
blk.1.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.1.attn_v.weightQ4_K[4096, 1024]
-
blk.1.ffn_down.weightQ4_K[14336, 4096]
-
blk.1.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.1.ffn_norm.weightF32[4096]
-
blk.1.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.2.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.2.attn_norm.weightF32[4096]
-
blk.2.attn_output.weightQ4_K[4096, 4096]
-
blk.2.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.2.attn_v.weightQ4_K[4096, 1024]
-
blk.2.ffn_down.weightQ4_K[14336, 4096]
-
blk.2.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.2.ffn_norm.weightF32[4096]
-
blk.2.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.3.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.3.attn_norm.weightF32[4096]
-
blk.3.attn_output.weightQ4_K[4096, 4096]
-
blk.3.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.3.attn_v.weightQ4_K[4096, 1024]
-
blk.3.ffn_down.weightQ4_K[14336, 4096]
-
blk.3.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.3.ffn_norm.weightF32[4096]
-
blk.3.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.4.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.4.attn_norm.weightF32[4096]
-
blk.4.attn_output.weightQ4_K[4096, 4096]
-
blk.4.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.4.attn_v.weightQ4_K[4096, 1024]
-
blk.4.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.4.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.4.ffn_norm.weightF32[4096]
-
blk.4.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.5.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.5.attn_norm.weightF32[4096]
-
blk.5.attn_output.weightQ4_K[4096, 4096]
-
blk.5.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.5.attn_v.weightQ4_K[4096, 1024]
-
blk.5.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.5.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.5.ffn_norm.weightF32[4096]
-
blk.5.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.6.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.6.attn_norm.weightF32[4096]
-
blk.6.attn_output.weightQ4_K[4096, 4096]
-
blk.6.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.6.attn_v.weightQ4_K[4096, 1024]
-
blk.6.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.6.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.6.ffn_norm.weightF32[4096]
-
blk.6.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.7.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.7.attn_norm.weightF32[4096]
-
blk.7.attn_output.weightQ4_K[4096, 4096]
-
blk.7.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.7.attn_v.weightQ4_K[4096, 1024]
-
blk.7.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.7.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.7.ffn_norm.weightF32[4096]
-
blk.7.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.8.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.8.attn_norm.weightF32[4096]
-
blk.8.attn_output.weightQ4_K[4096, 4096]
-
blk.8.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.8.attn_v.weightQ4_K[4096, 1024]
-
blk.8.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.8.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.8.ffn_norm.weightF32[4096]
-
blk.8.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.9.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.9.attn_norm.weightF32[4096]
-
blk.9.attn_output.weightQ4_K[4096, 4096]
-
blk.9.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.9.attn_v.weightQ4_K[4096, 1024]
-
blk.9.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.9.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.9.ffn_norm.weightF32[4096]
-
blk.9.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.10.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.10.attn_norm.weightF32[4096]
-
blk.10.attn_output.weightQ4_K[4096, 4096]
-
blk.10.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.10.attn_v.weightQ4_K[4096, 1024]
-
blk.10.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.10.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.10.ffn_norm.weightF32[4096]
-
blk.10.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.11.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.11.attn_norm.weightF32[4096]
-
blk.11.attn_output.weightQ4_K[4096, 4096]
-
blk.11.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.11.attn_v.weightQ4_K[4096, 1024]
-
blk.11.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.11.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.11.ffn_norm.weightF32[4096]
-
blk.11.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.12.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.12.attn_norm.weightF32[4096]
-
blk.12.attn_output.weightQ4_K[4096, 4096]
-
blk.12.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.12.attn_v.weightQ4_K[4096, 1024]
-
blk.12.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.12.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.12.ffn_norm.weightF32[4096]
-
blk.12.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.13.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.13.attn_norm.weightF32[4096]
-
blk.13.attn_output.weightQ4_K[4096, 4096]
-
blk.13.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.13.attn_v.weightQ4_K[4096, 1024]
-
blk.13.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.13.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.13.ffn_norm.weightF32[4096]
-
blk.13.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.14.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.14.attn_norm.weightF32[4096]
-
blk.14.attn_output.weightQ4_K[4096, 4096]
-
blk.14.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.14.attn_v.weightQ4_K[4096, 1024]
-
blk.14.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.14.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.14.ffn_norm.weightF32[4096]
-
blk.14.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.15.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.15.attn_norm.weightF32[4096]
-
blk.15.attn_output.weightQ4_K[4096, 4096]
-
blk.15.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.15.attn_v.weightQ4_K[4096, 1024]
-
blk.15.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.15.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.15.ffn_norm.weightF32[4096]
-
blk.15.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.16.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.16.attn_norm.weightF32[4096]
-
blk.16.attn_output.weightQ4_K[4096, 4096]
-
blk.16.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.16.attn_v.weightQ4_K[4096, 1024]
-
blk.16.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.16.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.16.ffn_norm.weightF32[4096]
-
blk.16.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.17.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.17.attn_norm.weightF32[4096]
-
blk.17.attn_output.weightQ4_K[4096, 4096]
-
blk.17.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.17.attn_v.weightQ4_K[4096, 1024]
-
blk.17.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.17.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.17.ffn_norm.weightF32[4096]
-
blk.17.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.18.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.18.attn_norm.weightF32[4096]
-
blk.18.attn_output.weightQ4_K[4096, 4096]
-
blk.18.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.18.attn_v.weightQ4_K[4096, 1024]
-
blk.18.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.18.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.18.ffn_norm.weightF32[4096]
-
blk.18.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.19.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.19.attn_norm.weightF32[4096]
-
blk.19.attn_output.weightQ4_K[4096, 4096]
-
blk.19.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.19.attn_v.weightQ4_K[4096, 1024]
-
blk.19.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.19.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.19.ffn_norm.weightF32[4096]
-
blk.19.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.20.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.20.attn_norm.weightF32[4096]
-
blk.20.attn_output.weightQ4_K[4096, 4096]
-
blk.20.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.20.attn_v.weightQ4_K[4096, 1024]
-
blk.20.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.20.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.20.ffn_norm.weightF32[4096]
-
blk.20.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.21.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.21.attn_norm.weightF32[4096]
-
blk.21.attn_output.weightQ4_K[4096, 4096]
-
blk.21.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.21.attn_v.weightQ4_K[4096, 1024]
-
blk.21.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.21.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.21.ffn_norm.weightF32[4096]
-
blk.21.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.22.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.22.attn_norm.weightF32[4096]
-
blk.22.attn_output.weightQ4_K[4096, 4096]
-
blk.22.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.22.attn_v.weightQ4_K[4096, 1024]
-
blk.22.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.22.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.22.ffn_norm.weightF32[4096]
-
blk.22.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.23.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.23.attn_norm.weightF32[4096]
-
blk.23.attn_output.weightQ4_K[4096, 4096]
-
blk.23.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.23.attn_v.weightQ4_K[4096, 1024]
-
blk.23.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.23.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.23.ffn_norm.weightF32[4096]
-
blk.23.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.24.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.24.attn_norm.weightF32[4096]
-
blk.24.attn_output.weightQ4_K[4096, 4096]
-
blk.24.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.24.attn_v.weightQ4_K[4096, 1024]
-
blk.24.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.24.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.24.ffn_norm.weightF32[4096]
-
blk.24.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.25.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.25.attn_norm.weightF32[4096]
-
blk.25.attn_output.weightQ4_K[4096, 4096]
-
blk.25.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.25.attn_v.weightQ4_K[4096, 1024]
-
blk.25.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.25.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.25.ffn_norm.weightF32[4096]
-
blk.25.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.26.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.26.attn_norm.weightF32[4096]
-
blk.26.attn_output.weightQ4_K[4096, 4096]
-
blk.26.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.26.attn_v.weightQ4_K[4096, 1024]
-
blk.26.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.26.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.26.ffn_norm.weightF32[4096]
-
blk.26.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.27.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.27.attn_norm.weightF32[4096]
-
blk.27.attn_output.weightQ4_K[4096, 4096]
-
blk.27.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.27.attn_v.weightQ4_K[4096, 1024]
-
blk.27.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.27.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.27.ffn_norm.weightF32[4096]
-
blk.27.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.28.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.28.attn_norm.weightF32[4096]
-
blk.28.attn_output.weightQ4_K[4096, 4096]
-
blk.28.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.28.attn_v.weightQ4_K[4096, 1024]
-
blk.28.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.28.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.28.ffn_norm.weightF32[4096]
-
blk.28.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.29.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.29.attn_norm.weightF32[4096]
-
blk.29.attn_output.weightQ4_K[4096, 4096]
-
blk.29.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.29.attn_v.weightQ4_K[4096, 1024]
-
blk.29.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.29.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.29.ffn_norm.weightF32[4096]
-
blk.29.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.30.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.30.attn_norm.weightF32[4096]
-
blk.30.attn_output.weightQ4_K[4096, 4096]
-
blk.30.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.30.attn_v.weightQ4_K[4096, 1024]
-
blk.30.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.30.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.30.ffn_norm.weightF32[4096]
-
blk.30.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.31.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.31.attn_norm.weightF32[4096]
-
blk.31.attn_output.weightQ4_K[4096, 4096]
-
blk.31.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.31.attn_v.weightQ4_K[4096, 1024]
-
blk.31.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.31.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.31.ffn_norm.weightF32[4096]
-
blk.31.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
output.weightQ6_K[4096, 128256]
-
output_norm.weightF32[4096]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31