I-quants for meta-llama-3.1-8b-instruct
tools
121 Pulls Updated 6 weeks ago
2bfbaad26361 · 3.8GB
-
general.architecturellama
-
general.basenamemeta-llama_Meta-Llama-3.1
-
general.file_type27
-
general.finetuneInstruct
-
general.languages[en, de, fr, it, pt, ...]
-
general.licensellama3.1
-
general.nameMeta llama_Meta Llama 3.1 8B Instruct
-
general.quantization_version2
-
general.size_label8B
-
general.tags[facebook, meta, pytorch, llama, llama-3, ...]
-
general.typemodel
-
llama.attention.head_count32
-
llama.attention.head_count_kv8
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.block_count32
-
llama.context_length131072
-
llama.embedding_length4096
-
llama.feed_forward_length14336
-
llama.rope.dimension_count128
-
llama.rope.freq_base500000
-
llama.vocab_size128256
-
quantize.imatrix.chunks_count125
-
quantize.imatrix.datasetmisc/calibration_datav3.txt
-
quantize.imatrix.entries_count224
-
quantize.imatrix.filegguf/Meta-Llama-3.1-8B-Instruct/imatrix.dat
-
tokenizer.ggml.bos_token_id128000
-
tokenizer.ggml.eos_token_id128009
-
tokenizer.ggml.merges[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.prellama-bpe
-
tokenizer.ggml.token_type[1, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[!, ", #, $, %, ...]
-
NameTypeShape
-
token_embd.weight(!unknown_type 21!)[4096, 128256]
-
blk.0.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.0.attn_norm.weightF32[4096]
-
blk.0.attn_output.weightQ4_K[4096, 4096]
-
blk.0.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.0.attn_v.weightQ4_K[4096, 1024]
-
blk.0.ffn_down.weightQ4_K[14336, 4096]
-
blk.0.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.0.ffn_norm.weightF32[4096]
-
blk.0.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.1.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.1.attn_norm.weightF32[4096]
-
blk.1.attn_output.weightQ4_K[4096, 4096]
-
blk.1.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.1.attn_v.weightQ4_K[4096, 1024]
-
blk.1.ffn_down.weightQ4_K[14336, 4096]
-
blk.1.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.1.ffn_norm.weightF32[4096]
-
blk.1.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.2.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.2.attn_norm.weightF32[4096]
-
blk.2.attn_output.weightQ4_K[4096, 4096]
-
blk.2.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.2.attn_v.weightQ4_K[4096, 1024]
-
blk.2.ffn_down.weightQ4_K[14336, 4096]
-
blk.2.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.2.ffn_norm.weightF32[4096]
-
blk.2.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.3.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.3.attn_norm.weightF32[4096]
-
blk.3.attn_output.weightQ4_K[4096, 4096]
-
blk.3.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.3.attn_v.weightQ4_K[4096, 1024]
-
blk.3.ffn_down.weightQ4_K[14336, 4096]
-
blk.3.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.3.ffn_norm.weightF32[4096]
-
blk.3.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.4.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.4.attn_norm.weightF32[4096]
-
blk.4.attn_output.weightQ4_K[4096, 4096]
-
blk.4.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.4.attn_v.weightQ4_K[4096, 1024]
-
blk.4.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.4.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.4.ffn_norm.weightF32[4096]
-
blk.4.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.5.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.5.attn_norm.weightF32[4096]
-
blk.5.attn_output.weightQ4_K[4096, 4096]
-
blk.5.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.5.attn_v.weightQ4_K[4096, 1024]
-
blk.5.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.5.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.5.ffn_norm.weightF32[4096]
-
blk.5.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.6.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.6.attn_norm.weightF32[4096]
-
blk.6.attn_output.weightQ4_K[4096, 4096]
-
blk.6.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.6.attn_v.weightQ4_K[4096, 1024]
-
blk.6.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.6.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.6.ffn_norm.weightF32[4096]
-
blk.6.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.7.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.7.attn_norm.weightF32[4096]
-
blk.7.attn_output.weightQ4_K[4096, 4096]
-
blk.7.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.7.attn_v.weightQ4_K[4096, 1024]
-
blk.7.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.7.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.7.ffn_norm.weightF32[4096]
-
blk.7.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.8.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.8.attn_norm.weightF32[4096]
-
blk.8.attn_output.weightQ4_K[4096, 4096]
-
blk.8.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.8.attn_v.weightQ4_K[4096, 1024]
-
blk.8.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.8.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.8.ffn_norm.weightF32[4096]
-
blk.8.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.9.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.9.attn_norm.weightF32[4096]
-
blk.9.attn_output.weightQ4_K[4096, 4096]
-
blk.9.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.9.attn_v.weightQ4_K[4096, 1024]
-
blk.9.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.9.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.9.ffn_norm.weightF32[4096]
-
blk.9.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.10.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.10.attn_norm.weightF32[4096]
-
blk.10.attn_output.weightQ4_K[4096, 4096]
-
blk.10.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.10.attn_v.weightQ4_K[4096, 1024]
-
blk.10.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.10.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.10.ffn_norm.weightF32[4096]
-
blk.10.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.11.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.11.attn_norm.weightF32[4096]
-
blk.11.attn_output.weightQ4_K[4096, 4096]
-
blk.11.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.11.attn_v.weightQ4_K[4096, 1024]
-
blk.11.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.11.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.11.ffn_norm.weightF32[4096]
-
blk.11.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.12.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.12.attn_norm.weightF32[4096]
-
blk.12.attn_output.weightQ4_K[4096, 4096]
-
blk.12.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.12.attn_v.weightQ4_K[4096, 1024]
-
blk.12.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.12.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.12.ffn_norm.weightF32[4096]
-
blk.12.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.13.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.13.attn_norm.weightF32[4096]
-
blk.13.attn_output.weightQ4_K[4096, 4096]
-
blk.13.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.13.attn_v.weightQ4_K[4096, 1024]
-
blk.13.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.13.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.13.ffn_norm.weightF32[4096]
-
blk.13.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.14.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.14.attn_norm.weightF32[4096]
-
blk.14.attn_output.weightQ4_K[4096, 4096]
-
blk.14.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.14.attn_v.weightQ4_K[4096, 1024]
-
blk.14.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.14.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.14.ffn_norm.weightF32[4096]
-
blk.14.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.15.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.15.attn_norm.weightF32[4096]
-
blk.15.attn_output.weightQ4_K[4096, 4096]
-
blk.15.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.15.attn_v.weightQ4_K[4096, 1024]
-
blk.15.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.15.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.15.ffn_norm.weightF32[4096]
-
blk.15.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.16.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.16.attn_norm.weightF32[4096]
-
blk.16.attn_output.weightQ4_K[4096, 4096]
-
blk.16.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.16.attn_v.weightQ4_K[4096, 1024]
-
blk.16.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.16.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.16.ffn_norm.weightF32[4096]
-
blk.16.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.17.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.17.attn_norm.weightF32[4096]
-
blk.17.attn_output.weightQ4_K[4096, 4096]
-
blk.17.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.17.attn_v.weightQ4_K[4096, 1024]
-
blk.17.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.17.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.17.ffn_norm.weightF32[4096]
-
blk.17.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.18.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.18.attn_norm.weightF32[4096]
-
blk.18.attn_output.weightQ4_K[4096, 4096]
-
blk.18.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.18.attn_v.weightQ4_K[4096, 1024]
-
blk.18.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.18.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.18.ffn_norm.weightF32[4096]
-
blk.18.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.19.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.19.attn_norm.weightF32[4096]
-
blk.19.attn_output.weightQ4_K[4096, 4096]
-
blk.19.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.19.attn_v.weightQ4_K[4096, 1024]
-
blk.19.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.19.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.19.ffn_norm.weightF32[4096]
-
blk.19.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.20.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.20.attn_norm.weightF32[4096]
-
blk.20.attn_output.weightQ4_K[4096, 4096]
-
blk.20.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.20.attn_v.weightQ4_K[4096, 1024]
-
blk.20.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.20.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.20.ffn_norm.weightF32[4096]
-
blk.20.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.21.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.21.attn_norm.weightF32[4096]
-
blk.21.attn_output.weightQ4_K[4096, 4096]
-
blk.21.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.21.attn_v.weightQ4_K[4096, 1024]
-
blk.21.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.21.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.21.ffn_norm.weightF32[4096]
-
blk.21.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.22.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.22.attn_norm.weightF32[4096]
-
blk.22.attn_output.weightQ4_K[4096, 4096]
-
blk.22.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.22.attn_v.weightQ4_K[4096, 1024]
-
blk.22.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.22.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.22.ffn_norm.weightF32[4096]
-
blk.22.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.23.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.23.attn_norm.weightF32[4096]
-
blk.23.attn_output.weightQ4_K[4096, 4096]
-
blk.23.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.23.attn_v.weightQ4_K[4096, 1024]
-
blk.23.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.23.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.23.ffn_norm.weightF32[4096]
-
blk.23.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.24.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.24.attn_norm.weightF32[4096]
-
blk.24.attn_output.weightQ4_K[4096, 4096]
-
blk.24.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.24.attn_v.weightQ4_K[4096, 1024]
-
blk.24.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.24.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.24.ffn_norm.weightF32[4096]
-
blk.24.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.25.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.25.attn_norm.weightF32[4096]
-
blk.25.attn_output.weightQ4_K[4096, 4096]
-
blk.25.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.25.attn_v.weightQ4_K[4096, 1024]
-
blk.25.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.25.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.25.ffn_norm.weightF32[4096]
-
blk.25.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.26.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.26.attn_norm.weightF32[4096]
-
blk.26.attn_output.weightQ4_K[4096, 4096]
-
blk.26.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.26.attn_v.weightQ4_K[4096, 1024]
-
blk.26.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.26.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.26.ffn_norm.weightF32[4096]
-
blk.26.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.27.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.27.attn_norm.weightF32[4096]
-
blk.27.attn_output.weightQ4_K[4096, 4096]
-
blk.27.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.27.attn_v.weightQ4_K[4096, 1024]
-
blk.27.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.27.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.27.ffn_norm.weightF32[4096]
-
blk.27.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.28.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.28.attn_norm.weightF32[4096]
-
blk.28.attn_output.weightQ4_K[4096, 4096]
-
blk.28.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.28.attn_v.weightQ4_K[4096, 1024]
-
blk.28.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.28.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.28.ffn_norm.weightF32[4096]
-
blk.28.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.29.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.29.attn_norm.weightF32[4096]
-
blk.29.attn_output.weightQ4_K[4096, 4096]
-
blk.29.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.29.attn_v.weightQ4_K[4096, 1024]
-
blk.29.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.29.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.29.ffn_norm.weightF32[4096]
-
blk.29.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.30.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.30.attn_norm.weightF32[4096]
-
blk.30.attn_output.weightQ4_K[4096, 4096]
-
blk.30.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.30.attn_v.weightQ4_K[4096, 1024]
-
blk.30.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.30.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.30.ffn_norm.weightF32[4096]
-
blk.30.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
blk.31.attn_k.weight(!unknown_type 21!)[4096, 1024]
-
blk.31.attn_norm.weightF32[4096]
-
blk.31.attn_output.weightQ4_K[4096, 4096]
-
blk.31.attn_q.weight(!unknown_type 21!)[4096, 4096]
-
blk.31.attn_v.weightQ4_K[4096, 1024]
-
blk.31.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.31.ffn_gate.weight(!unknown_type 21!)[4096, 14336]
-
blk.31.ffn_norm.weightF32[4096]
-
blk.31.ffn_up.weight(!unknown_type 21!)[4096, 14336]
-
output.weightQ6_K[4096, 128256]
-
rope_freqs.weightF32[64]
-
output_norm.weightF32[4096]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31