I-quants for meta-llama-3.1-8b-instruct
tools
121 Pulls Updated 6 weeks ago
849856e1e7ef · 2.9GB
-
general.architecturellama
-
general.basenameMeta-Llama-3.1
-
general.file_type29
-
general.finetuneInstruct
-
general.languages[en, de, fr, it, pt, ...]
-
general.licensellama3.1
-
general.nameMeta Llama 3.1 8B Instruct
-
general.quantization_version2
-
general.size_label8B
-
general.tags[facebook, meta, pytorch, llama, llama-3, ...]
-
general.typemodel
-
llama.attention.head_count32
-
llama.attention.head_count_kv8
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.block_count32
-
llama.context_length131072
-
llama.embedding_length4096
-
llama.feed_forward_length14336
-
llama.rope.dimension_count128
-
llama.rope.freq_base500000
-
llama.vocab_size128256
-
quantize.imatrix.chunks_count125
-
quantize.imatrix.dataset/training_dir/calibration_datav3.txt
-
quantize.imatrix.entries_count224
-
quantize.imatrix.file/models_out/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.imatrix
-
tokenizer.ggml.bos_token_id128000
-
tokenizer.ggml.eos_token_id128009
-
tokenizer.ggml.merges[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.prellama-bpe
-
tokenizer.ggml.token_type[1, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[!, ", #, $, %, ...]
-
NameTypeShape
-
token_embd.weight(!unknown_type 21!)[4096, 128256]
-
blk.0.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.0.attn_norm.weightF32[4096]
-
blk.0.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.0.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.0.attn_v.weightQ4_K[4096, 1024]
-
blk.0.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.0.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.0.ffn_norm.weightF32[4096]
-
blk.0.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.1.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.1.attn_norm.weightF32[4096]
-
blk.1.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.1.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.1.attn_v.weightQ4_K[4096, 1024]
-
blk.1.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.1.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.1.ffn_norm.weightF32[4096]
-
blk.1.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.2.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.2.attn_norm.weightF32[4096]
-
blk.2.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.2.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.2.attn_v.weightQ4_K[4096, 1024]
-
blk.2.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.2.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.2.ffn_norm.weightF32[4096]
-
blk.2.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.3.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.3.attn_norm.weightF32[4096]
-
blk.3.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.3.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.3.attn_v.weightQ4_K[4096, 1024]
-
blk.3.ffn_down.weight(!unknown_type 21!)[14336, 4096]
-
blk.3.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.3.ffn_norm.weightF32[4096]
-
blk.3.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.4.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.4.attn_norm.weightF32[4096]
-
blk.4.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.4.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.4.attn_v.weightQ4_K[4096, 1024]
-
blk.4.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.4.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.4.ffn_norm.weightF32[4096]
-
blk.4.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.5.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.5.attn_norm.weightF32[4096]
-
blk.5.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.5.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.5.attn_v.weightQ4_K[4096, 1024]
-
blk.5.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.5.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.5.ffn_norm.weightF32[4096]
-
blk.5.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.6.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.6.attn_norm.weightF32[4096]
-
blk.6.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.6.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.6.attn_v.weightQ4_K[4096, 1024]
-
blk.6.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.6.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.6.ffn_norm.weightF32[4096]
-
blk.6.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.7.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.7.attn_norm.weightF32[4096]
-
blk.7.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.7.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.7.attn_v.weightQ4_K[4096, 1024]
-
blk.7.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.7.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.7.ffn_norm.weightF32[4096]
-
blk.7.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.8.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.8.attn_norm.weightF32[4096]
-
blk.8.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.8.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.8.attn_v.weightQ4_K[4096, 1024]
-
blk.8.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.8.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.8.ffn_norm.weightF32[4096]
-
blk.8.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.9.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.9.attn_norm.weightF32[4096]
-
blk.9.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.9.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.9.attn_v.weightQ4_K[4096, 1024]
-
blk.9.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.9.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.9.ffn_norm.weightF32[4096]
-
blk.9.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.10.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.10.attn_norm.weightF32[4096]
-
blk.10.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.10.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.10.attn_v.weightQ4_K[4096, 1024]
-
blk.10.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.10.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.10.ffn_norm.weightF32[4096]
-
blk.10.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.11.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.11.attn_norm.weightF32[4096]
-
blk.11.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.11.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.11.attn_v.weightQ4_K[4096, 1024]
-
blk.11.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.11.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.11.ffn_norm.weightF32[4096]
-
blk.11.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.12.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.12.attn_norm.weightF32[4096]
-
blk.12.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.12.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.12.attn_v.weightQ4_K[4096, 1024]
-
blk.12.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.12.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.12.ffn_norm.weightF32[4096]
-
blk.12.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.13.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.13.attn_norm.weightF32[4096]
-
blk.13.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.13.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.13.attn_v.weightQ4_K[4096, 1024]
-
blk.13.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.13.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.13.ffn_norm.weightF32[4096]
-
blk.13.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.14.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.14.attn_norm.weightF32[4096]
-
blk.14.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.14.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.14.attn_v.weightQ4_K[4096, 1024]
-
blk.14.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.14.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.14.ffn_norm.weightF32[4096]
-
blk.14.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.15.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.15.attn_norm.weightF32[4096]
-
blk.15.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.15.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.15.attn_v.weightQ4_K[4096, 1024]
-
blk.15.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.15.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.15.ffn_norm.weightF32[4096]
-
blk.15.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.16.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.16.attn_norm.weightF32[4096]
-
blk.16.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.16.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.16.attn_v.weightQ4_K[4096, 1024]
-
blk.16.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.16.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.16.ffn_norm.weightF32[4096]
-
blk.16.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.17.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.17.attn_norm.weightF32[4096]
-
blk.17.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.17.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.17.attn_v.weightQ4_K[4096, 1024]
-
blk.17.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.17.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.17.ffn_norm.weightF32[4096]
-
blk.17.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.18.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.18.attn_norm.weightF32[4096]
-
blk.18.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.18.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.18.attn_v.weightQ4_K[4096, 1024]
-
blk.18.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.18.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.18.ffn_norm.weightF32[4096]
-
blk.18.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.19.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.19.attn_norm.weightF32[4096]
-
blk.19.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.19.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.19.attn_v.weightQ4_K[4096, 1024]
-
blk.19.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.19.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.19.ffn_norm.weightF32[4096]
-
blk.19.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.20.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.20.attn_norm.weightF32[4096]
-
blk.20.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.20.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.20.attn_v.weightQ4_K[4096, 1024]
-
blk.20.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.20.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.20.ffn_norm.weightF32[4096]
-
blk.20.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.21.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.21.attn_norm.weightF32[4096]
-
blk.21.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.21.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.21.attn_v.weightQ4_K[4096, 1024]
-
blk.21.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.21.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.21.ffn_norm.weightF32[4096]
-
blk.21.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.22.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.22.attn_norm.weightF32[4096]
-
blk.22.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.22.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.22.attn_v.weightQ4_K[4096, 1024]
-
blk.22.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.22.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.22.ffn_norm.weightF32[4096]
-
blk.22.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.23.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.23.attn_norm.weightF32[4096]
-
blk.23.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.23.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.23.attn_v.weightQ4_K[4096, 1024]
-
blk.23.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.23.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.23.ffn_norm.weightF32[4096]
-
blk.23.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.24.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.24.attn_norm.weightF32[4096]
-
blk.24.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.24.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.24.attn_v.weightQ4_K[4096, 1024]
-
blk.24.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.24.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.24.ffn_norm.weightF32[4096]
-
blk.24.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.25.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.25.attn_norm.weightF32[4096]
-
blk.25.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.25.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.25.attn_v.weightQ4_K[4096, 1024]
-
blk.25.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.25.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.25.ffn_norm.weightF32[4096]
-
blk.25.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.26.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.26.attn_norm.weightF32[4096]
-
blk.26.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.26.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.26.attn_v.weightQ4_K[4096, 1024]
-
blk.26.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.26.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.26.ffn_norm.weightF32[4096]
-
blk.26.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.27.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.27.attn_norm.weightF32[4096]
-
blk.27.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.27.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.27.attn_v.weightQ4_K[4096, 1024]
-
blk.27.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.27.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.27.ffn_norm.weightF32[4096]
-
blk.27.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.28.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.28.attn_norm.weightF32[4096]
-
blk.28.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.28.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.28.attn_v.weightQ4_K[4096, 1024]
-
blk.28.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.28.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.28.ffn_norm.weightF32[4096]
-
blk.28.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.29.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.29.attn_norm.weightF32[4096]
-
blk.29.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.29.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.29.attn_v.weightQ4_K[4096, 1024]
-
blk.29.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.29.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.29.ffn_norm.weightF32[4096]
-
blk.29.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.30.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.30.attn_norm.weightF32[4096]
-
blk.30.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.30.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.30.attn_v.weightQ4_K[4096, 1024]
-
blk.30.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.30.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.30.ffn_norm.weightF32[4096]
-
blk.30.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
blk.31.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.31.attn_norm.weightF32[4096]
-
blk.31.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.31.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.31.attn_v.weightQ4_K[4096, 1024]
-
blk.31.ffn_down.weight(!unknown_type 22!)[14336, 4096]
-
blk.31.ffn_gate.weight(!unknown_type 22!)[4096, 14336]
-
blk.31.ffn_norm.weightF32[4096]
-
blk.31.ffn_up.weight(!unknown_type 22!)[4096, 14336]
-
output.weightQ5_K[4096, 128256]
-
rope_freqs.weightF32[64]
-
output_norm.weightF32[4096]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31