I-quants for meta-llama-3.1-8b-instruct
tools
121 Pulls Updated 6 weeks ago
1072cd1fb852 · 2.2GB
-
general.architecturellama
-
general.basenamemeta-llama_Meta-Llama-3.1
-
general.file_type31
-
general.finetuneInstruct
-
general.languages[en, de, fr, it, pt, ...]
-
general.licensellama3.1
-
general.nameMeta llama_Meta Llama 3.1 8B Instruct
-
general.quantization_version2
-
general.size_label8B
-
general.tags[facebook, meta, pytorch, llama, llama-3, ...]
-
general.typemodel
-
llama.attention.head_count32
-
llama.attention.head_count_kv8
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.block_count32
-
llama.context_length131072
-
llama.embedding_length4096
-
llama.feed_forward_length14336
-
llama.rope.dimension_count128
-
llama.rope.freq_base500000
-
llama.vocab_size128256
-
quantize.imatrix.chunks_count125
-
quantize.imatrix.datasetmisc/calibration_datav3.txt
-
quantize.imatrix.entries_count224
-
quantize.imatrix.filegguf/Meta-Llama-3.1-8B-Instruct/imatrix.dat
-
tokenizer.ggml.bos_token_id128000
-
tokenizer.ggml.eos_token_id128009
-
tokenizer.ggml.merges[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.prellama-bpe
-
tokenizer.ggml.token_type[1, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[!, ", #, $, %, ...]
-
NameTypeShape
-
token_embd.weightQ2_K[4096, 128256]
-
blk.0.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.0.attn_norm.weightF32[4096]
-
blk.0.attn_output.weightI8[4096, 4096]
-
blk.0.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.0.attn_v.weightQ4_K[4096, 1024]
-
blk.0.ffn_down.weightQ2_K[14336, 4096]
-
blk.0.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.0.ffn_norm.weightF32[4096]
-
blk.0.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.1.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.1.attn_norm.weightF32[4096]
-
blk.1.attn_output.weightI8[4096, 4096]
-
blk.1.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.1.attn_v.weightQ4_K[4096, 1024]
-
blk.1.ffn_down.weightQ2_K[14336, 4096]
-
blk.1.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.1.ffn_norm.weightF32[4096]
-
blk.1.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.2.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.2.attn_norm.weightF32[4096]
-
blk.2.attn_output.weightI8[4096, 4096]
-
blk.2.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.2.attn_v.weightQ4_K[4096, 1024]
-
blk.2.ffn_down.weightQ2_K[14336, 4096]
-
blk.2.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.2.ffn_norm.weightF32[4096]
-
blk.2.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.3.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.3.attn_norm.weightF32[4096]
-
blk.3.attn_output.weightI8[4096, 4096]
-
blk.3.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.3.attn_v.weightQ4_K[4096, 1024]
-
blk.3.ffn_down.weightQ2_K[14336, 4096]
-
blk.3.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.3.ffn_norm.weightF32[4096]
-
blk.3.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.4.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.4.attn_norm.weightF32[4096]
-
blk.4.attn_output.weightI8[4096, 4096]
-
blk.4.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.4.attn_v.weightQ4_K[4096, 1024]
-
blk.4.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.4.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.4.ffn_norm.weightF32[4096]
-
blk.4.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.5.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.5.attn_norm.weightF32[4096]
-
blk.5.attn_output.weightI8[4096, 4096]
-
blk.5.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.5.attn_v.weightQ4_K[4096, 1024]
-
blk.5.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.5.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.5.ffn_norm.weightF32[4096]
-
blk.5.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.6.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.6.attn_norm.weightF32[4096]
-
blk.6.attn_output.weightI8[4096, 4096]
-
blk.6.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.6.attn_v.weightQ4_K[4096, 1024]
-
blk.6.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.6.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.6.ffn_norm.weightF32[4096]
-
blk.6.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.7.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.7.attn_norm.weightF32[4096]
-
blk.7.attn_output.weightI8[4096, 4096]
-
blk.7.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.7.attn_v.weightQ4_K[4096, 1024]
-
blk.7.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.7.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.7.ffn_norm.weightF32[4096]
-
blk.7.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.8.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.8.attn_norm.weightF32[4096]
-
blk.8.attn_output.weightI8[4096, 4096]
-
blk.8.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.8.attn_v.weightQ4_K[4096, 1024]
-
blk.8.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.8.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.8.ffn_norm.weightF32[4096]
-
blk.8.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.9.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.9.attn_norm.weightF32[4096]
-
blk.9.attn_output.weightI8[4096, 4096]
-
blk.9.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.9.attn_v.weightQ4_K[4096, 1024]
-
blk.9.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.9.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.9.ffn_norm.weightF32[4096]
-
blk.9.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.10.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.10.attn_norm.weightF32[4096]
-
blk.10.attn_output.weightI8[4096, 4096]
-
blk.10.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.10.attn_v.weightQ4_K[4096, 1024]
-
blk.10.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.10.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.10.ffn_norm.weightF32[4096]
-
blk.10.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.11.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.11.attn_norm.weightF32[4096]
-
blk.11.attn_output.weightI8[4096, 4096]
-
blk.11.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.11.attn_v.weightQ4_K[4096, 1024]
-
blk.11.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.11.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.11.ffn_norm.weightF32[4096]
-
blk.11.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.12.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.12.attn_norm.weightF32[4096]
-
blk.12.attn_output.weightI8[4096, 4096]
-
blk.12.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.12.attn_v.weightQ4_K[4096, 1024]
-
blk.12.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.12.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.12.ffn_norm.weightF32[4096]
-
blk.12.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.13.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.13.attn_norm.weightF32[4096]
-
blk.13.attn_output.weightI8[4096, 4096]
-
blk.13.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.13.attn_v.weightQ4_K[4096, 1024]
-
blk.13.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.13.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.13.ffn_norm.weightF32[4096]
-
blk.13.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.14.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.14.attn_norm.weightF32[4096]
-
blk.14.attn_output.weightI8[4096, 4096]
-
blk.14.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.14.attn_v.weightQ4_K[4096, 1024]
-
blk.14.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.14.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.14.ffn_norm.weightF32[4096]
-
blk.14.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.15.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.15.attn_norm.weightF32[4096]
-
blk.15.attn_output.weightI8[4096, 4096]
-
blk.15.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.15.attn_v.weightQ4_K[4096, 1024]
-
blk.15.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.15.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.15.ffn_norm.weightF32[4096]
-
blk.15.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.16.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.16.attn_norm.weightF32[4096]
-
blk.16.attn_output.weightI8[4096, 4096]
-
blk.16.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.16.attn_v.weightQ4_K[4096, 1024]
-
blk.16.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.16.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.16.ffn_norm.weightF32[4096]
-
blk.16.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.17.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.17.attn_norm.weightF32[4096]
-
blk.17.attn_output.weightI8[4096, 4096]
-
blk.17.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.17.attn_v.weightQ4_K[4096, 1024]
-
blk.17.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.17.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.17.ffn_norm.weightF32[4096]
-
blk.17.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.18.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.18.attn_norm.weightF32[4096]
-
blk.18.attn_output.weightI8[4096, 4096]
-
blk.18.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.18.attn_v.weightQ4_K[4096, 1024]
-
blk.18.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.18.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.18.ffn_norm.weightF32[4096]
-
blk.18.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.19.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.19.attn_norm.weightF32[4096]
-
blk.19.attn_output.weightI8[4096, 4096]
-
blk.19.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.19.attn_v.weightQ4_K[4096, 1024]
-
blk.19.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.19.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.19.ffn_norm.weightF32[4096]
-
blk.19.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.20.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.20.attn_norm.weightF32[4096]
-
blk.20.attn_output.weightI8[4096, 4096]
-
blk.20.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.20.attn_v.weightQ4_K[4096, 1024]
-
blk.20.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.20.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.20.ffn_norm.weightF32[4096]
-
blk.20.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.21.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.21.attn_norm.weightF32[4096]
-
blk.21.attn_output.weightI8[4096, 4096]
-
blk.21.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.21.attn_v.weightQ4_K[4096, 1024]
-
blk.21.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.21.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.21.ffn_norm.weightF32[4096]
-
blk.21.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.22.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.22.attn_norm.weightF32[4096]
-
blk.22.attn_output.weightI8[4096, 4096]
-
blk.22.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.22.attn_v.weightQ4_K[4096, 1024]
-
blk.22.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.22.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.22.ffn_norm.weightF32[4096]
-
blk.22.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.23.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.23.attn_norm.weightF32[4096]
-
blk.23.attn_output.weightI8[4096, 4096]
-
blk.23.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.23.attn_v.weightQ4_K[4096, 1024]
-
blk.23.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.23.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.23.ffn_norm.weightF32[4096]
-
blk.23.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.24.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.24.attn_norm.weightF32[4096]
-
blk.24.attn_output.weightI8[4096, 4096]
-
blk.24.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.24.attn_v.weightQ4_K[4096, 1024]
-
blk.24.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.24.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.24.ffn_norm.weightF32[4096]
-
blk.24.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.25.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.25.attn_norm.weightF32[4096]
-
blk.25.attn_output.weightI8[4096, 4096]
-
blk.25.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.25.attn_v.weightQ4_K[4096, 1024]
-
blk.25.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.25.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.25.ffn_norm.weightF32[4096]
-
blk.25.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.26.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.26.attn_norm.weightF32[4096]
-
blk.26.attn_output.weightI8[4096, 4096]
-
blk.26.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.26.attn_v.weightQ4_K[4096, 1024]
-
blk.26.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.26.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.26.ffn_norm.weightF32[4096]
-
blk.26.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.27.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.27.attn_norm.weightF32[4096]
-
blk.27.attn_output.weightI8[4096, 4096]
-
blk.27.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.27.attn_v.weightQ4_K[4096, 1024]
-
blk.27.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.27.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.27.ffn_norm.weightF32[4096]
-
blk.27.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.28.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.28.attn_norm.weightF32[4096]
-
blk.28.attn_output.weightI8[4096, 4096]
-
blk.28.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.28.attn_v.weightQ4_K[4096, 1024]
-
blk.28.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.28.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.28.ffn_norm.weightF32[4096]
-
blk.28.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.29.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.29.attn_norm.weightF32[4096]
-
blk.29.attn_output.weightI8[4096, 4096]
-
blk.29.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.29.attn_v.weightQ4_K[4096, 1024]
-
blk.29.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.29.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.29.ffn_norm.weightF32[4096]
-
blk.29.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.30.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.30.attn_norm.weightF32[4096]
-
blk.30.attn_output.weightI8[4096, 4096]
-
blk.30.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.30.attn_v.weightQ4_K[4096, 1024]
-
blk.30.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.30.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.30.ffn_norm.weightF32[4096]
-
blk.30.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
blk.31.attn_k.weight(!unknown_type 29!)[4096, 1024]
-
blk.31.attn_norm.weightF32[4096]
-
blk.31.attn_output.weightI8[4096, 4096]
-
blk.31.attn_q.weight(!unknown_type 29!)[4096, 4096]
-
blk.31.attn_v.weightQ4_K[4096, 1024]
-
blk.31.ffn_down.weight(!unknown_type 29!)[14336, 4096]
-
blk.31.ffn_gate.weight(!unknown_type 29!)[4096, 14336]
-
blk.31.ffn_norm.weightF32[4096]
-
blk.31.ffn_up.weight(!unknown_type 29!)[4096, 14336]
-
output.weightQ5_K[4096, 128256]
-
rope_freqs.weightF32[64]
-
output_norm.weightF32[4096]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31