This model was built using the Smaug recipe for improving performance on real world multi-turn conversations applied to meta-llama/Meta-Llama-3-8B-Instruct.
206 Pulls Updated 4 months ago
5706da938025 · 3.3GB
-
general.architecturellama
-
general.file_type23
-
general.name..
-
general.quantization_version2
-
llama.attention.head_count32
-
llama.attention.head_count_kv8
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.block_count32
-
llama.context_length8192
-
llama.embedding_length4096
-
llama.feed_forward_length14336
-
llama.rope.dimension_count128
-
llama.rope.freq_base500000
-
llama.vocab_size128256
-
quantize.imatrix.chunks_count88
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/groups_merged.txt
-
quantize.imatrix.entries_count224
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.bos_token_id128000
-
tokenizer.ggml.eos_token_id128001
-
tokenizer.ggml.merges[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id128001
-
tokenizer.ggml.presmaug-bpe
-
tokenizer.ggml.token_type[1, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[!, ", #, $, %, ...]
-
NameTypeShape
-
token_embd.weight(!unknown_type 21!)[4096, 128256]
-
blk.0.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.0.attn_norm.weightF32[4096]
-
blk.0.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.0.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.0.attn_v.weightQ4_K[4096, 1024]
-
blk.0.ffn_down.weightI32[14336, 4096]
-
blk.0.ffn_gate.weightI32[4096, 14336]
-
blk.0.ffn_norm.weightF32[4096]
-
blk.0.ffn_up.weightI32[4096, 14336]
-
blk.1.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.1.attn_norm.weightF32[4096]
-
blk.1.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.1.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.1.attn_v.weightQ4_K[4096, 1024]
-
blk.1.ffn_down.weightI32[14336, 4096]
-
blk.1.ffn_gate.weightI32[4096, 14336]
-
blk.1.ffn_norm.weightF32[4096]
-
blk.1.ffn_up.weightI32[4096, 14336]
-
blk.2.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.2.attn_norm.weightF32[4096]
-
blk.2.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.2.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.2.attn_v.weightQ4_K[4096, 1024]
-
blk.2.ffn_down.weightI32[14336, 4096]
-
blk.2.ffn_gate.weightI32[4096, 14336]
-
blk.2.ffn_norm.weightF32[4096]
-
blk.2.ffn_up.weightI32[4096, 14336]
-
blk.3.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.3.attn_norm.weightF32[4096]
-
blk.3.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.3.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.3.attn_v.weightQ4_K[4096, 1024]
-
blk.3.ffn_down.weightI32[14336, 4096]
-
blk.3.ffn_gate.weightI32[4096, 14336]
-
blk.3.ffn_norm.weightF32[4096]
-
blk.3.ffn_up.weightI32[4096, 14336]
-
blk.4.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.4.attn_norm.weightF32[4096]
-
blk.4.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.4.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.4.attn_v.weightQ4_K[4096, 1024]
-
blk.4.ffn_down.weightI32[14336, 4096]
-
blk.4.ffn_gate.weightI32[4096, 14336]
-
blk.4.ffn_norm.weightF32[4096]
-
blk.4.ffn_up.weightI32[4096, 14336]
-
blk.5.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.5.attn_norm.weightF32[4096]
-
blk.5.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.5.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.5.attn_v.weightQ4_K[4096, 1024]
-
blk.5.ffn_down.weightI32[14336, 4096]
-
blk.5.ffn_gate.weightI32[4096, 14336]
-
blk.5.ffn_norm.weightF32[4096]
-
blk.5.ffn_up.weightI32[4096, 14336]
-
blk.6.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.6.attn_norm.weightF32[4096]
-
blk.6.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.6.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.6.attn_v.weightQ4_K[4096, 1024]
-
blk.6.ffn_down.weightI32[14336, 4096]
-
blk.6.ffn_gate.weightI32[4096, 14336]
-
blk.6.ffn_norm.weightF32[4096]
-
blk.6.ffn_up.weightI32[4096, 14336]
-
blk.7.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.7.attn_norm.weightF32[4096]
-
blk.7.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.7.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.7.attn_v.weightQ4_K[4096, 1024]
-
blk.7.ffn_down.weightI32[14336, 4096]
-
blk.7.ffn_gate.weightI32[4096, 14336]
-
blk.7.ffn_norm.weightF32[4096]
-
blk.7.ffn_up.weightI32[4096, 14336]
-
blk.8.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.8.attn_norm.weightF32[4096]
-
blk.8.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.8.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.8.attn_v.weightQ4_K[4096, 1024]
-
blk.8.ffn_down.weightI32[14336, 4096]
-
blk.8.ffn_gate.weightI32[4096, 14336]
-
blk.8.ffn_norm.weightF32[4096]
-
blk.8.ffn_up.weightI32[4096, 14336]
-
blk.9.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.9.attn_norm.weightF32[4096]
-
blk.9.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.9.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.9.attn_v.weightQ4_K[4096, 1024]
-
blk.9.ffn_down.weightI32[14336, 4096]
-
blk.9.ffn_gate.weightI32[4096, 14336]
-
blk.9.ffn_norm.weightF32[4096]
-
blk.9.ffn_up.weightI32[4096, 14336]
-
blk.10.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.10.attn_norm.weightF32[4096]
-
blk.10.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.10.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.10.attn_v.weightQ4_K[4096, 1024]
-
blk.10.ffn_down.weightI32[14336, 4096]
-
blk.10.ffn_gate.weightI32[4096, 14336]
-
blk.10.ffn_norm.weightF32[4096]
-
blk.10.ffn_up.weightI32[4096, 14336]
-
blk.11.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.11.attn_norm.weightF32[4096]
-
blk.11.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.11.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.11.attn_v.weightQ4_K[4096, 1024]
-
blk.11.ffn_down.weightI32[14336, 4096]
-
blk.11.ffn_gate.weightI32[4096, 14336]
-
blk.11.ffn_norm.weightF32[4096]
-
blk.11.ffn_up.weightI32[4096, 14336]
-
blk.12.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.12.attn_norm.weightF32[4096]
-
blk.12.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.12.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.12.attn_v.weightQ4_K[4096, 1024]
-
blk.12.ffn_down.weightI32[14336, 4096]
-
blk.12.ffn_gate.weightI32[4096, 14336]
-
blk.12.ffn_norm.weightF32[4096]
-
blk.12.ffn_up.weightI32[4096, 14336]
-
blk.13.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.13.attn_norm.weightF32[4096]
-
blk.13.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.13.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.13.attn_v.weightQ4_K[4096, 1024]
-
blk.13.ffn_down.weightI32[14336, 4096]
-
blk.13.ffn_gate.weightI32[4096, 14336]
-
blk.13.ffn_norm.weightF32[4096]
-
blk.13.ffn_up.weightI32[4096, 14336]
-
blk.14.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.14.attn_norm.weightF32[4096]
-
blk.14.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.14.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.14.attn_v.weightQ4_K[4096, 1024]
-
blk.14.ffn_down.weightI32[14336, 4096]
-
blk.14.ffn_gate.weightI32[4096, 14336]
-
blk.14.ffn_norm.weightF32[4096]
-
blk.14.ffn_up.weightI32[4096, 14336]
-
blk.15.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.15.attn_norm.weightF32[4096]
-
blk.15.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.15.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.15.attn_v.weightQ4_K[4096, 1024]
-
blk.15.ffn_down.weightI32[14336, 4096]
-
blk.15.ffn_gate.weightI32[4096, 14336]
-
blk.15.ffn_norm.weightF32[4096]
-
blk.15.ffn_up.weightI32[4096, 14336]
-
blk.16.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.16.attn_norm.weightF32[4096]
-
blk.16.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.16.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.16.attn_v.weightQ4_K[4096, 1024]
-
blk.16.ffn_down.weightI32[14336, 4096]
-
blk.16.ffn_gate.weightI32[4096, 14336]
-
blk.16.ffn_norm.weightF32[4096]
-
blk.16.ffn_up.weightI32[4096, 14336]
-
blk.17.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.17.attn_norm.weightF32[4096]
-
blk.17.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.17.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.17.attn_v.weightQ4_K[4096, 1024]
-
blk.17.ffn_down.weightI32[14336, 4096]
-
blk.17.ffn_gate.weightI32[4096, 14336]
-
blk.17.ffn_norm.weightF32[4096]
-
blk.17.ffn_up.weightI32[4096, 14336]
-
blk.18.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.18.attn_norm.weightF32[4096]
-
blk.18.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.18.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.18.attn_v.weightQ4_K[4096, 1024]
-
blk.18.ffn_down.weightI32[14336, 4096]
-
blk.18.ffn_gate.weightI32[4096, 14336]
-
blk.18.ffn_norm.weightF32[4096]
-
blk.18.ffn_up.weightI32[4096, 14336]
-
blk.19.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.19.attn_norm.weightF32[4096]
-
blk.19.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.19.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.19.attn_v.weightQ4_K[4096, 1024]
-
blk.19.ffn_down.weightI32[14336, 4096]
-
blk.19.ffn_gate.weightI32[4096, 14336]
-
blk.19.ffn_norm.weightF32[4096]
-
blk.19.ffn_up.weightI32[4096, 14336]
-
blk.20.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.20.attn_norm.weightF32[4096]
-
blk.20.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.20.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.20.attn_v.weightQ4_K[4096, 1024]
-
blk.20.ffn_down.weightI32[14336, 4096]
-
blk.20.ffn_gate.weightI32[4096, 14336]
-
blk.20.ffn_norm.weightF32[4096]
-
blk.20.ffn_up.weightI32[4096, 14336]
-
blk.21.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.21.attn_norm.weightF32[4096]
-
blk.21.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.21.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.21.attn_v.weightQ4_K[4096, 1024]
-
blk.21.ffn_down.weightI32[14336, 4096]
-
blk.21.ffn_gate.weightI32[4096, 14336]
-
blk.21.ffn_norm.weightF32[4096]
-
blk.21.ffn_up.weightI32[4096, 14336]
-
blk.22.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.22.attn_norm.weightF32[4096]
-
blk.22.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.22.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.22.attn_v.weightQ4_K[4096, 1024]
-
blk.22.ffn_down.weightI32[14336, 4096]
-
blk.22.ffn_gate.weightI32[4096, 14336]
-
blk.22.ffn_norm.weightF32[4096]
-
blk.22.ffn_up.weightI32[4096, 14336]
-
blk.23.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.23.attn_norm.weightF32[4096]
-
blk.23.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.23.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.23.attn_v.weightQ4_K[4096, 1024]
-
blk.23.ffn_down.weightI32[14336, 4096]
-
blk.23.ffn_gate.weightI32[4096, 14336]
-
blk.23.ffn_norm.weightF32[4096]
-
blk.23.ffn_up.weightI32[4096, 14336]
-
blk.24.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.24.attn_norm.weightF32[4096]
-
blk.24.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.24.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.24.attn_v.weightQ4_K[4096, 1024]
-
blk.24.ffn_down.weightI32[14336, 4096]
-
blk.24.ffn_gate.weightI32[4096, 14336]
-
blk.24.ffn_norm.weightF32[4096]
-
blk.24.ffn_up.weightI32[4096, 14336]
-
blk.25.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.25.attn_norm.weightF32[4096]
-
blk.25.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.25.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.25.attn_v.weightQ4_K[4096, 1024]
-
blk.25.ffn_down.weightI32[14336, 4096]
-
blk.25.ffn_gate.weightI32[4096, 14336]
-
blk.25.ffn_norm.weightF32[4096]
-
blk.25.ffn_up.weightI32[4096, 14336]
-
blk.26.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.26.attn_norm.weightF32[4096]
-
blk.26.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.26.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.26.attn_v.weightQ4_K[4096, 1024]
-
blk.26.ffn_down.weightI32[14336, 4096]
-
blk.26.ffn_gate.weightI32[4096, 14336]
-
blk.26.ffn_norm.weightF32[4096]
-
blk.26.ffn_up.weightI32[4096, 14336]
-
blk.27.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.27.attn_norm.weightF32[4096]
-
blk.27.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.27.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.27.attn_v.weightQ4_K[4096, 1024]
-
blk.27.ffn_down.weightI32[14336, 4096]
-
blk.27.ffn_gate.weightI32[4096, 14336]
-
blk.27.ffn_norm.weightF32[4096]
-
blk.27.ffn_up.weightI32[4096, 14336]
-
blk.28.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.28.attn_norm.weightF32[4096]
-
blk.28.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.28.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.28.attn_v.weightQ4_K[4096, 1024]
-
blk.28.ffn_down.weightI32[14336, 4096]
-
blk.28.ffn_gate.weightI32[4096, 14336]
-
blk.28.ffn_norm.weightF32[4096]
-
blk.28.ffn_up.weightI32[4096, 14336]
-
blk.29.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.29.attn_norm.weightF32[4096]
-
blk.29.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.29.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.29.attn_v.weightQ4_K[4096, 1024]
-
blk.29.ffn_down.weightI32[14336, 4096]
-
blk.29.ffn_gate.weightI32[4096, 14336]
-
blk.29.ffn_norm.weightF32[4096]
-
blk.29.ffn_up.weightI32[4096, 14336]
-
blk.30.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.30.attn_norm.weightF32[4096]
-
blk.30.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.30.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.30.attn_v.weightQ4_K[4096, 1024]
-
blk.30.ffn_down.weightI32[14336, 4096]
-
blk.30.ffn_gate.weightI32[4096, 14336]
-
blk.30.ffn_norm.weightF32[4096]
-
blk.30.ffn_up.weightI32[4096, 14336]
-
blk.31.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.31.attn_norm.weightF32[4096]
-
blk.31.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.31.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.31.attn_v.weightQ4_K[4096, 1024]
-
blk.31.ffn_down.weightI32[14336, 4096]
-
blk.31.ffn_gate.weightI32[4096, 14336]
-
blk.31.ffn_norm.weightF32[4096]
-
blk.31.ffn_up.weightI32[4096, 14336]
-
output.weightQ5_K[4096, 128256]
-
output_norm.weightF32[4096]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31