Llama-3.1-Storm-8B outperforms both Llama-3.1-8B-Instruct and Hermes-3-Llama-3.1-8B!
tools
433 Pulls Updated 3 months ago
df41478f1384 · 3.3GB
-
general.architecturellama
-
general.basenameLlama-3.1-Storm
-
general.file_type23
-
general.finetune..
-
general.languages[en, de, fr, it, pt, ...]
-
general.licensellama3.1
-
general.nameLlama 3.1 Storm 8B
-
general.organizationAkjindal53244
-
general.quantization_version2
-
general.size_label8B
-
general.tags[llama-3.1, conversational, instruction following, reasoning, function calling, ...]
-
general.typemodel
-
llama.attention.head_count32
-
llama.attention.head_count_kv8
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.block_count32
-
llama.context_length131072
-
llama.embedding_length4096
-
llama.feed_forward_length14336
-
llama.rope.dimension_count128
-
llama.rope.freq_base500000
-
llama.vocab_size128256
-
quantize.imatrix.chunks_count124
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count224
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.bos_token_id128000
-
tokenizer.ggml.eos_token_id128009
-
tokenizer.ggml.merges[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id128001
-
tokenizer.ggml.prellama-bpe
-
tokenizer.ggml.token_type[1, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[!, ", #, $, %, ...]
-
NameTypeShape
-
token_embd.weight(!unknown_type 21!)[4096, 128256]
-
blk.0.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.0.attn_norm.weightF32[4096]
-
blk.0.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.0.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.0.attn_v.weightQ4_K[4096, 1024]
-
blk.0.ffn_down.weightI32[14336, 4096]
-
blk.0.ffn_gate.weightI32[4096, 14336]
-
blk.0.ffn_norm.weightF32[4096]
-
blk.0.ffn_up.weightI32[4096, 14336]
-
blk.1.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.1.attn_norm.weightF32[4096]
-
blk.1.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.1.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.1.attn_v.weightQ4_K[4096, 1024]
-
blk.1.ffn_down.weightI32[14336, 4096]
-
blk.1.ffn_gate.weightI32[4096, 14336]
-
blk.1.ffn_norm.weightF32[4096]
-
blk.1.ffn_up.weightI32[4096, 14336]
-
blk.2.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.2.attn_norm.weightF32[4096]
-
blk.2.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.2.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.2.attn_v.weightQ4_K[4096, 1024]
-
blk.2.ffn_down.weightI32[14336, 4096]
-
blk.2.ffn_gate.weightI32[4096, 14336]
-
blk.2.ffn_norm.weightF32[4096]
-
blk.2.ffn_up.weightI32[4096, 14336]
-
blk.3.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.3.attn_norm.weightF32[4096]
-
blk.3.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.3.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.3.attn_v.weightQ4_K[4096, 1024]
-
blk.3.ffn_down.weightI32[14336, 4096]
-
blk.3.ffn_gate.weightI32[4096, 14336]
-
blk.3.ffn_norm.weightF32[4096]
-
blk.3.ffn_up.weightI32[4096, 14336]
-
blk.4.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.4.attn_norm.weightF32[4096]
-
blk.4.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.4.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.4.attn_v.weightQ4_K[4096, 1024]
-
blk.4.ffn_down.weightI32[14336, 4096]
-
blk.4.ffn_gate.weightI32[4096, 14336]
-
blk.4.ffn_norm.weightF32[4096]
-
blk.4.ffn_up.weightI32[4096, 14336]
-
blk.5.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.5.attn_norm.weightF32[4096]
-
blk.5.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.5.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.5.attn_v.weightQ4_K[4096, 1024]
-
blk.5.ffn_down.weightI32[14336, 4096]
-
blk.5.ffn_gate.weightI32[4096, 14336]
-
blk.5.ffn_norm.weightF32[4096]
-
blk.5.ffn_up.weightI32[4096, 14336]
-
blk.6.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.6.attn_norm.weightF32[4096]
-
blk.6.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.6.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.6.attn_v.weightQ4_K[4096, 1024]
-
blk.6.ffn_down.weightI32[14336, 4096]
-
blk.6.ffn_gate.weightI32[4096, 14336]
-
blk.6.ffn_norm.weightF32[4096]
-
blk.6.ffn_up.weightI32[4096, 14336]
-
blk.7.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.7.attn_norm.weightF32[4096]
-
blk.7.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.7.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.7.attn_v.weightQ4_K[4096, 1024]
-
blk.7.ffn_down.weightI32[14336, 4096]
-
blk.7.ffn_gate.weightI32[4096, 14336]
-
blk.7.ffn_norm.weightF32[4096]
-
blk.7.ffn_up.weightI32[4096, 14336]
-
blk.8.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.8.attn_norm.weightF32[4096]
-
blk.8.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.8.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.8.attn_v.weightQ4_K[4096, 1024]
-
blk.8.ffn_down.weightI32[14336, 4096]
-
blk.8.ffn_gate.weightI32[4096, 14336]
-
blk.8.ffn_norm.weightF32[4096]
-
blk.8.ffn_up.weightI32[4096, 14336]
-
blk.9.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.9.attn_norm.weightF32[4096]
-
blk.9.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.9.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.9.attn_v.weightQ4_K[4096, 1024]
-
blk.9.ffn_down.weightI32[14336, 4096]
-
blk.9.ffn_gate.weightI32[4096, 14336]
-
blk.9.ffn_norm.weightF32[4096]
-
blk.9.ffn_up.weightI32[4096, 14336]
-
blk.10.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.10.attn_norm.weightF32[4096]
-
blk.10.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.10.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.10.attn_v.weightQ4_K[4096, 1024]
-
blk.10.ffn_down.weightI32[14336, 4096]
-
blk.10.ffn_gate.weightI32[4096, 14336]
-
blk.10.ffn_norm.weightF32[4096]
-
blk.10.ffn_up.weightI32[4096, 14336]
-
blk.11.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.11.attn_norm.weightF32[4096]
-
blk.11.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.11.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.11.attn_v.weightQ4_K[4096, 1024]
-
blk.11.ffn_down.weightI32[14336, 4096]
-
blk.11.ffn_gate.weightI32[4096, 14336]
-
blk.11.ffn_norm.weightF32[4096]
-
blk.11.ffn_up.weightI32[4096, 14336]
-
blk.12.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.12.attn_norm.weightF32[4096]
-
blk.12.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.12.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.12.attn_v.weightQ4_K[4096, 1024]
-
blk.12.ffn_down.weightI32[14336, 4096]
-
blk.12.ffn_gate.weightI32[4096, 14336]
-
blk.12.ffn_norm.weightF32[4096]
-
blk.12.ffn_up.weightI32[4096, 14336]
-
blk.13.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.13.attn_norm.weightF32[4096]
-
blk.13.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.13.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.13.attn_v.weightQ4_K[4096, 1024]
-
blk.13.ffn_down.weightI32[14336, 4096]
-
blk.13.ffn_gate.weightI32[4096, 14336]
-
blk.13.ffn_norm.weightF32[4096]
-
blk.13.ffn_up.weightI32[4096, 14336]
-
blk.14.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.14.attn_norm.weightF32[4096]
-
blk.14.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.14.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.14.attn_v.weightQ4_K[4096, 1024]
-
blk.14.ffn_down.weightI32[14336, 4096]
-
blk.14.ffn_gate.weightI32[4096, 14336]
-
blk.14.ffn_norm.weightF32[4096]
-
blk.14.ffn_up.weightI32[4096, 14336]
-
blk.15.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.15.attn_norm.weightF32[4096]
-
blk.15.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.15.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.15.attn_v.weightQ4_K[4096, 1024]
-
blk.15.ffn_down.weightI32[14336, 4096]
-
blk.15.ffn_gate.weightI32[4096, 14336]
-
blk.15.ffn_norm.weightF32[4096]
-
blk.15.ffn_up.weightI32[4096, 14336]
-
blk.16.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.16.attn_norm.weightF32[4096]
-
blk.16.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.16.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.16.attn_v.weightQ4_K[4096, 1024]
-
blk.16.ffn_down.weightI32[14336, 4096]
-
blk.16.ffn_gate.weightI32[4096, 14336]
-
blk.16.ffn_norm.weightF32[4096]
-
blk.16.ffn_up.weightI32[4096, 14336]
-
blk.17.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.17.attn_norm.weightF32[4096]
-
blk.17.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.17.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.17.attn_v.weightQ4_K[4096, 1024]
-
blk.17.ffn_down.weightI32[14336, 4096]
-
blk.17.ffn_gate.weightI32[4096, 14336]
-
blk.17.ffn_norm.weightF32[4096]
-
blk.17.ffn_up.weightI32[4096, 14336]
-
blk.18.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.18.attn_norm.weightF32[4096]
-
blk.18.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.18.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.18.attn_v.weightQ4_K[4096, 1024]
-
blk.18.ffn_down.weightI32[14336, 4096]
-
blk.18.ffn_gate.weightI32[4096, 14336]
-
blk.18.ffn_norm.weightF32[4096]
-
blk.18.ffn_up.weightI32[4096, 14336]
-
blk.19.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.19.attn_norm.weightF32[4096]
-
blk.19.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.19.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.19.attn_v.weightQ4_K[4096, 1024]
-
blk.19.ffn_down.weightI32[14336, 4096]
-
blk.19.ffn_gate.weightI32[4096, 14336]
-
blk.19.ffn_norm.weightF32[4096]
-
blk.19.ffn_up.weightI32[4096, 14336]
-
blk.20.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.20.attn_norm.weightF32[4096]
-
blk.20.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.20.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.20.attn_v.weightQ4_K[4096, 1024]
-
blk.20.ffn_down.weightI32[14336, 4096]
-
blk.20.ffn_gate.weightI32[4096, 14336]
-
blk.20.ffn_norm.weightF32[4096]
-
blk.20.ffn_up.weightI32[4096, 14336]
-
blk.21.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.21.attn_norm.weightF32[4096]
-
blk.21.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.21.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.21.attn_v.weightQ4_K[4096, 1024]
-
blk.21.ffn_down.weightI32[14336, 4096]
-
blk.21.ffn_gate.weightI32[4096, 14336]
-
blk.21.ffn_norm.weightF32[4096]
-
blk.21.ffn_up.weightI32[4096, 14336]
-
blk.22.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.22.attn_norm.weightF32[4096]
-
blk.22.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.22.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.22.attn_v.weightQ4_K[4096, 1024]
-
blk.22.ffn_down.weightI32[14336, 4096]
-
blk.22.ffn_gate.weightI32[4096, 14336]
-
blk.22.ffn_norm.weightF32[4096]
-
blk.22.ffn_up.weightI32[4096, 14336]
-
blk.23.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.23.attn_norm.weightF32[4096]
-
blk.23.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.23.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.23.attn_v.weightQ4_K[4096, 1024]
-
blk.23.ffn_down.weightI32[14336, 4096]
-
blk.23.ffn_gate.weightI32[4096, 14336]
-
blk.23.ffn_norm.weightF32[4096]
-
blk.23.ffn_up.weightI32[4096, 14336]
-
blk.24.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.24.attn_norm.weightF32[4096]
-
blk.24.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.24.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.24.attn_v.weightQ4_K[4096, 1024]
-
blk.24.ffn_down.weightI32[14336, 4096]
-
blk.24.ffn_gate.weightI32[4096, 14336]
-
blk.24.ffn_norm.weightF32[4096]
-
blk.24.ffn_up.weightI32[4096, 14336]
-
blk.25.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.25.attn_norm.weightF32[4096]
-
blk.25.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.25.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.25.attn_v.weightQ4_K[4096, 1024]
-
blk.25.ffn_down.weightI32[14336, 4096]
-
blk.25.ffn_gate.weightI32[4096, 14336]
-
blk.25.ffn_norm.weightF32[4096]
-
blk.25.ffn_up.weightI32[4096, 14336]
-
blk.26.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.26.attn_norm.weightF32[4096]
-
blk.26.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.26.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.26.attn_v.weightQ4_K[4096, 1024]
-
blk.26.ffn_down.weightI32[14336, 4096]
-
blk.26.ffn_gate.weightI32[4096, 14336]
-
blk.26.ffn_norm.weightF32[4096]
-
blk.26.ffn_up.weightI32[4096, 14336]
-
blk.27.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.27.attn_norm.weightF32[4096]
-
blk.27.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.27.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.27.attn_v.weightQ4_K[4096, 1024]
-
blk.27.ffn_down.weightI32[14336, 4096]
-
blk.27.ffn_gate.weightI32[4096, 14336]
-
blk.27.ffn_norm.weightF32[4096]
-
blk.27.ffn_up.weightI32[4096, 14336]
-
blk.28.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.28.attn_norm.weightF32[4096]
-
blk.28.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.28.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.28.attn_v.weightQ4_K[4096, 1024]
-
blk.28.ffn_down.weightI32[14336, 4096]
-
blk.28.ffn_gate.weightI32[4096, 14336]
-
blk.28.ffn_norm.weightF32[4096]
-
blk.28.ffn_up.weightI32[4096, 14336]
-
blk.29.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.29.attn_norm.weightF32[4096]
-
blk.29.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.29.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.29.attn_v.weightQ4_K[4096, 1024]
-
blk.29.ffn_down.weightI32[14336, 4096]
-
blk.29.ffn_gate.weightI32[4096, 14336]
-
blk.29.ffn_norm.weightF32[4096]
-
blk.29.ffn_up.weightI32[4096, 14336]
-
blk.30.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.30.attn_norm.weightF32[4096]
-
blk.30.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.30.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.30.attn_v.weightQ4_K[4096, 1024]
-
blk.30.ffn_down.weightI32[14336, 4096]
-
blk.30.ffn_gate.weightI32[4096, 14336]
-
blk.30.ffn_norm.weightF32[4096]
-
blk.30.ffn_up.weightI32[4096, 14336]
-
blk.31.attn_k.weight(!unknown_type 22!)[4096, 1024]
-
blk.31.attn_norm.weightF32[4096]
-
blk.31.attn_output.weight(!unknown_type 21!)[4096, 4096]
-
blk.31.attn_q.weight(!unknown_type 22!)[4096, 4096]
-
blk.31.attn_v.weightQ4_K[4096, 1024]
-
blk.31.ffn_down.weightI32[14336, 4096]
-
blk.31.ffn_gate.weightI32[4096, 14336]
-
blk.31.ffn_norm.weightF32[4096]
-
blk.31.ffn_up.weightI32[4096, 14336]
-
output.weightQ5_K[4096, 128256]
-
rope_freqs.weightF32[64]
-
output_norm.weightF32[4096]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31