LLaMAX is a multilingual language model, developed through continued pre-training on Llama3, and supports over 100 languages
618 Pulls Updated 3 months ago
964837e63cbe · 4.7GB
-
general.architecturellama
-
general.file_type25
-
general.name..
-
general.quantization_version2
-
general.typemodel
-
llama.attention.head_count32
-
llama.attention.head_count_kv8
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.block_count32
-
llama.context_length8192
-
llama.embedding_length4096
-
llama.feed_forward_length14336
-
llama.rope.dimension_count128
-
llama.rope.freq_base500000
-
llama.vocab_size128256
-
quantize.imatrix.chunks_count124
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count224
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.bos_token_id128000
-
tokenizer.ggml.eos_token_id128001
-
tokenizer.ggml.merges[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id128001
-
tokenizer.ggml.presmaug-bpe
-
tokenizer.ggml.token_type[1, 1, 1, 1, 1, ...]
-
tokenizer.ggml.tokens[!, ", #, $, %, ...]
-
NameTypeShape
-
token_embd.weight(!unknown_type 20!)[4096, 128256]
-
blk.0.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.0.attn_norm.weightF32[4096]
-
blk.0.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.0.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.0.attn_v.weightQ5_K[4096, 1024]
-
blk.0.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.0.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.0.ffn_norm.weightF32[4096]
-
blk.0.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.1.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.1.attn_norm.weightF32[4096]
-
blk.1.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.1.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.1.attn_v.weightQ5_K[4096, 1024]
-
blk.1.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.1.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.1.ffn_norm.weightF32[4096]
-
blk.1.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.2.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.2.attn_norm.weightF32[4096]
-
blk.2.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.2.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.2.attn_v.weightQ5_K[4096, 1024]
-
blk.2.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.2.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.2.ffn_norm.weightF32[4096]
-
blk.2.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.3.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.3.attn_norm.weightF32[4096]
-
blk.3.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.3.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.3.attn_v.weightQ5_K[4096, 1024]
-
blk.3.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.3.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.3.ffn_norm.weightF32[4096]
-
blk.3.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.4.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.4.attn_norm.weightF32[4096]
-
blk.4.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.4.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.4.attn_v.weightQ5_K[4096, 1024]
-
blk.4.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.4.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.4.ffn_norm.weightF32[4096]
-
blk.4.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.5.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.5.attn_norm.weightF32[4096]
-
blk.5.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.5.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.5.attn_v.weightQ5_K[4096, 1024]
-
blk.5.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.5.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.5.ffn_norm.weightF32[4096]
-
blk.5.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.6.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.6.attn_norm.weightF32[4096]
-
blk.6.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.6.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.6.attn_v.weightQ5_K[4096, 1024]
-
blk.6.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.6.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.6.ffn_norm.weightF32[4096]
-
blk.6.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.7.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.7.attn_norm.weightF32[4096]
-
blk.7.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.7.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.7.attn_v.weightQ5_K[4096, 1024]
-
blk.7.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.7.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.7.ffn_norm.weightF32[4096]
-
blk.7.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.8.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.8.attn_norm.weightF32[4096]
-
blk.8.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.8.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.8.attn_v.weightQ5_K[4096, 1024]
-
blk.8.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.8.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.8.ffn_norm.weightF32[4096]
-
blk.8.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.9.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.9.attn_norm.weightF32[4096]
-
blk.9.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.9.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.9.attn_v.weightQ5_K[4096, 1024]
-
blk.9.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.9.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.9.ffn_norm.weightF32[4096]
-
blk.9.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.10.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.10.attn_norm.weightF32[4096]
-
blk.10.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.10.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.10.attn_v.weightQ5_K[4096, 1024]
-
blk.10.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.10.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.10.ffn_norm.weightF32[4096]
-
blk.10.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.11.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.11.attn_norm.weightF32[4096]
-
blk.11.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.11.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.11.attn_v.weightQ5_K[4096, 1024]
-
blk.11.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.11.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.11.ffn_norm.weightF32[4096]
-
blk.11.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.12.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.12.attn_norm.weightF32[4096]
-
blk.12.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.12.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.12.attn_v.weightQ5_K[4096, 1024]
-
blk.12.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.12.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.12.ffn_norm.weightF32[4096]
-
blk.12.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.13.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.13.attn_norm.weightF32[4096]
-
blk.13.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.13.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.13.attn_v.weightQ5_K[4096, 1024]
-
blk.13.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.13.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.13.ffn_norm.weightF32[4096]
-
blk.13.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.14.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.14.attn_norm.weightF32[4096]
-
blk.14.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.14.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.14.attn_v.weightQ5_K[4096, 1024]
-
blk.14.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.14.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.14.ffn_norm.weightF32[4096]
-
blk.14.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.15.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.15.attn_norm.weightF32[4096]
-
blk.15.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.15.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.15.attn_v.weightQ5_K[4096, 1024]
-
blk.15.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.15.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.15.ffn_norm.weightF32[4096]
-
blk.15.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.16.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.16.attn_norm.weightF32[4096]
-
blk.16.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.16.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.16.attn_v.weightQ5_K[4096, 1024]
-
blk.16.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.16.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.16.ffn_norm.weightF32[4096]
-
blk.16.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.17.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.17.attn_norm.weightF32[4096]
-
blk.17.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.17.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.17.attn_v.weightQ5_K[4096, 1024]
-
blk.17.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.17.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.17.ffn_norm.weightF32[4096]
-
blk.17.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.18.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.18.attn_norm.weightF32[4096]
-
blk.18.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.18.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.18.attn_v.weightQ5_K[4096, 1024]
-
blk.18.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.18.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.18.ffn_norm.weightF32[4096]
-
blk.18.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.19.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.19.attn_norm.weightF32[4096]
-
blk.19.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.19.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.19.attn_v.weightQ5_K[4096, 1024]
-
blk.19.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.19.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.19.ffn_norm.weightF32[4096]
-
blk.19.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.20.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.20.attn_norm.weightF32[4096]
-
blk.20.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.20.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.20.attn_v.weightQ5_K[4096, 1024]
-
blk.20.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.20.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.20.ffn_norm.weightF32[4096]
-
blk.20.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.21.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.21.attn_norm.weightF32[4096]
-
blk.21.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.21.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.21.attn_v.weightQ5_K[4096, 1024]
-
blk.21.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.21.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.21.ffn_norm.weightF32[4096]
-
blk.21.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.22.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.22.attn_norm.weightF32[4096]
-
blk.22.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.22.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.22.attn_v.weightQ5_K[4096, 1024]
-
blk.22.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.22.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.22.ffn_norm.weightF32[4096]
-
blk.22.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.23.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.23.attn_norm.weightF32[4096]
-
blk.23.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.23.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.23.attn_v.weightQ5_K[4096, 1024]
-
blk.23.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.23.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.23.ffn_norm.weightF32[4096]
-
blk.23.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.24.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.24.attn_norm.weightF32[4096]
-
blk.24.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.24.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.24.attn_v.weightQ5_K[4096, 1024]
-
blk.24.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.24.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.24.ffn_norm.weightF32[4096]
-
blk.24.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.25.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.25.attn_norm.weightF32[4096]
-
blk.25.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.25.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.25.attn_v.weightQ5_K[4096, 1024]
-
blk.25.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.25.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.25.ffn_norm.weightF32[4096]
-
blk.25.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.26.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.26.attn_norm.weightF32[4096]
-
blk.26.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.26.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.26.attn_v.weightQ5_K[4096, 1024]
-
blk.26.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.26.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.26.ffn_norm.weightF32[4096]
-
blk.26.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.27.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.27.attn_norm.weightF32[4096]
-
blk.27.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.27.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.27.attn_v.weightQ5_K[4096, 1024]
-
blk.27.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.27.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.27.ffn_norm.weightF32[4096]
-
blk.27.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.28.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.28.attn_norm.weightF32[4096]
-
blk.28.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.28.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.28.attn_v.weightQ5_K[4096, 1024]
-
blk.28.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.28.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.28.ffn_norm.weightF32[4096]
-
blk.28.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.29.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.29.attn_norm.weightF32[4096]
-
blk.29.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.29.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.29.attn_v.weightQ5_K[4096, 1024]
-
blk.29.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.29.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.29.ffn_norm.weightF32[4096]
-
blk.29.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.30.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.30.attn_norm.weightF32[4096]
-
blk.30.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.30.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.30.attn_v.weightQ5_K[4096, 1024]
-
blk.30.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.30.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.30.ffn_norm.weightF32[4096]
-
blk.30.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
blk.31.attn_k.weight(!unknown_type 20!)[4096, 1024]
-
blk.31.attn_norm.weightF32[4096]
-
blk.31.attn_output.weight(!unknown_type 20!)[4096, 4096]
-
blk.31.attn_q.weight(!unknown_type 20!)[4096, 4096]
-
blk.31.attn_v.weightQ5_K[4096, 1024]
-
blk.31.ffn_down.weight(!unknown_type 20!)[14336, 4096]
-
blk.31.ffn_gate.weight(!unknown_type 20!)[4096, 14336]
-
blk.31.ffn_norm.weightF32[4096]
-
blk.31.ffn_up.weight(!unknown_type 20!)[4096, 14336]
-
output.weightQ6_K[4096, 128256]
-
output_norm.weightF32[4096]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31