latest
4.1GB
Phi-3 Mini 4K fine-tuned on verbalized rebus solving in Italian
3B
Updated 6 weeks ago
6e0b838a1d81 · 4.1GB
-
general.architecturellama
-
general.file_typeQ8_0
-
llama.attention.head_count32
-
llama.attention.head_count_kv32
-
llama.attention.key_length96
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.attention.value_length96
-
llama.block_count32
-
llama.context_length4096
-
llama.embedding_length3072
-
llama.feed_forward_length8192
-
llama.rope.dimension_count96
-
llama.rope.freq_base10000
-
llama.vocab_size32064
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id32000
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id32009
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000 -1000 -1000 0 0 ...]
-
tokenizer.ggml.token_type[3 3 4 6 6 ...]
-
tokenizer.ggml.tokens[<unk> <s> </s> <0x00> <0x01> ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightQ8_0[3072 32064]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.ffn_down.weightQ8_0[8192 3072]
-
blk.0.ffn_gate.weightQ8_0[3072 8192]
-
blk.0.ffn_up.weightQ8_0[3072 8192]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.0.attn_k.weightQ8_0[3072 3072]
-
blk.0.attn_output.weightQ8_0[3072 3072]
-
blk.0.attn_q.weightQ8_0[3072 3072]
-
blk.0.attn_v.weightQ8_0[3072 3072]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.ffn_down.weightQ8_0[8192 3072]
-
blk.1.ffn_gate.weightQ8_0[3072 8192]
-
blk.1.ffn_up.weightQ8_0[3072 8192]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.1.attn_k.weightQ8_0[3072 3072]
-
blk.1.attn_output.weightQ8_0[3072 3072]
-
blk.1.attn_q.weightQ8_0[3072 3072]
-
blk.1.attn_v.weightQ8_0[3072 3072]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.ffn_down.weightQ8_0[8192 3072]
-
blk.2.ffn_gate.weightQ8_0[3072 8192]
-
blk.2.ffn_up.weightQ8_0[3072 8192]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.2.attn_k.weightQ8_0[3072 3072]
-
blk.2.attn_output.weightQ8_0[3072 3072]
-
blk.2.attn_q.weightQ8_0[3072 3072]
-
blk.2.attn_v.weightQ8_0[3072 3072]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.ffn_down.weightQ8_0[8192 3072]
-
blk.3.ffn_gate.weightQ8_0[3072 8192]
-
blk.3.ffn_up.weightQ8_0[3072 8192]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.3.attn_k.weightQ8_0[3072 3072]
-
blk.3.attn_output.weightQ8_0[3072 3072]
-
blk.3.attn_q.weightQ8_0[3072 3072]
-
blk.3.attn_v.weightQ8_0[3072 3072]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.ffn_down.weightQ8_0[8192 3072]
-
blk.4.ffn_gate.weightQ8_0[3072 8192]
-
blk.4.ffn_up.weightQ8_0[3072 8192]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.4.attn_k.weightQ8_0[3072 3072]
-
blk.4.attn_output.weightQ8_0[3072 3072]
-
blk.4.attn_q.weightQ8_0[3072 3072]
-
blk.4.attn_v.weightQ8_0[3072 3072]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.ffn_down.weightQ8_0[8192 3072]
-
blk.5.ffn_gate.weightQ8_0[3072 8192]
-
blk.5.ffn_up.weightQ8_0[3072 8192]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.5.attn_k.weightQ8_0[3072 3072]
-
blk.5.attn_output.weightQ8_0[3072 3072]
-
blk.5.attn_q.weightQ8_0[3072 3072]
-
blk.5.attn_v.weightQ8_0[3072 3072]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.ffn_down.weightQ8_0[8192 3072]
-
blk.6.ffn_gate.weightQ8_0[3072 8192]
-
blk.6.ffn_up.weightQ8_0[3072 8192]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.6.attn_k.weightQ8_0[3072 3072]
-
blk.6.attn_output.weightQ8_0[3072 3072]
-
blk.6.attn_q.weightQ8_0[3072 3072]
-
blk.6.attn_v.weightQ8_0[3072 3072]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.ffn_down.weightQ8_0[8192 3072]
-
blk.7.ffn_gate.weightQ8_0[3072 8192]
-
blk.7.ffn_up.weightQ8_0[3072 8192]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.7.attn_k.weightQ8_0[3072 3072]
-
blk.7.attn_output.weightQ8_0[3072 3072]
-
blk.7.attn_q.weightQ8_0[3072 3072]
-
blk.7.attn_v.weightQ8_0[3072 3072]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.ffn_down.weightQ8_0[8192 3072]
-
blk.8.ffn_gate.weightQ8_0[3072 8192]
-
blk.8.ffn_up.weightQ8_0[3072 8192]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.8.attn_k.weightQ8_0[3072 3072]
-
blk.8.attn_output.weightQ8_0[3072 3072]
-
blk.8.attn_q.weightQ8_0[3072 3072]
-
blk.8.attn_v.weightQ8_0[3072 3072]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.ffn_down.weightQ8_0[8192 3072]
-
blk.9.ffn_gate.weightQ8_0[3072 8192]
-
blk.9.ffn_up.weightQ8_0[3072 8192]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.9.attn_k.weightQ8_0[3072 3072]
-
blk.9.attn_output.weightQ8_0[3072 3072]
-
blk.9.attn_q.weightQ8_0[3072 3072]
-
blk.9.attn_v.weightQ8_0[3072 3072]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.ffn_down.weightQ8_0[8192 3072]
-
blk.10.ffn_gate.weightQ8_0[3072 8192]
-
blk.10.ffn_up.weightQ8_0[3072 8192]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.10.attn_k.weightQ8_0[3072 3072]
-
blk.10.attn_output.weightQ8_0[3072 3072]
-
blk.10.attn_q.weightQ8_0[3072 3072]
-
blk.10.attn_v.weightQ8_0[3072 3072]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.ffn_down.weightQ8_0[8192 3072]
-
blk.11.ffn_gate.weightQ8_0[3072 8192]
-
blk.11.ffn_up.weightQ8_0[3072 8192]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.11.attn_k.weightQ8_0[3072 3072]
-
blk.11.attn_output.weightQ8_0[3072 3072]
-
blk.11.attn_q.weightQ8_0[3072 3072]
-
blk.11.attn_v.weightQ8_0[3072 3072]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.ffn_down.weightQ8_0[8192 3072]
-
blk.12.ffn_gate.weightQ8_0[3072 8192]
-
blk.12.ffn_up.weightQ8_0[3072 8192]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.12.attn_k.weightQ8_0[3072 3072]
-
blk.12.attn_output.weightQ8_0[3072 3072]
-
blk.12.attn_q.weightQ8_0[3072 3072]
-
blk.12.attn_v.weightQ8_0[3072 3072]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.ffn_down.weightQ8_0[8192 3072]
-
blk.13.ffn_gate.weightQ8_0[3072 8192]
-
blk.13.ffn_up.weightQ8_0[3072 8192]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.13.attn_k.weightQ8_0[3072 3072]
-
blk.13.attn_output.weightQ8_0[3072 3072]
-
blk.13.attn_q.weightQ8_0[3072 3072]
-
blk.13.attn_v.weightQ8_0[3072 3072]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.ffn_down.weightQ8_0[8192 3072]
-
blk.14.ffn_gate.weightQ8_0[3072 8192]
-
blk.14.ffn_up.weightQ8_0[3072 8192]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.14.attn_k.weightQ8_0[3072 3072]
-
blk.14.attn_output.weightQ8_0[3072 3072]
-
blk.14.attn_q.weightQ8_0[3072 3072]
-
blk.14.attn_v.weightQ8_0[3072 3072]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.ffn_down.weightQ8_0[8192 3072]
-
blk.15.ffn_gate.weightQ8_0[3072 8192]
-
blk.15.ffn_up.weightQ8_0[3072 8192]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.15.attn_k.weightQ8_0[3072 3072]
-
blk.15.attn_output.weightQ8_0[3072 3072]
-
blk.15.attn_q.weightQ8_0[3072 3072]
-
blk.15.attn_v.weightQ8_0[3072 3072]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.ffn_down.weightQ8_0[8192 3072]
-
blk.16.ffn_gate.weightQ8_0[3072 8192]
-
blk.16.ffn_up.weightQ8_0[3072 8192]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.16.attn_k.weightQ8_0[3072 3072]
-
blk.16.attn_output.weightQ8_0[3072 3072]
-
blk.16.attn_q.weightQ8_0[3072 3072]
-
blk.16.attn_v.weightQ8_0[3072 3072]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.ffn_down.weightQ8_0[8192 3072]
-
blk.17.ffn_gate.weightQ8_0[3072 8192]
-
blk.17.ffn_up.weightQ8_0[3072 8192]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.17.attn_k.weightQ8_0[3072 3072]
-
blk.17.attn_output.weightQ8_0[3072 3072]
-
blk.17.attn_q.weightQ8_0[3072 3072]
-
blk.17.attn_v.weightQ8_0[3072 3072]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.ffn_down.weightQ8_0[8192 3072]
-
blk.18.ffn_gate.weightQ8_0[3072 8192]
-
blk.18.ffn_up.weightQ8_0[3072 8192]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.18.attn_k.weightQ8_0[3072 3072]
-
blk.18.attn_output.weightQ8_0[3072 3072]
-
blk.18.attn_q.weightQ8_0[3072 3072]
-
blk.18.attn_v.weightQ8_0[3072 3072]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.ffn_down.weightQ8_0[8192 3072]
-
blk.19.ffn_gate.weightQ8_0[3072 8192]
-
blk.19.ffn_up.weightQ8_0[3072 8192]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.19.attn_k.weightQ8_0[3072 3072]
-
blk.19.attn_output.weightQ8_0[3072 3072]
-
blk.19.attn_q.weightQ8_0[3072 3072]
-
blk.19.attn_v.weightQ8_0[3072 3072]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.ffn_down.weightQ8_0[8192 3072]
-
blk.20.ffn_gate.weightQ8_0[3072 8192]
-
blk.20.ffn_up.weightQ8_0[3072 8192]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.20.attn_k.weightQ8_0[3072 3072]
-
blk.20.attn_output.weightQ8_0[3072 3072]
-
blk.20.attn_q.weightQ8_0[3072 3072]
-
blk.20.attn_v.weightQ8_0[3072 3072]
-
blk.21.attn_k.weightQ8_0[3072 3072]
-
blk.21.attn_q.weightQ8_0[3072 3072]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.ffn_down.weightQ8_0[8192 3072]
-
blk.21.ffn_gate.weightQ8_0[3072 8192]
-
blk.21.ffn_up.weightQ8_0[3072 8192]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.21.attn_output.weightQ8_0[3072 3072]
-
blk.21.attn_v.weightQ8_0[3072 3072]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.ffn_down.weightQ8_0[8192 3072]
-
blk.22.ffn_gate.weightQ8_0[3072 8192]
-
blk.22.ffn_up.weightQ8_0[3072 8192]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.22.attn_k.weightQ8_0[3072 3072]
-
blk.22.attn_output.weightQ8_0[3072 3072]
-
blk.22.attn_q.weightQ8_0[3072 3072]
-
blk.22.attn_v.weightQ8_0[3072 3072]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.ffn_down.weightQ8_0[8192 3072]
-
blk.23.ffn_gate.weightQ8_0[3072 8192]
-
blk.23.ffn_up.weightQ8_0[3072 8192]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.23.attn_k.weightQ8_0[3072 3072]
-
blk.23.attn_output.weightQ8_0[3072 3072]
-
blk.23.attn_q.weightQ8_0[3072 3072]
-
blk.23.attn_v.weightQ8_0[3072 3072]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.ffn_down.weightQ8_0[8192 3072]
-
blk.24.ffn_gate.weightQ8_0[3072 8192]
-
blk.24.ffn_up.weightQ8_0[3072 8192]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.24.attn_k.weightQ8_0[3072 3072]
-
blk.24.attn_output.weightQ8_0[3072 3072]
-
blk.24.attn_q.weightQ8_0[3072 3072]
-
blk.24.attn_v.weightQ8_0[3072 3072]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.ffn_down.weightQ8_0[8192 3072]
-
blk.25.ffn_gate.weightQ8_0[3072 8192]
-
blk.25.ffn_up.weightQ8_0[3072 8192]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.25.attn_k.weightQ8_0[3072 3072]
-
blk.25.attn_output.weightQ8_0[3072 3072]
-
blk.25.attn_q.weightQ8_0[3072 3072]
-
blk.25.attn_v.weightQ8_0[3072 3072]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.ffn_down.weightQ8_0[8192 3072]
-
blk.26.ffn_gate.weightQ8_0[3072 8192]
-
blk.26.ffn_up.weightQ8_0[3072 8192]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.26.attn_k.weightQ8_0[3072 3072]
-
blk.26.attn_output.weightQ8_0[3072 3072]
-
blk.26.attn_q.weightQ8_0[3072 3072]
-
blk.26.attn_v.weightQ8_0[3072 3072]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.ffn_down.weightQ8_0[8192 3072]
-
blk.27.ffn_gate.weightQ8_0[3072 8192]
-
blk.27.ffn_up.weightQ8_0[3072 8192]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.27.attn_k.weightQ8_0[3072 3072]
-
blk.27.attn_output.weightQ8_0[3072 3072]
-
blk.27.attn_q.weightQ8_0[3072 3072]
-
blk.27.attn_v.weightQ8_0[3072 3072]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.ffn_down.weightQ8_0[8192 3072]
-
blk.28.ffn_gate.weightQ8_0[3072 8192]
-
blk.28.ffn_up.weightQ8_0[3072 8192]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.28.attn_k.weightQ8_0[3072 3072]
-
blk.28.attn_output.weightQ8_0[3072 3072]
-
blk.28.attn_q.weightQ8_0[3072 3072]
-
blk.28.attn_v.weightQ8_0[3072 3072]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.ffn_down.weightQ8_0[8192 3072]
-
blk.29.ffn_gate.weightQ8_0[3072 8192]
-
blk.29.ffn_up.weightQ8_0[3072 8192]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.29.attn_k.weightQ8_0[3072 3072]
-
blk.29.attn_output.weightQ8_0[3072 3072]
-
blk.29.attn_q.weightQ8_0[3072 3072]
-
blk.29.attn_v.weightQ8_0[3072 3072]
-
blk.30.attn_norm.weightF32[3072]
-
blk.30.ffn_down.weightQ8_0[8192 3072]
-
blk.30.ffn_gate.weightQ8_0[3072 8192]
-
blk.30.ffn_up.weightQ8_0[3072 8192]
-
blk.30.ffn_norm.weightF32[3072]
-
blk.30.attn_k.weightQ8_0[3072 3072]
-
blk.30.attn_output.weightQ8_0[3072 3072]
-
blk.30.attn_q.weightQ8_0[3072 3072]
-
blk.30.attn_v.weightQ8_0[3072 3072]
-
blk.31.attn_norm.weightF32[3072]
-
blk.31.ffn_down.weightQ8_0[8192 3072]
-
blk.31.ffn_gate.weightQ8_0[3072 8192]
-
blk.31.ffn_up.weightQ8_0[3072 8192]
-
blk.31.ffn_norm.weightF32[3072]
-
blk.31.attn_k.weightQ8_0[3072 3072]
-
blk.31.attn_output.weightQ8_0[3072 3072]
-
blk.31.attn_q.weightQ8_0[3072 3072]
-
blk.31.attn_v.weightQ8_0[3072 3072]
-
output.weightQ8_0[3072 32064]
-
output_norm.weightF32[3072]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31