latest
2.3GB
Fine-tuning Phi3:mini on a private dataset
3B
2 Pulls Updated 2 months ago
b82497a73f55 · 2.3GB
-
general.architecturellama
-
general.file_typeQ4_K_M
-
llama.attention.head_count32
-
llama.attention.head_count_kv32
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.block_count32
-
llama.context_length4096
-
llama.embedding_length3072
-
llama.feed_forward_length8192
-
llama.rope.dimension_count96
-
llama.rope.freq_base10000
-
llama.vocab_size32064
-
tokenizer.ggml.add_bos_tokenfalse
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id32000
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id32009
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000 -1000 -1000 0 0 ...]
-
tokenizer.ggml.token_type[3 3 4 6 6 ...]
-
tokenizer.ggml.tokens[<unk> <s> </s> <0x00> <0x01> ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightQ4_K[3072 32064]
-
blk.0.attn_q.weightQ4_K[3072 3072]
-
blk.0.attn_k.weightQ4_K[3072 3072]
-
blk.0.attn_v.weightQ6_K[3072 3072]
-
blk.0.attn_output.weightQ4_K[3072 3072]
-
blk.0.ffn_gate.weightQ4_K[3072 8192]
-
blk.0.ffn_up.weightQ4_K[3072 8192]
-
blk.0.ffn_down.weightQ6_K[8192 3072]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.1.attn_q.weightQ4_K[3072 3072]
-
blk.1.attn_k.weightQ4_K[3072 3072]
-
blk.1.attn_v.weightQ6_K[3072 3072]
-
blk.1.attn_output.weightQ4_K[3072 3072]
-
blk.1.ffn_gate.weightQ4_K[3072 8192]
-
blk.1.ffn_up.weightQ4_K[3072 8192]
-
blk.1.ffn_down.weightQ6_K[8192 3072]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.2.attn_q.weightQ4_K[3072 3072]
-
blk.2.attn_k.weightQ4_K[3072 3072]
-
blk.2.attn_v.weightQ6_K[3072 3072]
-
blk.2.attn_output.weightQ4_K[3072 3072]
-
blk.2.ffn_gate.weightQ4_K[3072 8192]
-
blk.2.ffn_up.weightQ4_K[3072 8192]
-
blk.2.ffn_down.weightQ6_K[8192 3072]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.3.attn_q.weightQ4_K[3072 3072]
-
blk.3.attn_k.weightQ4_K[3072 3072]
-
blk.3.attn_v.weightQ6_K[3072 3072]
-
blk.3.attn_output.weightQ4_K[3072 3072]
-
blk.3.ffn_gate.weightQ4_K[3072 8192]
-
blk.3.ffn_up.weightQ4_K[3072 8192]
-
blk.3.ffn_down.weightQ6_K[8192 3072]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.4.attn_q.weightQ4_K[3072 3072]
-
blk.4.attn_k.weightQ4_K[3072 3072]
-
blk.4.attn_v.weightQ4_K[3072 3072]
-
blk.4.attn_output.weightQ4_K[3072 3072]
-
blk.4.ffn_gate.weightQ4_K[3072 8192]
-
blk.4.ffn_up.weightQ4_K[3072 8192]
-
blk.4.ffn_down.weightQ4_K[8192 3072]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.5.attn_q.weightQ4_K[3072 3072]
-
blk.5.attn_k.weightQ4_K[3072 3072]
-
blk.5.attn_v.weightQ4_K[3072 3072]
-
blk.5.attn_output.weightQ4_K[3072 3072]
-
blk.5.ffn_gate.weightQ4_K[3072 8192]
-
blk.5.ffn_up.weightQ4_K[3072 8192]
-
blk.5.ffn_down.weightQ4_K[8192 3072]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.6.attn_q.weightQ4_K[3072 3072]
-
blk.6.attn_k.weightQ4_K[3072 3072]
-
blk.6.attn_v.weightQ6_K[3072 3072]
-
blk.6.attn_output.weightQ4_K[3072 3072]
-
blk.6.ffn_gate.weightQ4_K[3072 8192]
-
blk.6.ffn_up.weightQ4_K[3072 8192]
-
blk.6.ffn_down.weightQ6_K[8192 3072]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.7.attn_q.weightQ4_K[3072 3072]
-
blk.7.attn_k.weightQ4_K[3072 3072]
-
blk.7.attn_v.weightQ4_K[3072 3072]
-
blk.7.attn_output.weightQ4_K[3072 3072]
-
blk.7.ffn_gate.weightQ4_K[3072 8192]
-
blk.7.ffn_up.weightQ4_K[3072 8192]
-
blk.7.ffn_down.weightQ4_K[8192 3072]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.8.attn_q.weightQ4_K[3072 3072]
-
blk.8.attn_k.weightQ4_K[3072 3072]
-
blk.8.attn_v.weightQ4_K[3072 3072]
-
blk.8.attn_output.weightQ4_K[3072 3072]
-
blk.8.ffn_gate.weightQ4_K[3072 8192]
-
blk.8.ffn_up.weightQ4_K[3072 8192]
-
blk.8.ffn_down.weightQ4_K[8192 3072]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.9.attn_q.weightQ4_K[3072 3072]
-
blk.9.attn_k.weightQ4_K[3072 3072]
-
blk.9.attn_v.weightQ6_K[3072 3072]
-
blk.9.attn_output.weightQ4_K[3072 3072]
-
blk.9.ffn_gate.weightQ4_K[3072 8192]
-
blk.9.ffn_up.weightQ4_K[3072 8192]
-
blk.9.ffn_down.weightQ6_K[8192 3072]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.10.attn_q.weightQ4_K[3072 3072]
-
blk.10.attn_k.weightQ4_K[3072 3072]
-
blk.10.attn_v.weightQ4_K[3072 3072]
-
blk.10.attn_output.weightQ4_K[3072 3072]
-
blk.10.ffn_gate.weightQ4_K[3072 8192]
-
blk.10.ffn_up.weightQ4_K[3072 8192]
-
blk.10.ffn_down.weightQ4_K[8192 3072]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.11.attn_q.weightQ4_K[3072 3072]
-
blk.11.attn_k.weightQ4_K[3072 3072]
-
blk.11.attn_v.weightQ4_K[3072 3072]
-
blk.11.attn_output.weightQ4_K[3072 3072]
-
blk.11.ffn_gate.weightQ4_K[3072 8192]
-
blk.11.ffn_up.weightQ4_K[3072 8192]
-
blk.11.ffn_down.weightQ4_K[8192 3072]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.12.attn_q.weightQ4_K[3072 3072]
-
blk.12.attn_k.weightQ4_K[3072 3072]
-
blk.12.attn_v.weightQ6_K[3072 3072]
-
blk.12.attn_output.weightQ4_K[3072 3072]
-
blk.12.ffn_gate.weightQ4_K[3072 8192]
-
blk.12.ffn_up.weightQ4_K[3072 8192]
-
blk.12.ffn_down.weightQ6_K[8192 3072]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.13.attn_q.weightQ4_K[3072 3072]
-
blk.13.attn_k.weightQ4_K[3072 3072]
-
blk.13.attn_v.weightQ4_K[3072 3072]
-
blk.13.attn_output.weightQ4_K[3072 3072]
-
blk.13.ffn_gate.weightQ4_K[3072 8192]
-
blk.13.ffn_up.weightQ4_K[3072 8192]
-
blk.13.ffn_down.weightQ4_K[8192 3072]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.14.attn_q.weightQ4_K[3072 3072]
-
blk.14.attn_k.weightQ4_K[3072 3072]
-
blk.14.attn_v.weightQ4_K[3072 3072]
-
blk.14.attn_output.weightQ4_K[3072 3072]
-
blk.14.ffn_gate.weightQ4_K[3072 8192]
-
blk.14.ffn_up.weightQ4_K[3072 8192]
-
blk.14.ffn_down.weightQ4_K[8192 3072]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.15.attn_q.weightQ4_K[3072 3072]
-
blk.15.attn_k.weightQ4_K[3072 3072]
-
blk.15.attn_v.weightQ6_K[3072 3072]
-
blk.15.attn_output.weightQ4_K[3072 3072]
-
blk.15.ffn_gate.weightQ4_K[3072 8192]
-
blk.15.ffn_up.weightQ4_K[3072 8192]
-
blk.15.ffn_down.weightQ6_K[8192 3072]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.16.attn_q.weightQ4_K[3072 3072]
-
blk.16.attn_k.weightQ4_K[3072 3072]
-
blk.16.attn_v.weightQ4_K[3072 3072]
-
blk.16.attn_output.weightQ4_K[3072 3072]
-
blk.16.ffn_gate.weightQ4_K[3072 8192]
-
blk.16.ffn_up.weightQ4_K[3072 8192]
-
blk.16.ffn_down.weightQ4_K[8192 3072]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.17.attn_q.weightQ4_K[3072 3072]
-
blk.17.attn_k.weightQ4_K[3072 3072]
-
blk.17.attn_v.weightQ4_K[3072 3072]
-
blk.17.attn_output.weightQ4_K[3072 3072]
-
blk.17.ffn_gate.weightQ4_K[3072 8192]
-
blk.17.ffn_up.weightQ4_K[3072 8192]
-
blk.17.ffn_down.weightQ4_K[8192 3072]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.18.attn_q.weightQ4_K[3072 3072]
-
blk.18.attn_k.weightQ4_K[3072 3072]
-
blk.18.attn_v.weightQ6_K[3072 3072]
-
blk.18.attn_output.weightQ4_K[3072 3072]
-
blk.18.ffn_gate.weightQ4_K[3072 8192]
-
blk.18.ffn_up.weightQ4_K[3072 8192]
-
blk.18.ffn_down.weightQ6_K[8192 3072]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.19.attn_q.weightQ4_K[3072 3072]
-
blk.19.attn_k.weightQ4_K[3072 3072]
-
blk.19.attn_v.weightQ4_K[3072 3072]
-
blk.19.attn_output.weightQ4_K[3072 3072]
-
blk.19.ffn_gate.weightQ4_K[3072 8192]
-
blk.19.ffn_up.weightQ4_K[3072 8192]
-
blk.19.ffn_down.weightQ4_K[8192 3072]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.20.attn_q.weightQ4_K[3072 3072]
-
blk.20.attn_k.weightQ4_K[3072 3072]
-
blk.20.attn_v.weightQ4_K[3072 3072]
-
blk.20.attn_output.weightQ4_K[3072 3072]
-
blk.20.ffn_gate.weightQ4_K[3072 8192]
-
blk.20.ffn_up.weightQ4_K[3072 8192]
-
blk.20.ffn_down.weightQ4_K[8192 3072]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.21.attn_q.weightQ4_K[3072 3072]
-
blk.21.attn_k.weightQ4_K[3072 3072]
-
blk.21.attn_v.weightQ6_K[3072 3072]
-
blk.21.attn_output.weightQ4_K[3072 3072]
-
blk.21.ffn_gate.weightQ4_K[3072 8192]
-
blk.21.ffn_up.weightQ4_K[3072 8192]
-
blk.21.ffn_down.weightQ6_K[8192 3072]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.22.attn_q.weightQ4_K[3072 3072]
-
blk.22.attn_k.weightQ4_K[3072 3072]
-
blk.22.attn_v.weightQ4_K[3072 3072]
-
blk.22.attn_output.weightQ4_K[3072 3072]
-
blk.22.ffn_gate.weightQ4_K[3072 8192]
-
blk.22.ffn_up.weightQ4_K[3072 8192]
-
blk.22.ffn_down.weightQ4_K[8192 3072]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.23.attn_q.weightQ4_K[3072 3072]
-
blk.23.attn_k.weightQ4_K[3072 3072]
-
blk.23.attn_v.weightQ4_K[3072 3072]
-
blk.23.attn_output.weightQ4_K[3072 3072]
-
blk.23.ffn_gate.weightQ4_K[3072 8192]
-
blk.23.ffn_up.weightQ4_K[3072 8192]
-
blk.23.ffn_down.weightQ4_K[8192 3072]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.24.attn_q.weightQ4_K[3072 3072]
-
blk.24.attn_k.weightQ4_K[3072 3072]
-
blk.24.attn_v.weightQ6_K[3072 3072]
-
blk.24.attn_output.weightQ4_K[3072 3072]
-
blk.24.ffn_gate.weightQ4_K[3072 8192]
-
blk.24.ffn_up.weightQ4_K[3072 8192]
-
blk.24.ffn_down.weightQ6_K[8192 3072]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.25.attn_q.weightQ4_K[3072 3072]
-
blk.25.attn_k.weightQ4_K[3072 3072]
-
blk.25.attn_v.weightQ4_K[3072 3072]
-
blk.25.attn_output.weightQ4_K[3072 3072]
-
blk.25.ffn_gate.weightQ4_K[3072 8192]
-
blk.25.ffn_up.weightQ4_K[3072 8192]
-
blk.25.ffn_down.weightQ4_K[8192 3072]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.26.attn_q.weightQ4_K[3072 3072]
-
blk.26.attn_k.weightQ4_K[3072 3072]
-
blk.26.attn_v.weightQ4_K[3072 3072]
-
blk.26.attn_output.weightQ4_K[3072 3072]
-
blk.26.ffn_gate.weightQ4_K[3072 8192]
-
blk.26.ffn_up.weightQ4_K[3072 8192]
-
blk.26.ffn_down.weightQ4_K[8192 3072]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.27.attn_q.weightQ4_K[3072 3072]
-
blk.27.attn_k.weightQ4_K[3072 3072]
-
blk.27.attn_v.weightQ6_K[3072 3072]
-
blk.27.attn_output.weightQ4_K[3072 3072]
-
blk.27.ffn_gate.weightQ4_K[3072 8192]
-
blk.27.ffn_up.weightQ4_K[3072 8192]
-
blk.27.ffn_down.weightQ6_K[8192 3072]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.28.attn_q.weightQ4_K[3072 3072]
-
blk.28.attn_k.weightQ4_K[3072 3072]
-
blk.28.attn_v.weightQ6_K[3072 3072]
-
blk.28.attn_output.weightQ4_K[3072 3072]
-
blk.28.ffn_gate.weightQ4_K[3072 8192]
-
blk.28.ffn_up.weightQ4_K[3072 8192]
-
blk.28.ffn_down.weightQ6_K[8192 3072]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.29.attn_q.weightQ4_K[3072 3072]
-
blk.29.attn_k.weightQ4_K[3072 3072]
-
blk.29.attn_v.weightQ6_K[3072 3072]
-
blk.29.attn_output.weightQ4_K[3072 3072]
-
blk.29.ffn_gate.weightQ4_K[3072 8192]
-
blk.29.ffn_up.weightQ4_K[3072 8192]
-
blk.29.ffn_down.weightQ6_K[8192 3072]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.30.attn_q.weightQ4_K[3072 3072]
-
blk.30.attn_k.weightQ4_K[3072 3072]
-
blk.30.attn_v.weightQ6_K[3072 3072]
-
blk.30.attn_output.weightQ4_K[3072 3072]
-
blk.30.ffn_gate.weightQ4_K[3072 8192]
-
blk.30.ffn_up.weightQ4_K[3072 8192]
-
blk.30.ffn_down.weightQ6_K[8192 3072]
-
blk.30.attn_norm.weightF32[3072]
-
blk.30.ffn_norm.weightF32[3072]
-
blk.31.attn_q.weightQ4_K[3072 3072]
-
blk.31.attn_k.weightQ4_K[3072 3072]
-
blk.31.attn_v.weightQ6_K[3072 3072]
-
blk.31.attn_output.weightQ4_K[3072 3072]
-
blk.31.ffn_gate.weightQ4_K[3072 8192]
-
blk.31.ffn_up.weightQ4_K[3072 8192]
-
blk.31.ffn_down.weightQ6_K[8192 3072]
-
blk.31.attn_norm.weightF32[3072]
-
blk.31.ffn_norm.weightF32[3072]
-
output_norm.weightF32[3072]
-
output.weightQ6_K[3072 32064]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31