Phi-3-mini-4K-instruct with CPO-SimPO
36 Pulls Updated 4 months ago
84ade86b61b7 · 1.7GB
-
general.architecturephi3
-
general.file_type26
-
general.namePhi3
-
general.quantization_version2
-
phi3.attention.head_count32
-
phi3.attention.head_count_kv32
-
phi3.attention.layer_norm_rms_epsilon1e-05
-
phi3.block_count32
-
phi3.context_length4096
-
phi3.embedding_length3072
-
phi3.feed_forward_length8192
-
phi3.rope.dimension_count96
-
phi3.rope.freq_base10000
-
phi3.rope.scaling.original_context_length4096
-
quantize.imatrix.chunks_count80
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count128
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id32000
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id32000
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, 0, 0, ...]
-
tokenizer.ggml.token_type[3, 3, 4, 6, 6, ...]
-
tokenizer.ggml.tokens[<unk>, <s>, </s>, <0x00>, <0x01>, ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weight(!unknown_type 21!)[3072, 32064]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.0.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.0.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.0.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.1.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.1.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.1.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.2.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.2.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.2.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.3.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.3.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.3.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.4.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.4.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.4.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.5.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.5.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.5.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.6.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.6.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.6.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.7.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.7.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.7.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.8.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.8.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.8.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.9.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.9.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.9.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.10.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.10.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.10.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.11.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.11.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.11.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.12.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.12.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.12.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.13.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.13.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.13.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.14.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.14.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.14.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.15.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.15.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.15.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.16.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.16.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.16.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.17.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.17.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.17.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.18.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.18.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.18.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.19.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.19.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.19.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.20.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.20.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.20.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.21.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.21.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.21.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.22.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.22.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.22.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.23.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.23.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.23.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.24.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.24.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.24.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.25.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.25.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.25.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.26.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.26.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.26.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.27.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.27.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.27.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.28.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.28.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.28.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.29.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.29.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.29.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.30.attn_norm.weightF32[3072]
-
blk.30.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.30.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.30.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.30.ffn_norm.weightF32[3072]
-
blk.30.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.31.attn_norm.weightF32[3072]
-
blk.31.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.31.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.31.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.31.ffn_norm.weightF32[3072]
-
blk.31.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
output.weightQ6_K[3072, 32064]
-
output_norm.weightF32[3072]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31