Phi-3-mini-4K-instruct with CPO-SimPO
36 Pulls Updated 4 months ago
2db19146ce27 · 2.1GB
-
general.architecturephi3
-
general.file_type30
-
general.namePhi3
-
general.quantization_version2
-
phi3.attention.head_count32
-
phi3.attention.head_count_kv32
-
phi3.attention.layer_norm_rms_epsilon1e-05
-
phi3.block_count32
-
phi3.context_length4096
-
phi3.embedding_length3072
-
phi3.feed_forward_length8192
-
phi3.rope.dimension_count96
-
phi3.rope.freq_base10000
-
phi3.rope.scaling.original_context_length4096
-
quantize.imatrix.chunks_count80
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count128
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id32000
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id32000
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, 0, 0, ...]
-
tokenizer.ggml.token_type[3, 3, 4, 6, 6, ...]
-
tokenizer.ggml.tokens[<unk>, <s>, </s>, <0x00>, <0x01>, ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weight(!unknown_type 23!)[3072, 32064]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.0.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.0.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.0.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.1.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.1.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.1.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.2.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.2.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.2.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.3.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.3.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.3.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.4.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.4.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.4.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.5.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.5.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.5.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.6.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.6.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.6.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.7.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.7.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.7.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.8.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.8.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.8.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.9.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.9.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.9.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.10.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.10.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.10.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.11.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.11.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.11.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.12.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.12.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.12.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.13.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.13.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.13.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.14.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.14.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.14.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.15.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.15.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.15.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.16.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.16.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.16.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.17.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.17.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.17.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.18.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.18.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.18.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.19.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.19.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.19.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.20.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.20.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.20.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.21.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.21.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.21.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.22.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.22.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.22.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.23.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.23.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.23.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.24.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.24.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.24.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.25.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.25.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.25.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.26.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.26.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.26.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.27.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.27.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.27.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.28.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.28.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.28.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.29.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.29.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.29.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.30.attn_norm.weightF32[3072]
-
blk.30.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.30.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.30.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.30.ffn_norm.weightF32[3072]
-
blk.30.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
blk.31.attn_norm.weightF32[3072]
-
blk.31.attn_output.weight(!unknown_type 23!)[3072, 3072]
-
blk.31.attn_qkv.weight(!unknown_type 23!)[3072, 9216]
-
blk.31.ffn_down.weight(!unknown_type 23!)[8192, 3072]
-
blk.31.ffn_norm.weightF32[3072]
-
blk.31.ffn_up.weight(!unknown_type 23!)[3072, 16384]
-
output.weightQ6_K[3072, 32064]
-
output_norm.weightF32[3072]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31