Phi-3-mini-4K-instruct with CPO-SimPO
36 Pulls Updated 4 months ago
f2d4c49f15f5 · 1.6GB
-
general.architecturephi3
-
general.file_type22
-
general.namePhi3
-
general.quantization_version2
-
phi3.attention.head_count32
-
phi3.attention.head_count_kv32
-
phi3.attention.layer_norm_rms_epsilon1e-05
-
phi3.block_count32
-
phi3.context_length4096
-
phi3.embedding_length3072
-
phi3.feed_forward_length8192
-
phi3.rope.dimension_count96
-
phi3.rope.freq_base10000
-
phi3.rope.scaling.original_context_length4096
-
quantize.imatrix.chunks_count80
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count128
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id32000
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id32000
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, 0, 0, ...]
-
tokenizer.ggml.token_type[3, 3, 4, 6, 6, ...]
-
tokenizer.ggml.tokens[<unk>, <s>, </s>, <0x00>, <0x01>, ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weight(!unknown_type 21!)[3072, 32064]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.0.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.0.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.0.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.1.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.1.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.1.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.2.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.2.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.2.ffn_up.weightI32[3072, 16384]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.3.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.3.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.3.ffn_up.weightI32[3072, 16384]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.4.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.4.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.4.ffn_up.weightI32[3072, 16384]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.5.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.5.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.5.ffn_up.weightI32[3072, 16384]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.6.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.6.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.6.ffn_up.weightI32[3072, 16384]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.7.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.7.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.7.ffn_up.weightI32[3072, 16384]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.8.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.8.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.8.ffn_up.weightI32[3072, 16384]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.9.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.9.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.9.ffn_up.weightI32[3072, 16384]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.10.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.10.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.10.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.11.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.11.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.11.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.12.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.12.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.12.ffn_up.weightI32[3072, 16384]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.13.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.13.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.13.ffn_up.weightI32[3072, 16384]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.14.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.14.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.14.ffn_up.weightI32[3072, 16384]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.15.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.15.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.15.ffn_up.weightI32[3072, 16384]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.16.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.16.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.16.ffn_up.weightI32[3072, 16384]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.17.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.17.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.17.ffn_up.weightI32[3072, 16384]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.18.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.18.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.18.ffn_up.weightI32[3072, 16384]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.19.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.19.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.19.ffn_up.weightI32[3072, 16384]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.20.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.20.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.20.ffn_up.weightI32[3072, 16384]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.21.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.21.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.21.ffn_up.weightI32[3072, 16384]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.22.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.22.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.22.ffn_up.weightI32[3072, 16384]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.23.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.23.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.23.ffn_up.weightI32[3072, 16384]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.24.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.24.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.24.ffn_up.weightI32[3072, 16384]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.25.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.25.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.25.ffn_up.weightI32[3072, 16384]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.26.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.26.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.26.ffn_up.weightI32[3072, 16384]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.27.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.27.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.27.ffn_up.weightI32[3072, 16384]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.28.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.28.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.28.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.29.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.29.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.29.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.30.attn_norm.weightF32[3072]
-
blk.30.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.30.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.30.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.30.ffn_norm.weightF32[3072]
-
blk.30.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
blk.31.attn_norm.weightF32[3072]
-
blk.31.attn_output.weight(!unknown_type 21!)[3072, 3072]
-
blk.31.attn_qkv.weight(!unknown_type 21!)[3072, 9216]
-
blk.31.ffn_down.weight(!unknown_type 21!)[8192, 3072]
-
blk.31.ffn_norm.weightF32[3072]
-
blk.31.ffn_up.weight(!unknown_type 21!)[3072, 16384]
-
output.weightQ6_K[3072, 32064]
-
output_norm.weightF32[3072]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31