latest
7.6GB
Phi-3 mini 4k instruct model with continual pretraining performed using data extracted from the "Reflexion" LLM research paper.
3B
3 Pulls Updated 2 days ago
ace958f36426 · 7.6GB
-
general.architecturellama
-
general.file_typeF16
-
llama.attention.head_count32
-
llama.attention.head_count_kv32
-
llama.attention.key_length96
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.attention.value_length96
-
llama.block_count32
-
llama.context_length4096
-
llama.embedding_length3072
-
llama.feed_forward_length8192
-
llama.rope.dimension_count96
-
llama.rope.freq_base10000
-
llama.vocab_size32064
-
tokenizer.ggml.add_bos_tokenfalse
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id32000
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id32009
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000 -1000 -1000 0 0 ...]
-
tokenizer.ggml.token_type[3 3 4 6 6 ...]
-
tokenizer.ggml.tokens[<unk> <s> </s> <0x00> <0x01> ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightF16[3072 32064]
-
blk.0.attn_q.weightF16[3072 3072]
-
blk.0.attn_k.weightF16[3072 3072]
-
blk.0.attn_v.weightF16[3072 3072]
-
blk.0.attn_output.weightF16[3072 3072]
-
blk.0.ffn_gate.weightF16[3072 8192]
-
blk.0.ffn_up.weightF16[3072 8192]
-
blk.0.ffn_down.weightF16[8192 3072]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.1.attn_q.weightF16[3072 3072]
-
blk.1.attn_k.weightF16[3072 3072]
-
blk.1.attn_v.weightF16[3072 3072]
-
blk.1.attn_output.weightF16[3072 3072]
-
blk.1.ffn_gate.weightF16[3072 8192]
-
blk.1.ffn_up.weightF16[3072 8192]
-
blk.1.ffn_down.weightF16[8192 3072]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.2.attn_q.weightF16[3072 3072]
-
blk.2.attn_k.weightF16[3072 3072]
-
blk.2.attn_v.weightF16[3072 3072]
-
blk.2.attn_output.weightF16[3072 3072]
-
blk.2.ffn_gate.weightF16[3072 8192]
-
blk.2.ffn_up.weightF16[3072 8192]
-
blk.2.ffn_down.weightF16[8192 3072]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.3.attn_q.weightF16[3072 3072]
-
blk.3.attn_k.weightF16[3072 3072]
-
blk.3.attn_v.weightF16[3072 3072]
-
blk.3.attn_output.weightF16[3072 3072]
-
blk.3.ffn_gate.weightF16[3072 8192]
-
blk.3.ffn_up.weightF16[3072 8192]
-
blk.3.ffn_down.weightF16[8192 3072]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.4.attn_q.weightF16[3072 3072]
-
blk.4.attn_k.weightF16[3072 3072]
-
blk.4.attn_v.weightF16[3072 3072]
-
blk.4.attn_output.weightF16[3072 3072]
-
blk.4.ffn_gate.weightF16[3072 8192]
-
blk.4.ffn_up.weightF16[3072 8192]
-
blk.4.ffn_down.weightF16[8192 3072]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.5.attn_q.weightF16[3072 3072]
-
blk.5.attn_k.weightF16[3072 3072]
-
blk.5.attn_v.weightF16[3072 3072]
-
blk.5.attn_output.weightF16[3072 3072]
-
blk.5.ffn_gate.weightF16[3072 8192]
-
blk.5.ffn_up.weightF16[3072 8192]
-
blk.5.ffn_down.weightF16[8192 3072]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.6.attn_q.weightF16[3072 3072]
-
blk.6.attn_k.weightF16[3072 3072]
-
blk.6.attn_v.weightF16[3072 3072]
-
blk.6.attn_output.weightF16[3072 3072]
-
blk.6.ffn_gate.weightF16[3072 8192]
-
blk.6.ffn_up.weightF16[3072 8192]
-
blk.6.ffn_down.weightF16[8192 3072]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.7.attn_q.weightF16[3072 3072]
-
blk.7.attn_k.weightF16[3072 3072]
-
blk.7.attn_v.weightF16[3072 3072]
-
blk.7.attn_output.weightF16[3072 3072]
-
blk.7.ffn_gate.weightF16[3072 8192]
-
blk.7.ffn_up.weightF16[3072 8192]
-
blk.7.ffn_down.weightF16[8192 3072]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.8.attn_q.weightF16[3072 3072]
-
blk.8.attn_k.weightF16[3072 3072]
-
blk.8.attn_v.weightF16[3072 3072]
-
blk.8.attn_output.weightF16[3072 3072]
-
blk.8.ffn_gate.weightF16[3072 8192]
-
blk.8.ffn_up.weightF16[3072 8192]
-
blk.8.ffn_down.weightF16[8192 3072]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.9.attn_q.weightF16[3072 3072]
-
blk.9.attn_k.weightF16[3072 3072]
-
blk.9.attn_v.weightF16[3072 3072]
-
blk.9.attn_output.weightF16[3072 3072]
-
blk.9.ffn_gate.weightF16[3072 8192]
-
blk.9.ffn_up.weightF16[3072 8192]
-
blk.9.ffn_down.weightF16[8192 3072]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.10.attn_q.weightF16[3072 3072]
-
blk.10.attn_k.weightF16[3072 3072]
-
blk.10.attn_v.weightF16[3072 3072]
-
blk.10.attn_output.weightF16[3072 3072]
-
blk.10.ffn_gate.weightF16[3072 8192]
-
blk.10.ffn_up.weightF16[3072 8192]
-
blk.10.ffn_down.weightF16[8192 3072]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.11.attn_q.weightF16[3072 3072]
-
blk.11.attn_k.weightF16[3072 3072]
-
blk.11.attn_v.weightF16[3072 3072]
-
blk.11.attn_output.weightF16[3072 3072]
-
blk.11.ffn_gate.weightF16[3072 8192]
-
blk.11.ffn_up.weightF16[3072 8192]
-
blk.11.ffn_down.weightF16[8192 3072]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.12.attn_q.weightF16[3072 3072]
-
blk.12.attn_k.weightF16[3072 3072]
-
blk.12.attn_v.weightF16[3072 3072]
-
blk.12.attn_output.weightF16[3072 3072]
-
blk.12.ffn_gate.weightF16[3072 8192]
-
blk.12.ffn_up.weightF16[3072 8192]
-
blk.12.ffn_down.weightF16[8192 3072]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.13.attn_q.weightF16[3072 3072]
-
blk.13.attn_k.weightF16[3072 3072]
-
blk.13.attn_v.weightF16[3072 3072]
-
blk.13.attn_output.weightF16[3072 3072]
-
blk.13.ffn_gate.weightF16[3072 8192]
-
blk.13.ffn_up.weightF16[3072 8192]
-
blk.13.ffn_down.weightF16[8192 3072]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.14.attn_q.weightF16[3072 3072]
-
blk.14.attn_k.weightF16[3072 3072]
-
blk.14.attn_v.weightF16[3072 3072]
-
blk.14.attn_output.weightF16[3072 3072]
-
blk.14.ffn_gate.weightF16[3072 8192]
-
blk.14.ffn_up.weightF16[3072 8192]
-
blk.14.ffn_down.weightF16[8192 3072]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.15.attn_q.weightF16[3072 3072]
-
blk.15.attn_k.weightF16[3072 3072]
-
blk.15.attn_v.weightF16[3072 3072]
-
blk.15.attn_output.weightF16[3072 3072]
-
blk.15.ffn_gate.weightF16[3072 8192]
-
blk.15.ffn_up.weightF16[3072 8192]
-
blk.15.ffn_down.weightF16[8192 3072]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.16.attn_q.weightF16[3072 3072]
-
blk.16.attn_k.weightF16[3072 3072]
-
blk.16.attn_v.weightF16[3072 3072]
-
blk.16.attn_output.weightF16[3072 3072]
-
blk.16.ffn_gate.weightF16[3072 8192]
-
blk.16.ffn_up.weightF16[3072 8192]
-
blk.16.ffn_down.weightF16[8192 3072]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.17.attn_q.weightF16[3072 3072]
-
blk.17.attn_k.weightF16[3072 3072]
-
blk.17.attn_v.weightF16[3072 3072]
-
blk.17.attn_output.weightF16[3072 3072]
-
blk.17.ffn_gate.weightF16[3072 8192]
-
blk.17.ffn_up.weightF16[3072 8192]
-
blk.17.ffn_down.weightF16[8192 3072]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.18.attn_q.weightF16[3072 3072]
-
blk.18.attn_k.weightF16[3072 3072]
-
blk.18.attn_v.weightF16[3072 3072]
-
blk.18.attn_output.weightF16[3072 3072]
-
blk.18.ffn_gate.weightF16[3072 8192]
-
blk.18.ffn_up.weightF16[3072 8192]
-
blk.18.ffn_down.weightF16[8192 3072]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.19.attn_q.weightF16[3072 3072]
-
blk.19.attn_k.weightF16[3072 3072]
-
blk.19.attn_v.weightF16[3072 3072]
-
blk.19.attn_output.weightF16[3072 3072]
-
blk.19.ffn_gate.weightF16[3072 8192]
-
blk.19.ffn_up.weightF16[3072 8192]
-
blk.19.ffn_down.weightF16[8192 3072]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.20.attn_q.weightF16[3072 3072]
-
blk.20.attn_k.weightF16[3072 3072]
-
blk.20.attn_v.weightF16[3072 3072]
-
blk.20.attn_output.weightF16[3072 3072]
-
blk.20.ffn_gate.weightF16[3072 8192]
-
blk.20.ffn_up.weightF16[3072 8192]
-
blk.20.ffn_down.weightF16[8192 3072]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.21.attn_q.weightF16[3072 3072]
-
blk.21.attn_k.weightF16[3072 3072]
-
blk.21.attn_v.weightF16[3072 3072]
-
blk.21.attn_output.weightF16[3072 3072]
-
blk.21.ffn_gate.weightF16[3072 8192]
-
blk.21.ffn_up.weightF16[3072 8192]
-
blk.21.ffn_down.weightF16[8192 3072]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.22.attn_q.weightF16[3072 3072]
-
blk.22.attn_k.weightF16[3072 3072]
-
blk.22.attn_v.weightF16[3072 3072]
-
blk.22.attn_output.weightF16[3072 3072]
-
blk.22.ffn_gate.weightF16[3072 8192]
-
blk.22.ffn_up.weightF16[3072 8192]
-
blk.22.ffn_down.weightF16[8192 3072]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.23.attn_q.weightF16[3072 3072]
-
blk.23.attn_k.weightF16[3072 3072]
-
blk.23.attn_v.weightF16[3072 3072]
-
blk.23.attn_output.weightF16[3072 3072]
-
blk.23.ffn_gate.weightF16[3072 8192]
-
blk.23.ffn_up.weightF16[3072 8192]
-
blk.23.ffn_down.weightF16[8192 3072]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.24.attn_q.weightF16[3072 3072]
-
blk.24.attn_k.weightF16[3072 3072]
-
blk.24.attn_v.weightF16[3072 3072]
-
blk.24.attn_output.weightF16[3072 3072]
-
blk.24.ffn_gate.weightF16[3072 8192]
-
blk.24.ffn_up.weightF16[3072 8192]
-
blk.24.ffn_down.weightF16[8192 3072]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.25.attn_q.weightF16[3072 3072]
-
blk.25.attn_k.weightF16[3072 3072]
-
blk.25.attn_v.weightF16[3072 3072]
-
blk.25.attn_output.weightF16[3072 3072]
-
blk.25.ffn_gate.weightF16[3072 8192]
-
blk.25.ffn_up.weightF16[3072 8192]
-
blk.25.ffn_down.weightF16[8192 3072]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.26.attn_q.weightF16[3072 3072]
-
blk.26.attn_k.weightF16[3072 3072]
-
blk.26.attn_v.weightF16[3072 3072]
-
blk.26.attn_output.weightF16[3072 3072]
-
blk.26.ffn_gate.weightF16[3072 8192]
-
blk.26.ffn_up.weightF16[3072 8192]
-
blk.26.ffn_down.weightF16[8192 3072]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.27.attn_q.weightF16[3072 3072]
-
blk.27.attn_k.weightF16[3072 3072]
-
blk.27.attn_v.weightF16[3072 3072]
-
blk.27.attn_output.weightF16[3072 3072]
-
blk.27.ffn_gate.weightF16[3072 8192]
-
blk.27.ffn_up.weightF16[3072 8192]
-
blk.27.ffn_down.weightF16[8192 3072]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.28.attn_q.weightF16[3072 3072]
-
blk.28.attn_k.weightF16[3072 3072]
-
blk.28.attn_v.weightF16[3072 3072]
-
blk.28.attn_output.weightF16[3072 3072]
-
blk.28.ffn_gate.weightF16[3072 8192]
-
blk.28.ffn_up.weightF16[3072 8192]
-
blk.28.ffn_down.weightF16[8192 3072]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.29.attn_q.weightF16[3072 3072]
-
blk.29.attn_k.weightF16[3072 3072]
-
blk.29.attn_v.weightF16[3072 3072]
-
blk.29.attn_output.weightF16[3072 3072]
-
blk.29.ffn_gate.weightF16[3072 8192]
-
blk.29.ffn_up.weightF16[3072 8192]
-
blk.29.ffn_down.weightF16[8192 3072]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.30.attn_q.weightF16[3072 3072]
-
blk.30.attn_k.weightF16[3072 3072]
-
blk.30.attn_v.weightF16[3072 3072]
-
blk.30.attn_output.weightF16[3072 3072]
-
blk.30.ffn_gate.weightF16[3072 8192]
-
blk.30.ffn_up.weightF16[3072 8192]
-
blk.30.ffn_down.weightF16[8192 3072]
-
blk.30.attn_norm.weightF32[3072]
-
blk.30.ffn_norm.weightF32[3072]
-
blk.31.attn_q.weightF16[3072 3072]
-
blk.31.attn_k.weightF16[3072 3072]
-
blk.31.attn_v.weightF16[3072 3072]
-
blk.31.attn_output.weightF16[3072 3072]
-
blk.31.ffn_gate.weightF16[3072 8192]
-
blk.31.ffn_up.weightF16[3072 8192]
-
blk.31.ffn_down.weightF16[8192 3072]
-
blk.31.attn_norm.weightF32[3072]
-
blk.31.ffn_norm.weightF32[3072]
-
output_norm.weightF32[3072]
-
output.weightF16[3072 32064]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31