latest
2.0GB
A 3B parameter GPT-like model fine-tuned on a mix of publicly available datasets using DPO.
153 Pulls Updated 7 months ago
6acee498f64c · 2.0GB
-
general.architecturestablelm
-
general.file_typeQ5_K_M
-
stablelm.attention.head_count32
-
stablelm.attention.layer_norm_epsilon1e-05
-
stablelm.block_count32
-
stablelm.context_length4096
-
stablelm.embedding_length2560
-
stablelm.feed_forward_length6912
-
stablelm.rope.dimension_count20
-
stablelm.use_parallel_residualtrue
-
tokenizer.ggml.bos_token_id0
-
tokenizer.ggml.eos_token_id50279
-
tokenizer.ggml.merges[Ġ Ġ Ġ t Ġ a h e i n ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.token_type[3 3 1 1 1 ...]
-
tokenizer.ggml.tokens[<|endoftext|> <|padding|> ! " # ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightQ5_K[2560 50304]
-
blk.0.attn_norm.biasF32[2560]
-
blk.0.attn_norm.weightF32[2560]
-
blk.0.ffn_down.weightQ6_K[6912 2560]
-
blk.0.ffn_gate.weightQ5_K[2560 6912]
-
blk.0.ffn_up.weightQ5_K[2560 6912]
-
blk.0.ffn_norm.biasF32[2560]
-
blk.0.ffn_norm.weightF32[2560]
-
blk.0.attn_k.weightQ5_K[2560 2560]
-
blk.0.attn_output.weightQ5_K[2560 2560]
-
blk.0.attn_q.weightQ5_K[2560 2560]
-
blk.0.attn_v.weightQ6_K[2560 2560]
-
blk.1.attn_norm.biasF32[2560]
-
blk.1.attn_norm.weightF32[2560]
-
blk.1.ffn_down.weightQ6_K[6912 2560]
-
blk.1.ffn_gate.weightQ5_K[2560 6912]
-
blk.1.ffn_up.weightQ5_K[2560 6912]
-
blk.1.ffn_norm.biasF32[2560]
-
blk.1.ffn_norm.weightF32[2560]
-
blk.1.attn_k.weightQ5_K[2560 2560]
-
blk.1.attn_output.weightQ5_K[2560 2560]
-
blk.1.attn_q.weightQ5_K[2560 2560]
-
blk.1.attn_v.weightQ6_K[2560 2560]
-
blk.2.attn_norm.biasF32[2560]
-
blk.2.attn_norm.weightF32[2560]
-
blk.2.ffn_down.weightQ6_K[6912 2560]
-
blk.2.ffn_gate.weightQ5_K[2560 6912]
-
blk.2.ffn_up.weightQ5_K[2560 6912]
-
blk.2.ffn_norm.biasF32[2560]
-
blk.2.ffn_norm.weightF32[2560]
-
blk.2.attn_k.weightQ5_K[2560 2560]
-
blk.2.attn_output.weightQ5_K[2560 2560]
-
blk.2.attn_q.weightQ5_K[2560 2560]
-
blk.2.attn_v.weightQ6_K[2560 2560]
-
blk.3.attn_norm.biasF32[2560]
-
blk.3.attn_norm.weightF32[2560]
-
blk.3.ffn_down.weightQ5_K[6912 2560]
-
blk.3.ffn_gate.weightQ5_K[2560 6912]
-
blk.3.ffn_up.weightQ5_K[2560 6912]
-
blk.3.ffn_norm.biasF32[2560]
-
blk.3.ffn_norm.weightF32[2560]
-
blk.3.attn_k.weightQ5_K[2560 2560]
-
blk.3.attn_output.weightQ5_K[2560 2560]
-
blk.3.attn_q.weightQ5_K[2560 2560]
-
blk.3.attn_v.weightQ5_K[2560 2560]
-
blk.4.attn_norm.biasF32[2560]
-
blk.4.attn_norm.weightF32[2560]
-
blk.4.ffn_down.weightQ5_K[6912 2560]
-
blk.4.ffn_gate.weightQ5_K[2560 6912]
-
blk.4.ffn_up.weightQ5_K[2560 6912]
-
blk.4.ffn_norm.biasF32[2560]
-
blk.4.ffn_norm.weightF32[2560]
-
blk.4.attn_k.weightQ5_K[2560 2560]
-
blk.4.attn_output.weightQ5_K[2560 2560]
-
blk.4.attn_q.weightQ5_K[2560 2560]
-
blk.4.attn_v.weightQ5_K[2560 2560]
-
blk.5.attn_norm.biasF32[2560]
-
blk.5.attn_norm.weightF32[2560]
-
blk.5.ffn_down.weightQ6_K[6912 2560]
-
blk.5.ffn_gate.weightQ5_K[2560 6912]
-
blk.5.ffn_up.weightQ5_K[2560 6912]
-
blk.5.ffn_norm.biasF32[2560]
-
blk.5.ffn_norm.weightF32[2560]
-
blk.5.attn_k.weightQ5_K[2560 2560]
-
blk.5.attn_output.weightQ5_K[2560 2560]
-
blk.5.attn_q.weightQ5_K[2560 2560]
-
blk.5.attn_v.weightQ6_K[2560 2560]
-
blk.6.attn_norm.biasF32[2560]
-
blk.6.attn_norm.weightF32[2560]
-
blk.6.ffn_down.weightQ6_K[6912 2560]
-
blk.6.ffn_gate.weightQ5_K[2560 6912]
-
blk.6.ffn_up.weightQ5_K[2560 6912]
-
blk.6.ffn_norm.biasF32[2560]
-
blk.6.ffn_norm.weightF32[2560]
-
blk.6.attn_k.weightQ5_K[2560 2560]
-
blk.6.attn_output.weightQ5_K[2560 2560]
-
blk.6.attn_q.weightQ5_K[2560 2560]
-
blk.6.attn_v.weightQ6_K[2560 2560]
-
blk.7.attn_norm.biasF32[2560]
-
blk.7.attn_norm.weightF32[2560]
-
blk.7.ffn_down.weightQ6_K[6912 2560]
-
blk.7.ffn_gate.weightQ5_K[2560 6912]
-
blk.7.ffn_up.weightQ5_K[2560 6912]
-
blk.7.ffn_norm.biasF32[2560]
-
blk.7.ffn_norm.weightF32[2560]
-
blk.7.attn_k.weightQ5_K[2560 2560]
-
blk.7.attn_output.weightQ5_K[2560 2560]
-
blk.7.attn_q.weightQ5_K[2560 2560]
-
blk.7.attn_v.weightQ6_K[2560 2560]
-
blk.8.attn_norm.biasF32[2560]
-
blk.8.attn_norm.weightF32[2560]
-
blk.8.ffn_down.weightQ6_K[6912 2560]
-
blk.8.ffn_gate.weightQ5_K[2560 6912]
-
blk.8.ffn_up.weightQ5_K[2560 6912]
-
blk.8.ffn_norm.biasF32[2560]
-
blk.8.ffn_norm.weightF32[2560]
-
blk.8.attn_k.weightQ5_K[2560 2560]
-
blk.8.attn_output.weightQ5_K[2560 2560]
-
blk.8.attn_q.weightQ5_K[2560 2560]
-
blk.8.attn_v.weightQ6_K[2560 2560]
-
blk.9.attn_norm.biasF32[2560]
-
blk.9.attn_norm.weightF32[2560]
-
blk.9.ffn_down.weightQ6_K[6912 2560]
-
blk.9.ffn_gate.weightQ5_K[2560 6912]
-
blk.9.ffn_up.weightQ5_K[2560 6912]
-
blk.9.ffn_norm.biasF32[2560]
-
blk.9.ffn_norm.weightF32[2560]
-
blk.9.attn_k.weightQ5_K[2560 2560]
-
blk.9.attn_output.weightQ5_K[2560 2560]
-
blk.9.attn_q.weightQ5_K[2560 2560]
-
blk.9.attn_v.weightQ6_K[2560 2560]
-
blk.10.attn_norm.biasF32[2560]
-
blk.10.attn_norm.weightF32[2560]
-
blk.10.ffn_down.weightQ6_K[6912 2560]
-
blk.10.ffn_gate.weightQ5_K[2560 6912]
-
blk.10.ffn_up.weightQ5_K[2560 6912]
-
blk.10.ffn_norm.biasF32[2560]
-
blk.10.ffn_norm.weightF32[2560]
-
blk.10.attn_k.weightQ5_K[2560 2560]
-
blk.10.attn_output.weightQ5_K[2560 2560]
-
blk.10.attn_q.weightQ5_K[2560 2560]
-
blk.10.attn_v.weightQ6_K[2560 2560]
-
blk.11.attn_norm.biasF32[2560]
-
blk.11.attn_norm.weightF32[2560]
-
blk.11.ffn_down.weightQ6_K[6912 2560]
-
blk.11.ffn_gate.weightQ5_K[2560 6912]
-
blk.11.ffn_up.weightQ5_K[2560 6912]
-
blk.11.ffn_norm.biasF32[2560]
-
blk.11.ffn_norm.weightF32[2560]
-
blk.11.attn_k.weightQ5_K[2560 2560]
-
blk.11.attn_output.weightQ5_K[2560 2560]
-
blk.11.attn_q.weightQ5_K[2560 2560]
-
blk.11.attn_v.weightQ6_K[2560 2560]
-
blk.12.attn_norm.biasF32[2560]
-
blk.12.attn_norm.weightF32[2560]
-
blk.12.ffn_down.weightQ5_K[6912 2560]
-
blk.12.ffn_gate.weightQ5_K[2560 6912]
-
blk.12.ffn_up.weightQ5_K[2560 6912]
-
blk.12.ffn_norm.biasF32[2560]
-
blk.12.ffn_norm.weightF32[2560]
-
blk.12.attn_k.weightQ5_K[2560 2560]
-
blk.12.attn_output.weightQ5_K[2560 2560]
-
blk.12.attn_q.weightQ5_K[2560 2560]
-
blk.12.attn_v.weightQ5_K[2560 2560]
-
blk.13.attn_norm.biasF32[2560]
-
blk.13.attn_norm.weightF32[2560]
-
blk.13.ffn_down.weightQ5_K[6912 2560]
-
blk.13.ffn_gate.weightQ5_K[2560 6912]
-
blk.13.ffn_up.weightQ5_K[2560 6912]
-
blk.13.ffn_norm.biasF32[2560]
-
blk.13.ffn_norm.weightF32[2560]
-
blk.13.attn_k.weightQ5_K[2560 2560]
-
blk.13.attn_output.weightQ5_K[2560 2560]
-
blk.13.attn_q.weightQ5_K[2560 2560]
-
blk.13.attn_v.weightQ5_K[2560 2560]
-
blk.14.attn_norm.biasF32[2560]
-
blk.14.attn_norm.weightF32[2560]
-
blk.14.ffn_down.weightQ6_K[6912 2560]
-
blk.14.ffn_gate.weightQ5_K[2560 6912]
-
blk.14.ffn_up.weightQ5_K[2560 6912]
-
blk.14.ffn_norm.biasF32[2560]
-
blk.14.ffn_norm.weightF32[2560]
-
blk.14.attn_k.weightQ5_K[2560 2560]
-
blk.14.attn_output.weightQ5_K[2560 2560]
-
blk.14.attn_q.weightQ5_K[2560 2560]
-
blk.14.attn_v.weightQ6_K[2560 2560]
-
blk.15.attn_norm.biasF32[2560]
-
blk.15.attn_norm.weightF32[2560]
-
blk.15.ffn_down.weightQ5_K[6912 2560]
-
blk.15.ffn_gate.weightQ5_K[2560 6912]
-
blk.15.ffn_up.weightQ5_K[2560 6912]
-
blk.15.ffn_norm.biasF32[2560]
-
blk.15.ffn_norm.weightF32[2560]
-
blk.15.attn_k.weightQ5_K[2560 2560]
-
blk.15.attn_output.weightQ5_K[2560 2560]
-
blk.15.attn_q.weightQ5_K[2560 2560]
-
blk.15.attn_v.weightQ5_K[2560 2560]
-
blk.16.attn_norm.biasF32[2560]
-
blk.16.attn_norm.weightF32[2560]
-
blk.16.ffn_down.weightQ5_K[6912 2560]
-
blk.16.ffn_gate.weightQ5_K[2560 6912]
-
blk.16.ffn_up.weightQ5_K[2560 6912]
-
blk.16.ffn_norm.biasF32[2560]
-
blk.16.ffn_norm.weightF32[2560]
-
blk.16.attn_k.weightQ5_K[2560 2560]
-
blk.16.attn_output.weightQ5_K[2560 2560]
-
blk.16.attn_q.weightQ5_K[2560 2560]
-
blk.16.attn_v.weightQ5_K[2560 2560]
-
blk.17.attn_norm.biasF32[2560]
-
blk.17.attn_norm.weightF32[2560]
-
blk.17.ffn_down.weightQ6_K[6912 2560]
-
blk.17.ffn_gate.weightQ5_K[2560 6912]
-
blk.17.ffn_up.weightQ5_K[2560 6912]
-
blk.17.ffn_norm.biasF32[2560]
-
blk.17.ffn_norm.weightF32[2560]
-
blk.17.attn_k.weightQ5_K[2560 2560]
-
blk.17.attn_output.weightQ5_K[2560 2560]
-
blk.17.attn_q.weightQ5_K[2560 2560]
-
blk.17.attn_v.weightQ6_K[2560 2560]
-
blk.18.attn_norm.biasF32[2560]
-
blk.18.attn_norm.weightF32[2560]
-
blk.18.ffn_down.weightQ5_K[6912 2560]
-
blk.18.ffn_gate.weightQ5_K[2560 6912]
-
blk.18.ffn_up.weightQ5_K[2560 6912]
-
blk.18.ffn_norm.biasF32[2560]
-
blk.18.ffn_norm.weightF32[2560]
-
blk.18.attn_k.weightQ5_K[2560 2560]
-
blk.18.attn_output.weightQ5_K[2560 2560]
-
blk.18.attn_q.weightQ5_K[2560 2560]
-
blk.18.attn_v.weightQ5_K[2560 2560]
-
blk.19.attn_norm.biasF32[2560]
-
blk.19.attn_norm.weightF32[2560]
-
blk.19.ffn_down.weightQ5_K[6912 2560]
-
blk.19.ffn_gate.weightQ5_K[2560 6912]
-
blk.19.ffn_up.weightQ5_K[2560 6912]
-
blk.19.ffn_norm.biasF32[2560]
-
blk.19.ffn_norm.weightF32[2560]
-
blk.19.attn_k.weightQ5_K[2560 2560]
-
blk.19.attn_output.weightQ5_K[2560 2560]
-
blk.19.attn_q.weightQ5_K[2560 2560]
-
blk.19.attn_v.weightQ5_K[2560 2560]
-
blk.20.attn_norm.biasF32[2560]
-
blk.20.attn_norm.weightF32[2560]
-
blk.20.ffn_down.weightQ5_K[6912 2560]
-
blk.20.ffn_gate.weightQ5_K[2560 6912]
-
blk.20.ffn_up.weightQ5_K[2560 6912]
-
blk.20.ffn_norm.biasF32[2560]
-
blk.20.ffn_norm.weightF32[2560]
-
blk.20.attn_k.weightQ5_K[2560 2560]
-
blk.20.attn_output.weightQ5_K[2560 2560]
-
blk.20.attn_q.weightQ5_K[2560 2560]
-
blk.20.attn_v.weightQ5_K[2560 2560]
-
blk.21.attn_norm.biasF32[2560]
-
blk.21.attn_norm.weightF32[2560]
-
blk.21.ffn_down.weightQ5_K[6912 2560]
-
blk.21.ffn_gate.weightQ5_K[2560 6912]
-
blk.21.ffn_up.weightQ5_K[2560 6912]
-
blk.21.ffn_norm.biasF32[2560]
-
blk.21.ffn_norm.weightF32[2560]
-
blk.21.attn_k.weightQ5_K[2560 2560]
-
blk.21.attn_output.weightQ5_K[2560 2560]
-
blk.21.attn_q.weightQ5_K[2560 2560]
-
blk.21.attn_v.weightQ5_K[2560 2560]
-
blk.22.attn_norm.biasF32[2560]
-
blk.22.attn_norm.weightF32[2560]
-
blk.22.ffn_down.weightQ6_K[6912 2560]
-
blk.22.ffn_gate.weightQ5_K[2560 6912]
-
blk.22.ffn_up.weightQ5_K[2560 6912]
-
blk.22.ffn_norm.biasF32[2560]
-
blk.22.ffn_norm.weightF32[2560]
-
blk.22.attn_k.weightQ5_K[2560 2560]
-
blk.22.attn_output.weightQ5_K[2560 2560]
-
blk.22.attn_q.weightQ5_K[2560 2560]
-
blk.22.attn_v.weightQ6_K[2560 2560]
-
blk.23.attn_norm.biasF32[2560]
-
blk.23.attn_norm.weightF32[2560]
-
blk.23.ffn_down.weightQ5_K[6912 2560]
-
blk.23.ffn_gate.weightQ5_K[2560 6912]
-
blk.23.ffn_up.weightQ5_K[2560 6912]
-
blk.23.ffn_norm.biasF32[2560]
-
blk.23.ffn_norm.weightF32[2560]
-
blk.23.attn_k.weightQ5_K[2560 2560]
-
blk.23.attn_output.weightQ5_K[2560 2560]
-
blk.23.attn_q.weightQ5_K[2560 2560]
-
blk.23.attn_v.weightQ5_K[2560 2560]
-
blk.24.attn_norm.biasF32[2560]
-
blk.24.attn_norm.weightF32[2560]
-
blk.24.ffn_down.weightQ5_K[6912 2560]
-
blk.24.ffn_gate.weightQ5_K[2560 6912]
-
blk.24.ffn_up.weightQ5_K[2560 6912]
-
blk.24.ffn_norm.biasF32[2560]
-
blk.24.ffn_norm.weightF32[2560]
-
blk.24.attn_k.weightQ5_K[2560 2560]
-
blk.24.attn_output.weightQ5_K[2560 2560]
-
blk.24.attn_q.weightQ5_K[2560 2560]
-
blk.24.attn_v.weightQ5_K[2560 2560]
-
blk.25.attn_norm.biasF32[2560]
-
blk.25.attn_norm.weightF32[2560]
-
blk.25.ffn_down.weightQ6_K[6912 2560]
-
blk.25.ffn_gate.weightQ5_K[2560 6912]
-
blk.25.ffn_up.weightQ5_K[2560 6912]
-
blk.25.ffn_norm.biasF32[2560]
-
blk.25.ffn_norm.weightF32[2560]
-
blk.25.attn_k.weightQ5_K[2560 2560]
-
blk.25.attn_output.weightQ5_K[2560 2560]
-
blk.25.attn_q.weightQ5_K[2560 2560]
-
blk.25.attn_v.weightQ6_K[2560 2560]
-
blk.26.attn_norm.biasF32[2560]
-
blk.26.attn_norm.weightF32[2560]
-
blk.26.ffn_down.weightQ5_K[6912 2560]
-
blk.26.ffn_gate.weightQ5_K[2560 6912]
-
blk.26.ffn_up.weightQ5_K[2560 6912]
-
blk.26.ffn_norm.biasF32[2560]
-
blk.26.ffn_norm.weightF32[2560]
-
blk.26.attn_k.weightQ5_K[2560 2560]
-
blk.26.attn_output.weightQ5_K[2560 2560]
-
blk.26.attn_q.weightQ5_K[2560 2560]
-
blk.26.attn_v.weightQ5_K[2560 2560]
-
blk.27.attn_norm.biasF32[2560]
-
blk.27.attn_norm.weightF32[2560]
-
blk.27.ffn_down.weightQ5_K[6912 2560]
-
blk.27.ffn_gate.weightQ5_K[2560 6912]
-
blk.27.ffn_up.weightQ5_K[2560 6912]
-
blk.27.ffn_norm.biasF32[2560]
-
blk.27.ffn_norm.weightF32[2560]
-
blk.27.attn_k.weightQ5_K[2560 2560]
-
blk.27.attn_output.weightQ5_K[2560 2560]
-
blk.27.attn_q.weightQ5_K[2560 2560]
-
blk.27.attn_v.weightQ5_K[2560 2560]
-
blk.28.attn_norm.biasF32[2560]
-
blk.28.attn_norm.weightF32[2560]
-
blk.28.ffn_down.weightQ6_K[6912 2560]
-
blk.28.ffn_gate.weightQ5_K[2560 6912]
-
blk.28.ffn_up.weightQ5_K[2560 6912]
-
blk.28.ffn_norm.biasF32[2560]
-
blk.28.ffn_norm.weightF32[2560]
-
blk.28.attn_k.weightQ5_K[2560 2560]
-
blk.28.attn_output.weightQ5_K[2560 2560]
-
blk.28.attn_q.weightQ5_K[2560 2560]
-
blk.28.attn_v.weightQ6_K[2560 2560]
-
blk.29.attn_norm.biasF32[2560]
-
blk.29.attn_norm.weightF32[2560]
-
blk.29.ffn_down.weightQ5_K[6912 2560]
-
blk.29.ffn_gate.weightQ5_K[2560 6912]
-
blk.29.ffn_up.weightQ5_K[2560 6912]
-
blk.29.ffn_norm.biasF32[2560]
-
blk.29.ffn_norm.weightF32[2560]
-
blk.29.attn_k.weightQ5_K[2560 2560]
-
blk.29.attn_output.weightQ5_K[2560 2560]
-
blk.29.attn_q.weightQ5_K[2560 2560]
-
blk.29.attn_v.weightQ5_K[2560 2560]
-
blk.30.attn_norm.biasF32[2560]
-
blk.30.attn_norm.weightF32[2560]
-
blk.30.ffn_down.weightQ6_K[6912 2560]
-
blk.30.ffn_gate.weightQ5_K[2560 6912]
-
blk.30.ffn_up.weightQ5_K[2560 6912]
-
blk.30.ffn_norm.biasF32[2560]
-
blk.30.ffn_norm.weightF32[2560]
-
blk.30.attn_k.weightQ5_K[2560 2560]
-
blk.30.attn_output.weightQ5_K[2560 2560]
-
blk.30.attn_q.weightQ5_K[2560 2560]
-
blk.30.attn_v.weightQ6_K[2560 2560]
-
blk.31.attn_norm.biasF32[2560]
-
blk.31.attn_norm.weightF32[2560]
-
blk.31.ffn_down.weightQ5_K[6912 2560]
-
blk.31.ffn_gate.weightQ5_K[2560 6912]
-
blk.31.ffn_up.weightQ5_K[2560 6912]
-
blk.31.ffn_norm.biasF32[2560]
-
blk.31.ffn_norm.weightF32[2560]
-
blk.31.attn_k.weightQ5_K[2560 2560]
-
blk.31.attn_output.weightQ5_K[2560 2560]
-
blk.31.attn_q.weightQ5_K[2560 2560]
-
blk.31.attn_v.weightQ5_K[2560 2560]
-
output.weightQ6_K[2560 50304]
-
output_norm.biasF32[2560]
-
output_norm.weightF32[2560]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31