SmolLM 135M Instruct Trained on DEVINator Data for Open Hands (Open Devin)
13 Pulls Updated 8 weeks ago
0796c48783a8 · 88MB
-
general.architecturellama
-
general.file_type11
-
general.quantization_version2
-
llama.attention.head_count9
-
llama.attention.head_count_kv3
-
llama.attention.key_length64
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.attention.value_length64
-
llama.block_count30
-
llama.context_length2048
-
llama.embedding_length576
-
llama.feed_forward_length1536
-
llama.rope.dimension_count64
-
llama.rope.freq_base10000
-
llama.vocab_size49152
-
tokenizer.ggml.add_bos_tokenfalse
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_padding_tokenfalse
-
tokenizer.ggml.add_unknown_tokenfalse
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id2
-
tokenizer.ggml.merges[Ġ t, Ġ a, i n, h e, Ġ Ġ, ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id2
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[0, 1, 2, 3, 4, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<|endoftext|>, <|im_start|>, <|im_end|>, <repo_name>, <reponame>, ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightQ8_0[576, 49152]
-
blk.0.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.0.attn_norm.weightF32[576]
-
blk.0.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.0.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.0.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.0.ffn_down.weightQ3_K[1536, 576]
-
blk.0.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.0.ffn_norm.weightF32[576]
-
blk.0.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.1.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.1.attn_norm.weightF32[576]
-
blk.1.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.1.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.1.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.1.ffn_down.weightQ3_K[1536, 576]
-
blk.1.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.1.ffn_norm.weightF32[576]
-
blk.1.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.2.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.2.attn_norm.weightF32[576]
-
blk.2.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.2.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.2.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.2.ffn_down.weightQ3_K[1536, 576]
-
blk.2.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.2.ffn_norm.weightF32[576]
-
blk.2.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.3.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.3.attn_norm.weightF32[576]
-
blk.3.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.3.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.3.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.3.ffn_down.weightQ3_K[1536, 576]
-
blk.3.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.3.ffn_norm.weightF32[576]
-
blk.3.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.4.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.4.attn_norm.weightF32[576]
-
blk.4.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.4.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.4.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.4.ffn_down.weightQ3_K[1536, 576]
-
blk.4.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.4.ffn_norm.weightF32[576]
-
blk.4.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.5.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.5.attn_norm.weightF32[576]
-
blk.5.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.5.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.5.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.5.ffn_down.weightQ3_K[1536, 576]
-
blk.5.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.5.ffn_norm.weightF32[576]
-
blk.5.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.6.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.6.attn_norm.weightF32[576]
-
blk.6.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.6.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.6.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.6.ffn_down.weightQ3_K[1536, 576]
-
blk.6.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.6.ffn_norm.weightF32[576]
-
blk.6.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.7.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.7.attn_norm.weightF32[576]
-
blk.7.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.7.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.7.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.7.ffn_down.weightQ3_K[1536, 576]
-
blk.7.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.7.ffn_norm.weightF32[576]
-
blk.7.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.8.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.8.attn_norm.weightF32[576]
-
blk.8.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.8.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.8.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.8.ffn_down.weightQ3_K[1536, 576]
-
blk.8.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.8.ffn_norm.weightF32[576]
-
blk.8.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.9.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.9.attn_norm.weightF32[576]
-
blk.9.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.9.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.9.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.9.ffn_down.weightQ3_K[1536, 576]
-
blk.9.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.9.ffn_norm.weightF32[576]
-
blk.9.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.10.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.10.attn_norm.weightF32[576]
-
blk.10.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.10.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.10.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.10.ffn_down.weightQ3_K[1536, 576]
-
blk.10.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.10.ffn_norm.weightF32[576]
-
blk.10.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.11.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.11.attn_norm.weightF32[576]
-
blk.11.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.11.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.11.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.11.ffn_down.weightQ3_K[1536, 576]
-
blk.11.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.11.ffn_norm.weightF32[576]
-
blk.11.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.12.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.12.attn_norm.weightF32[576]
-
blk.12.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.12.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.12.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.12.ffn_down.weightQ3_K[1536, 576]
-
blk.12.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.12.ffn_norm.weightF32[576]
-
blk.12.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.13.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.13.attn_norm.weightF32[576]
-
blk.13.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.13.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.13.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.13.ffn_down.weightQ3_K[1536, 576]
-
blk.13.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.13.ffn_norm.weightF32[576]
-
blk.13.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.14.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.14.attn_norm.weightF32[576]
-
blk.14.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.14.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.14.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.14.ffn_down.weightQ3_K[1536, 576]
-
blk.14.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.14.ffn_norm.weightF32[576]
-
blk.14.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.15.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.15.attn_norm.weightF32[576]
-
blk.15.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.15.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.15.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.15.ffn_down.weightQ3_K[1536, 576]
-
blk.15.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.15.ffn_norm.weightF32[576]
-
blk.15.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.16.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.16.attn_norm.weightF32[576]
-
blk.16.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.16.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.16.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.16.ffn_down.weightQ3_K[1536, 576]
-
blk.16.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.16.ffn_norm.weightF32[576]
-
blk.16.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.17.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.17.attn_norm.weightF32[576]
-
blk.17.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.17.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.17.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.17.ffn_down.weightQ3_K[1536, 576]
-
blk.17.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.17.ffn_norm.weightF32[576]
-
blk.17.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.18.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.18.attn_norm.weightF32[576]
-
blk.18.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.18.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.18.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.18.ffn_down.weightQ3_K[1536, 576]
-
blk.18.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.18.ffn_norm.weightF32[576]
-
blk.18.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.19.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.19.attn_norm.weightF32[576]
-
blk.19.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.19.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.19.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.19.ffn_down.weightQ3_K[1536, 576]
-
blk.19.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.19.ffn_norm.weightF32[576]
-
blk.19.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.20.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.20.attn_norm.weightF32[576]
-
blk.20.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.20.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.20.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.20.ffn_down.weightQ3_K[1536, 576]
-
blk.20.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.20.ffn_norm.weightF32[576]
-
blk.20.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.21.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.21.attn_norm.weightF32[576]
-
blk.21.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.21.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.21.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.21.ffn_down.weightQ3_K[1536, 576]
-
blk.21.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.21.ffn_norm.weightF32[576]
-
blk.21.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.22.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.22.attn_norm.weightF32[576]
-
blk.22.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.22.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.22.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.22.ffn_down.weightQ3_K[1536, 576]
-
blk.22.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.22.ffn_norm.weightF32[576]
-
blk.22.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.23.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.23.attn_norm.weightF32[576]
-
blk.23.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.23.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.23.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.23.ffn_down.weightQ3_K[1536, 576]
-
blk.23.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.23.ffn_norm.weightF32[576]
-
blk.23.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.24.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.24.attn_norm.weightF32[576]
-
blk.24.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.24.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.24.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.24.ffn_down.weightQ3_K[1536, 576]
-
blk.24.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.24.ffn_norm.weightF32[576]
-
blk.24.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.25.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.25.attn_norm.weightF32[576]
-
blk.25.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.25.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.25.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.25.ffn_down.weightQ3_K[1536, 576]
-
blk.25.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.25.ffn_norm.weightF32[576]
-
blk.25.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.26.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.26.attn_norm.weightF32[576]
-
blk.26.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.26.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.26.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.26.ffn_down.weightQ3_K[1536, 576]
-
blk.26.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.26.ffn_norm.weightF32[576]
-
blk.26.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.27.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.27.attn_norm.weightF32[576]
-
blk.27.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.27.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.27.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.27.ffn_down.weightQ3_K[1536, 576]
-
blk.27.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.27.ffn_norm.weightF32[576]
-
blk.27.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.28.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.28.attn_norm.weightF32[576]
-
blk.28.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.28.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.28.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.28.ffn_down.weightQ3_K[1536, 576]
-
blk.28.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.28.ffn_norm.weightF32[576]
-
blk.28.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
blk.29.attn_k.weight(!unknown_type 20!)[576, 192]
-
blk.29.attn_norm.weightF32[576]
-
blk.29.attn_output.weight(!unknown_type 20!)[576, 576]
-
blk.29.attn_q.weight(!unknown_type 20!)[576, 576]
-
blk.29.attn_v.weight(!unknown_type 20!)[576, 192]
-
blk.29.ffn_down.weightQ3_K[1536, 576]
-
blk.29.ffn_gate.weight(!unknown_type 20!)[576, 1536]
-
blk.29.ffn_norm.weightF32[576]
-
blk.29.ffn_up.weight(!unknown_type 20!)[576, 1536]
-
output_norm.weightF32[576]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29