Models
GitHub
Discord
Turbo
Sign in
Download
Models
Download
GitHub
Discord
Sign in
unclemusclez
/
smollm-135m-instruct-devinator
:v2-q6_K
41
Downloads
Updated
12 months ago
SmolLM 135M Instruct Trained on DEVINator Data for Open Hands (Open Devin)
SmolLM 135M Instruct Trained on DEVINator Data for Open Hands (Open Devin)
Cancel
smollm-135m-instruct-devinator:v2-q6_K
...
/
model
d42736f27f42 · 138MB
Metadata
general.architecture
llama
llama
general.file_type
Q6_K
Q6_K
llama.attention.head_count
9
9
llama.attention.head_count_kv
3
3
llama.attention.key_length
64
64
llama.attention.layer_norm_rms_epsilon
1e-05
1e-05
llama.attention.value_length
64
64
llama.block_count
30
30
llama.context_length
2048
2048
llama.embedding_length
576
576
llama.feed_forward_length
1536
1536
llama.rope.dimension_count
64
64
llama.rope.freq_base
10000
10000
llama.vocab_size
49152
49152
tokenizer.ggml.add_bos_token
false
false
tokenizer.ggml.add_space_prefix
false
false
tokenizer.ggml.bos_token_id
1
1
tokenizer.ggml.eos_token_id
2
2
tokenizer.ggml.merges
[Ġ t, Ġ a, i n, h e, Ġ Ġ, ...]
[Ġ t, Ġ a, i n, h e, Ġ Ġ, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
2
2
tokenizer.ggml.pre
smollm-it-devin
smollm-it-devin
tokenizer.ggml.token_type
[3, 3, 3, 3, 3, ...]
[3, 3, 3, 3, 3, ...]
tokenizer.ggml.tokens
[<|endoftext|>, <|im_start|>, <|im_end|>, <repo_name>, <reponame>, ...]
[<|endoftext|>, <|im_start|>, <|im_end|>, <repo_name>, <reponame>, ...]
tokenizer.ggml.unknown_token_id
0
0
Tensor
Name
Type
Shape
token_embd.weight
Q8_0
Q8_0
[576, 49152]
blk.0
blk.0.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.0.attn_norm.weight
F32
F32
[576]
blk.0.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.0.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.0.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.0.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.0.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.0.ffn_norm.weight
F32
F32
[576]
blk.0.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.1
blk.1.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.1.attn_norm.weight
F32
F32
[576]
blk.1.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.1.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.1.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.1.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.1.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.1.ffn_norm.weight
F32
F32
[576]
blk.1.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.2
blk.2.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.2.attn_norm.weight
F32
F32
[576]
blk.2.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.2.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.2.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.2.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.2.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.2.ffn_norm.weight
F32
F32
[576]
blk.2.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.3
blk.3.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.3.attn_norm.weight
F32
F32
[576]
blk.3.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.3.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.3.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.3.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.3.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.3.ffn_norm.weight
F32
F32
[576]
blk.3.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.4
blk.4.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.4.attn_norm.weight
F32
F32
[576]
blk.4.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.4.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.4.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.4.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.4.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.4.ffn_norm.weight
F32
F32
[576]
blk.4.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.5
blk.5.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.5.attn_norm.weight
F32
F32
[576]
blk.5.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.5.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.5.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.5.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.5.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.5.ffn_norm.weight
F32
F32
[576]
blk.5.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.6
blk.6.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.6.attn_norm.weight
F32
F32
[576]
blk.6.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.6.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.6.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.6.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.6.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.6.ffn_norm.weight
F32
F32
[576]
blk.6.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.7
blk.7.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.7.attn_norm.weight
F32
F32
[576]
blk.7.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.7.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.7.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.7.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.7.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.7.ffn_norm.weight
F32
F32
[576]
blk.7.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.8
blk.8.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.8.attn_norm.weight
F32
F32
[576]
blk.8.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.8.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.8.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.8.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.8.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.8.ffn_norm.weight
F32
F32
[576]
blk.8.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.9
blk.9.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.9.attn_norm.weight
F32
F32
[576]
blk.9.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.9.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.9.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.9.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.9.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.9.ffn_norm.weight
F32
F32
[576]
blk.9.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.10
blk.10.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.10.attn_norm.weight
F32
F32
[576]
blk.10.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.10.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.10.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.10.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.10.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.10.ffn_norm.weight
F32
F32
[576]
blk.10.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.11
blk.11.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.11.attn_norm.weight
F32
F32
[576]
blk.11.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.11.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.11.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.11.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.11.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.11.ffn_norm.weight
F32
F32
[576]
blk.11.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.12
blk.12.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.12.attn_norm.weight
F32
F32
[576]
blk.12.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.12.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.12.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.12.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.12.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.12.ffn_norm.weight
F32
F32
[576]
blk.12.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.13
blk.13.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.13.attn_norm.weight
F32
F32
[576]
blk.13.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.13.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.13.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.13.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.13.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.13.ffn_norm.weight
F32
F32
[576]
blk.13.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.14
blk.14.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.14.attn_norm.weight
F32
F32
[576]
blk.14.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.14.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.14.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.14.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.14.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.14.ffn_norm.weight
F32
F32
[576]
blk.14.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.15
blk.15.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.15.attn_norm.weight
F32
F32
[576]
blk.15.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.15.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.15.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.15.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.15.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.15.ffn_norm.weight
F32
F32
[576]
blk.15.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.16
blk.16.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.16.attn_norm.weight
F32
F32
[576]
blk.16.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.16.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.16.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.16.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.16.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.16.ffn_norm.weight
F32
F32
[576]
blk.16.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.17
blk.17.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.17.attn_norm.weight
F32
F32
[576]
blk.17.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.17.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.17.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.17.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.17.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.17.ffn_norm.weight
F32
F32
[576]
blk.17.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.18
blk.18.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.18.attn_norm.weight
F32
F32
[576]
blk.18.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.18.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.18.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.18.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.18.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.18.ffn_norm.weight
F32
F32
[576]
blk.18.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.19
blk.19.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.19.attn_norm.weight
F32
F32
[576]
blk.19.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.19.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.19.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.19.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.19.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.19.ffn_norm.weight
F32
F32
[576]
blk.19.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.20
blk.20.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.20.attn_norm.weight
F32
F32
[576]
blk.20.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.20.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.20.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.20.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.20.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.20.ffn_norm.weight
F32
F32
[576]
blk.20.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.21
blk.21.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.21.attn_norm.weight
F32
F32
[576]
blk.21.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.21.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.21.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.21.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.21.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.21.ffn_norm.weight
F32
F32
[576]
blk.21.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.22
blk.22.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.22.attn_norm.weight
F32
F32
[576]
blk.22.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.22.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.22.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.22.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.22.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.22.ffn_norm.weight
F32
F32
[576]
blk.22.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.23
blk.23.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.23.attn_norm.weight
F32
F32
[576]
blk.23.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.23.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.23.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.23.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.23.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.23.ffn_norm.weight
F32
F32
[576]
blk.23.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.24
blk.24.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.24.attn_norm.weight
F32
F32
[576]
blk.24.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.24.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.24.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.24.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.24.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.24.ffn_norm.weight
F32
F32
[576]
blk.24.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.25
blk.25.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.25.attn_norm.weight
F32
F32
[576]
blk.25.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.25.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.25.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.25.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.25.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.25.ffn_norm.weight
F32
F32
[576]
blk.25.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.26
blk.26.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.26.attn_norm.weight
F32
F32
[576]
blk.26.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.26.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.26.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.26.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.26.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.26.ffn_norm.weight
F32
F32
[576]
blk.26.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.27
blk.27.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.27.attn_norm.weight
F32
F32
[576]
blk.27.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.27.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.27.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.27.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.27.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.27.ffn_norm.weight
F32
F32
[576]
blk.27.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.28
blk.28.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.28.attn_norm.weight
F32
F32
[576]
blk.28.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.28.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.28.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.28.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.28.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.28.ffn_norm.weight
F32
F32
[576]
blk.28.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
blk.29
blk.29.attn_k.weight
Q8_0
Q8_0
[576, 192]
blk.29.attn_norm.weight
F32
F32
[576]
blk.29.attn_output.weight
Q8_0
Q8_0
[576, 576]
blk.29.attn_q.weight
Q8_0
Q8_0
[576, 576]
blk.29.attn_v.weight
Q8_0
Q8_0
[576, 192]
blk.29.ffn_down.weight
Q6_K
Q6_K
[1536, 576]
blk.29.ffn_gate.weight
Q8_0
Q8_0
[576, 1536]
blk.29.ffn_norm.weight
F32
F32
[576]
blk.29.ffn_up.weight
Q8_0
Q8_0
[576, 1536]
output_norm.weight
F32
F32
[576]