Models
GitHub
Discord
Turbo
Sign in
Download
Models
Download
GitHub
Discord
Sign in
unclemusclez
/
smollm-135m-instruct-devinator
:v3-q3_K_L
41
Downloads
Updated
12 months ago
SmolLM 135M Instruct Trained on DEVINator Data for Open Hands (Open Devin)
SmolLM 135M Instruct Trained on DEVINator Data for Open Hands (Open Devin)
Cancel
smollm-135m-instruct-devinator:v3-q3_K_L
...
/
model
22b3589eed0d · 98MB
Metadata
general.architecture
llama
llama
general.file_type
Q3_K_L
Q3_K_L
llama.attention.head_count
9
9
llama.attention.head_count_kv
3
3
llama.attention.key_length
64
64
llama.attention.layer_norm_rms_epsilon
1e-05
1e-05
llama.attention.value_length
64
64
llama.block_count
30
30
llama.context_length
2048
2048
llama.embedding_length
576
576
llama.feed_forward_length
1536
1536
llama.rope.dimension_count
64
64
llama.rope.freq_base
10000
10000
llama.vocab_size
49152
49152
tokenizer.ggml.add_bos_token
false
false
tokenizer.ggml.add_eos_token
false
false
tokenizer.ggml.add_padding_token
false
false
tokenizer.ggml.add_unknown_token
false
false
tokenizer.ggml.bos_token_id
1
1
tokenizer.ggml.eos_token_id
2
2
tokenizer.ggml.merges
[Ġ t, Ġ a, i n, h e, Ġ Ġ, ...]
[Ġ t, Ġ a, i n, h e, Ġ Ġ, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
2
2
tokenizer.ggml.pre
default
default
tokenizer.ggml.scores
[0, 1, 2, 3, 4, ...]
[0, 1, 2, 3, 4, ...]
tokenizer.ggml.token_type
[3, 3, 3, 3, 3, ...]
[3, 3, 3, 3, 3, ...]
tokenizer.ggml.tokens
[<|endoftext|>, <|im_start|>, <|im_end|>, <repo_name>, <reponame>, ...]
[<|endoftext|>, <|im_start|>, <|im_end|>, <repo_name>, <reponame>, ...]
tokenizer.ggml.unknown_token_id
0
0
Tensor
Name
Type
Shape
token_embd.weight
Q8_0
Q8_0
[576, 49152]
blk.0
blk.0.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.0.attn_norm.weight
F32
F32
[576]
blk.0.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.0.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.0.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.0.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.0.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.0.ffn_norm.weight
F32
F32
[576]
blk.0.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.1
blk.1.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.1.attn_norm.weight
F32
F32
[576]
blk.1.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.1.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.1.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.1.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.1.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.1.ffn_norm.weight
F32
F32
[576]
blk.1.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.2
blk.2.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.2.attn_norm.weight
F32
F32
[576]
blk.2.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.2.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.2.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.2.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.2.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.2.ffn_norm.weight
F32
F32
[576]
blk.2.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.3
blk.3.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.3.attn_norm.weight
F32
F32
[576]
blk.3.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.3.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.3.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.3.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.3.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.3.ffn_norm.weight
F32
F32
[576]
blk.3.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.4
blk.4.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.4.attn_norm.weight
F32
F32
[576]
blk.4.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.4.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.4.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.4.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.4.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.4.ffn_norm.weight
F32
F32
[576]
blk.4.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.5
blk.5.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.5.attn_norm.weight
F32
F32
[576]
blk.5.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.5.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.5.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.5.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.5.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.5.ffn_norm.weight
F32
F32
[576]
blk.5.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.6
blk.6.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.6.attn_norm.weight
F32
F32
[576]
blk.6.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.6.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.6.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.6.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.6.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.6.ffn_norm.weight
F32
F32
[576]
blk.6.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.7
blk.7.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.7.attn_norm.weight
F32
F32
[576]
blk.7.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.7.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.7.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.7.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.7.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.7.ffn_norm.weight
F32
F32
[576]
blk.7.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.8
blk.8.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.8.attn_norm.weight
F32
F32
[576]
blk.8.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.8.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.8.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.8.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.8.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.8.ffn_norm.weight
F32
F32
[576]
blk.8.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.9
blk.9.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.9.attn_norm.weight
F32
F32
[576]
blk.9.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.9.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.9.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.9.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.9.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.9.ffn_norm.weight
F32
F32
[576]
blk.9.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.10
blk.10.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.10.attn_norm.weight
F32
F32
[576]
blk.10.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.10.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.10.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.10.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.10.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.10.ffn_norm.weight
F32
F32
[576]
blk.10.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.11
blk.11.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.11.attn_norm.weight
F32
F32
[576]
blk.11.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.11.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.11.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.11.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.11.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.11.ffn_norm.weight
F32
F32
[576]
blk.11.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.12
blk.12.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.12.attn_norm.weight
F32
F32
[576]
blk.12.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.12.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.12.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.12.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.12.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.12.ffn_norm.weight
F32
F32
[576]
blk.12.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.13
blk.13.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.13.attn_norm.weight
F32
F32
[576]
blk.13.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.13.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.13.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.13.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.13.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.13.ffn_norm.weight
F32
F32
[576]
blk.13.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.14
blk.14.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.14.attn_norm.weight
F32
F32
[576]
blk.14.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.14.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.14.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.14.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.14.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.14.ffn_norm.weight
F32
F32
[576]
blk.14.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.15
blk.15.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.15.attn_norm.weight
F32
F32
[576]
blk.15.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.15.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.15.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.15.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.15.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.15.ffn_norm.weight
F32
F32
[576]
blk.15.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.16
blk.16.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.16.attn_norm.weight
F32
F32
[576]
blk.16.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.16.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.16.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.16.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.16.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.16.ffn_norm.weight
F32
F32
[576]
blk.16.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.17
blk.17.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.17.attn_norm.weight
F32
F32
[576]
blk.17.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.17.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.17.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.17.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.17.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.17.ffn_norm.weight
F32
F32
[576]
blk.17.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.18
blk.18.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.18.attn_norm.weight
F32
F32
[576]
blk.18.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.18.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.18.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.18.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.18.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.18.ffn_norm.weight
F32
F32
[576]
blk.18.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.19
blk.19.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.19.attn_norm.weight
F32
F32
[576]
blk.19.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.19.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.19.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.19.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.19.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.19.ffn_norm.weight
F32
F32
[576]
blk.19.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.20
blk.20.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.20.attn_norm.weight
F32
F32
[576]
blk.20.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.20.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.20.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.20.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.20.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.20.ffn_norm.weight
F32
F32
[576]
blk.20.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.21
blk.21.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.21.attn_norm.weight
F32
F32
[576]
blk.21.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.21.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.21.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.21.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.21.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.21.ffn_norm.weight
F32
F32
[576]
blk.21.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.22
blk.22.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.22.attn_norm.weight
F32
F32
[576]
blk.22.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.22.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.22.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.22.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.22.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.22.ffn_norm.weight
F32
F32
[576]
blk.22.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.23
blk.23.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.23.attn_norm.weight
F32
F32
[576]
blk.23.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.23.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.23.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.23.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.23.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.23.ffn_norm.weight
F32
F32
[576]
blk.23.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.24
blk.24.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.24.attn_norm.weight
F32
F32
[576]
blk.24.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.24.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.24.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.24.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.24.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.24.ffn_norm.weight
F32
F32
[576]
blk.24.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.25
blk.25.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.25.attn_norm.weight
F32
F32
[576]
blk.25.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.25.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.25.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.25.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.25.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.25.ffn_norm.weight
F32
F32
[576]
blk.25.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.26
blk.26.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.26.attn_norm.weight
F32
F32
[576]
blk.26.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.26.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.26.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.26.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.26.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.26.ffn_norm.weight
F32
F32
[576]
blk.26.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.27
blk.27.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.27.attn_norm.weight
F32
F32
[576]
blk.27.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.27.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.27.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.27.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.27.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.27.ffn_norm.weight
F32
F32
[576]
blk.27.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.28
blk.28.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.28.attn_norm.weight
F32
F32
[576]
blk.28.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.28.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.28.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.28.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.28.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.28.ffn_norm.weight
F32
F32
[576]
blk.28.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.29
blk.29.attn_k.weight
IQ4_NL
IQ4_NL
[576, 192]
blk.29.attn_norm.weight
F32
F32
[576]
blk.29.attn_output.weight
Q5_1
Q5_1
[576, 576]
blk.29.attn_q.weight
IQ4_NL
IQ4_NL
[576, 576]
blk.29.attn_v.weight
Q5_1
Q5_1
[576, 192]
blk.29.ffn_down.weight
Q5_K
Q5_K
[1536, 576]
blk.29.ffn_gate.weight
IQ4_NL
IQ4_NL
[576, 1536]
blk.29.ffn_norm.weight
F32
F32
[576]
blk.29.ffn_up.weight
IQ4_NL
IQ4_NL
[576, 1536]
output_norm.weight
F32
F32
[576]