Models
GitHub
Discord
Turbo
Sign in
Download
Models
Download
GitHub
Discord
Sign in
exaone-deep
:2.4b
149.6K
Downloads
Updated
5 months ago
EXAONE Deep exhibits superior capabilities in various reasoning tasks including math and coding benchmarks, ranging from 2.4B to 32B parameters developed and released by LG AI Research.
EXAONE Deep exhibits superior capabilities in various reasoning tasks including math and coding benchmarks, ranging from 2.4B to 32B parameters developed and released by LG AI Research.
Cancel
2.4b
7.8b
32b
exaone-deep:2.4b
...
/
model
1eefef7fedff · 1.6GB
Metadata
general.architecture
exaone
exaone
general.file_type
Q4_K_M
Q4_K_M
exaone.attention.head_count
32
32
exaone.attention.head_count_kv
8
8
exaone.attention.layer_norm_rms_epsilon
1e-05
1e-05
exaone.block_count
30
30
exaone.context_length
32768
32768
exaone.embedding_length
2560
2560
exaone.feed_forward_length
7168
7168
exaone.rope.dimension_count
80
80
exaone.rope.freq_base
1e+06
1e+06
tokenizer.ggml.bos_token_id
1
1
tokenizer.ggml.eos_token_id
361
361
tokenizer.ggml.merges
[t h, Ġ a, Ġ í, i n, Ġ th, ...]
[t h, Ġ a, Ġ í, i n, Ġ th, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
0
0
tokenizer.ggml.pre
exaone
exaone
tokenizer.ggml.token_type
[3, 3, 3, 3, 4, ...]
[3, 3, 3, 3, 4, ...]
tokenizer.ggml.tokens
[[PAD], [BOS], [EOS], [UNK], , ...]
[[PAD], [BOS], [EOS], [UNK], , ...]
tokenizer.ggml.unknown_token_id
3
3
Tensor
Name
Type
Shape
token_embd.weight
Q4_K
Q4_K
[2560, 102400]
blk.0
blk.0.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.0.attn_norm.weight
F32
F32
[2560]
blk.0.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.0.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.0.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.0.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.0.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.0.ffn_norm.weight
F32
F32
[2560]
blk.0.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.1
blk.1.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.1.attn_norm.weight
F32
F32
[2560]
blk.1.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.1.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.1.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.1.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.1.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.1.ffn_norm.weight
F32
F32
[2560]
blk.1.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.2
blk.2.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.2.attn_norm.weight
F32
F32
[2560]
blk.2.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.2.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.2.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.2.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.2.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.2.ffn_norm.weight
F32
F32
[2560]
blk.2.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.3
blk.3.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.3.attn_norm.weight
F32
F32
[2560]
blk.3.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.3.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.3.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.3.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.3.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.3.ffn_norm.weight
F32
F32
[2560]
blk.3.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.4
blk.4.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.4.attn_norm.weight
F32
F32
[2560]
blk.4.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.4.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.4.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.4.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.4.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.4.ffn_norm.weight
F32
F32
[2560]
blk.4.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.5
blk.5.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.5.attn_norm.weight
F32
F32
[2560]
blk.5.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.5.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.5.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.5.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.5.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.5.ffn_norm.weight
F32
F32
[2560]
blk.5.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.6
blk.6.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.6.attn_norm.weight
F32
F32
[2560]
blk.6.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.6.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.6.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.6.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.6.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.6.ffn_norm.weight
F32
F32
[2560]
blk.6.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.7
blk.7.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.7.attn_norm.weight
F32
F32
[2560]
blk.7.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.7.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.7.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.7.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.7.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.7.ffn_norm.weight
F32
F32
[2560]
blk.7.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.8
blk.8.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.8.attn_norm.weight
F32
F32
[2560]
blk.8.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.8.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.8.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.8.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.8.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.8.ffn_norm.weight
F32
F32
[2560]
blk.8.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.9
blk.9.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.9.attn_norm.weight
F32
F32
[2560]
blk.9.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.9.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.9.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.9.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.9.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.9.ffn_norm.weight
F32
F32
[2560]
blk.9.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.10
blk.10.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.10.attn_norm.weight
F32
F32
[2560]
blk.10.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.10.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.10.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.10.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.10.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.10.ffn_norm.weight
F32
F32
[2560]
blk.10.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.11
blk.11.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.11.attn_norm.weight
F32
F32
[2560]
blk.11.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.11.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.11.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.11.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.11.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.11.ffn_norm.weight
F32
F32
[2560]
blk.11.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.12
blk.12.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.12.attn_norm.weight
F32
F32
[2560]
blk.12.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.12.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.12.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.12.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.12.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.12.ffn_norm.weight
F32
F32
[2560]
blk.12.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.13
blk.13.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.13.attn_norm.weight
F32
F32
[2560]
blk.13.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.13.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.13.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.13.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.13.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.13.ffn_norm.weight
F32
F32
[2560]
blk.13.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.14
blk.14.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.14.attn_norm.weight
F32
F32
[2560]
blk.14.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.14.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.14.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.14.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.14.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.14.ffn_norm.weight
F32
F32
[2560]
blk.14.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.15
blk.15.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.15.attn_norm.weight
F32
F32
[2560]
blk.15.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.15.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.15.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.15.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.15.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.15.ffn_norm.weight
F32
F32
[2560]
blk.15.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.16
blk.16.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.16.attn_norm.weight
F32
F32
[2560]
blk.16.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.16.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.16.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.16.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.16.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.16.ffn_norm.weight
F32
F32
[2560]
blk.16.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.17
blk.17.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.17.attn_norm.weight
F32
F32
[2560]
blk.17.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.17.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.17.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.17.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.17.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.17.ffn_norm.weight
F32
F32
[2560]
blk.17.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.18
blk.18.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.18.attn_norm.weight
F32
F32
[2560]
blk.18.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.18.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.18.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.18.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.18.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.18.ffn_norm.weight
F32
F32
[2560]
blk.18.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.19
blk.19.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.19.attn_norm.weight
F32
F32
[2560]
blk.19.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.19.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.19.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.19.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.19.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.19.ffn_norm.weight
F32
F32
[2560]
blk.19.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.20
blk.20.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.20.attn_norm.weight
F32
F32
[2560]
blk.20.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.20.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.20.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.20.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.20.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.20.ffn_norm.weight
F32
F32
[2560]
blk.20.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.21
blk.21.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.21.attn_norm.weight
F32
F32
[2560]
blk.21.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.21.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.21.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.21.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.21.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.21.ffn_norm.weight
F32
F32
[2560]
blk.21.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.22
blk.22.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.22.attn_norm.weight
F32
F32
[2560]
blk.22.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.22.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.22.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.22.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.22.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.22.ffn_norm.weight
F32
F32
[2560]
blk.22.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.23
blk.23.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.23.attn_norm.weight
F32
F32
[2560]
blk.23.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.23.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.23.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.23.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.23.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.23.ffn_norm.weight
F32
F32
[2560]
blk.23.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.24
blk.24.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.24.attn_norm.weight
F32
F32
[2560]
blk.24.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.24.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.24.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.24.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.24.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.24.ffn_norm.weight
F32
F32
[2560]
blk.24.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.25
blk.25.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.25.attn_norm.weight
F32
F32
[2560]
blk.25.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.25.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.25.attn_v.weight
Q4_K
Q4_K
[2560, 640]
blk.25.ffn_down.weight
Q4_K
Q4_K
[7168, 2560]
blk.25.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.25.ffn_norm.weight
F32
F32
[2560]
blk.25.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.26
blk.26.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.26.attn_norm.weight
F32
F32
[2560]
blk.26.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.26.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.26.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.26.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.26.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.26.ffn_norm.weight
F32
F32
[2560]
blk.26.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.27
blk.27.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.27.attn_norm.weight
F32
F32
[2560]
blk.27.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.27.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.27.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.27.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.27.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.27.ffn_norm.weight
F32
F32
[2560]
blk.27.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.28
blk.28.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.28.attn_norm.weight
F32
F32
[2560]
blk.28.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.28.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.28.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.28.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.28.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.28.ffn_norm.weight
F32
F32
[2560]
blk.28.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
blk.29
blk.29.attn_k.weight
Q4_K
Q4_K
[2560, 640]
blk.29.attn_norm.weight
F32
F32
[2560]
blk.29.attn_output.weight
Q4_K
Q4_K
[2560, 2560]
blk.29.attn_q.weight
Q4_K
Q4_K
[2560, 2560]
blk.29.attn_v.weight
Q6_K
Q6_K
[2560, 640]
blk.29.ffn_down.weight
Q6_K
Q6_K
[7168, 2560]
blk.29.ffn_gate.weight
Q4_K
Q4_K
[2560, 7168]
blk.29.ffn_norm.weight
F32
F32
[2560]
blk.29.ffn_up.weight
Q4_K
Q4_K
[2560, 7168]
output.weight
Q6_K
Q6_K
[2560, 102400]
rope_freqs.weight
F32
F32
[40]
output_norm.weight
F32
F32
[2560]