exaone-deep:7.8b-fp16
149.8K Downloads · Updated 5 months ago
EXAONE Deep, developed and released by LG AI Research, exhibits superior capabilities in a variety of reasoning tasks, including math and coding benchmarks, across model sizes ranging from 2.4B to 32B parameters.
Available sizes: 2.4b, 7.8b, 32b
exaone-deep:7.8b-fp16
model    421cb2b03dee · 16GB
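To pull and run this exact tag locally with the Ollama CLI:

    ollama run exaone-deep:7.8b-fp16

The FP16 weights are roughly 16GB, so plan for at least that much RAM or VRAM plus KV-cache overhead; the 2.4b tag is an option for constrained hardware.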
Metadata
general.architecture                       exaone
general.file_type                          F16
exaone.attention.head_count                32
exaone.attention.head_count_kv             8
exaone.attention.layer_norm_rms_epsilon    1e-05
exaone.block_count                         32
exaone.context_length                      32768
exaone.embedding_length                    4096
exaone.feed_forward_length                 14336
exaone.rope.dimension_count                128
exaone.rope.freq_base                      1e+06
tokenizer.ggml.bos_token_id                1
tokenizer.ggml.eos_token_id                361
tokenizer.ggml.merges                      [t h, Ġ a, Ġ í, i n, Ġ th, ...]
tokenizer.ggml.model                       gpt2
tokenizer.ggml.padding_token_id            0
tokenizer.ggml.pre                         exaone
tokenizer.ggml.token_type                  [3, 3, 3, 3, 4, ...]
tokenizer.ggml.tokens                      [[PAD], [BOS], [EOS], [UNK], , ...]
tokenizer.ggml.unknown_token_id            3
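The key/value pairs above are standard GGUF metadata and can be read programmatically. A minimal sketch, assuming the gguf Python package published from the llama.cpp project and a local copy of the weights (the filename here is hypothetical; Ollama stores blobs under its own directory, keyed by digest):

    from gguf import GGUFReader  # pip install gguf

    # Hypothetical local path to the GGUF file for this tag.
    reader = GGUFReader("exaone-deep-7.8b-fp16.gguf")

    # Metadata keys such as exaone.block_count or tokenizer.ggml.model.
    for name in reader.fields:
        print(name)

    # Tensor records carry name, dtype, and shape, matching the table below.
    for tensor in reader.tensors:
        print(tensor.name, tensor.tensor_type, tensor.shape)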
Tensor
Name                        Type    Shape
token_embd.weight           F16     [4096, 102400]
blk.0 through blk.31 (each of the 32 transformer blocks contains the same nine tensors):
blk.N.attn_k.weight         F16     [4096, 1024]
blk.N.attn_norm.weight      F32     [4096]
blk.N.attn_output.weight    F16     [4096, 4096]
blk.N.attn_q.weight         F16     [4096, 4096]
blk.N.attn_v.weight         F16     [4096, 1024]
blk.N.ffn_down.weight       F16     [14336, 4096]
blk.N.ffn_gate.weight       F16     [4096, 14336]
blk.N.ffn_norm.weight       F32     [4096]
blk.N.ffn_up.weight         F16     [4096, 14336]
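The attention shapes follow directly from the metadata: 32 query heads but only 8 KV heads (grouped-query attention), each 128-dimensional (exaone.rope.dimension_count). A quick check of the projection widths:

    embedding_length = 4096
    head_count       = 32
    head_count_kv    = 8
    head_dim         = 128

    q_width  = head_count * head_dim      # 4096 -> attn_q is [4096, 4096]
    kv_width = head_count_kv * head_dim   # 1024 -> attn_k/attn_v are [4096, 1024]
    print(q_width, kv_width)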
output.weight               F16     [4096, 102400]
rope_freqs.weight           F32     [64]
output_norm.weight          F32     [4096]
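Summing the shapes above reproduces both numbers in the tag name: roughly 7.8B parameters, which at two bytes per FP16 weight comes to about the 16GB shown for the model blob. A sketch of the arithmetic:

    per_block = (
        2 * 4096 * 4096       # attn_q, attn_output
        + 2 * 4096 * 1024     # attn_k, attn_v
        + 3 * 4096 * 14336    # ffn_gate, ffn_up, ffn_down
        + 2 * 4096            # attn_norm, ffn_norm
    )                         # = 218,112,000 per block
    embeddings = 2 * 4096 * 102400   # token_embd + output (untied)
    misc       = 4096 + 64           # output_norm + rope_freqs
    total = 32 * per_block + embeddings + misc
    print(f"{total:,} parameters, ~{total * 2 / 1e9:.1f} GB at FP16")
    # 7,818,448,960 parameters, ~15.6 GB at FP16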