Models
GitHub
Discord
Docs
Cloud
Sign in
Download
Models
Download
GitHub
Discord
Docs
Cloud
Sign in
deepseek-ocr
:3b
106
Downloads
Updated
12 minutes ago
DeepSeek-OCR is a vision-language model that can perform token-efficient OCR.
DeepSeek-OCR is a vision-language model that can perform token-efficient OCR.
Cancel
vision
3b
deepseek-ocr:3b
...
/
model
3a18673ff291 · 6.7GB
Metadata
general.architecture
deepseekocr
deepseekocr
general.file_type
F16
F16
deepseekocr.attention.head_count
10
10
deepseekocr.attention.head_count_kv
10
10
deepseekocr.block_count
12
12
deepseekocr.context_length
8192
8192
deepseekocr.embedding_length
1280
1280
deepseekocr.expert_count
64
64
deepseekocr.expert_used_count
6
6
deepseekocr.feed_forward_length
6848
6848
deepseekocr.leading_dense_block_count
1
1
deepseekocr.sam.block_count
12
12
deepseekocr.sam.embedding_length
768
768
deepseekocr.sam.global_attention_indexes
[2, 5, 8, 11]
[2, 5, 8, 11]
deepseekocr.sam.head_count
12
12
deepseekocr.vision.block_count
24
24
deepseekocr.vision.embedding_length
1024
1024
deepseekocr.vision.head_count
16
16
deepseekocr.vision.image_size
224
224
deepseekocr.vision.patch_size
14
14
tokenizer.ggml.add_bos_token
true
true
tokenizer.ggml.add_eos_token
false
false
tokenizer.ggml.add_padding_token
false
false
tokenizer.ggml.bos_token_id
0
0
tokenizer.ggml.eos_token_id
1
1
tokenizer.ggml.merges
[Ġ t, Ġ a, i n, Ġ Ġ, h e, ...]
[Ġ t, Ġ a, i n, Ġ Ġ, h e, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
2
2
tokenizer.ggml.pre
default
default
tokenizer.ggml.scores
[0, 1, 2, 3, 4, ...]
[0, 1, 2, 3, 4, ...]
tokenizer.ggml.token_type
[3, 3, 3, 1, 1, ...]
[3, 3, 3, 1, 1, ...]
tokenizer.ggml.tokens
[<|begin▁of▁sentence|>, <|end▁of▁sentence|>, <|▁pad▁|>, !, ", ...]
[<|begin▁of▁sentence|>, <|end▁of▁sentence|>, <|▁pad▁|>, !, ", ...]
Tensor
Name
Type
Shape
token_embd.weight
BF16
BF16
[1280, 129280]
blk.0
blk.0.attn_k.weight
BF16
BF16
[1280, 1280]
blk.0.attn_norm.weight
F32
F32
[1280]
blk.0.attn_output.weight
BF16
BF16
[1280, 1280]
blk.0.attn_q.weight
BF16
BF16
[1280, 1280]
blk.0.attn_v.weight
BF16
BF16
[1280, 1280]
blk.0.ffn_down.weight
BF16
BF16
[6848, 1280]
blk.0.ffn_gate.weight
BF16
BF16
[1280, 6848]
blk.0.ffn_norm.weight
F32
F32
[1280]
blk.0.ffn_up.weight
BF16
BF16
[1280, 6848]
blk.1
blk.1.attn_k.weight
BF16
BF16
[1280, 1280]
blk.1.attn_norm.weight
F32
F32
[1280]
blk.1.attn_output.weight
BF16
BF16
[1280, 1280]
blk.1.attn_q.weight
BF16
BF16
[1280, 1280]
blk.1.attn_v.weight
BF16
BF16
[1280, 1280]
blk.1.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.1.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.1.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.1.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.1.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.1.ffn_norm.weight
F32
F32
[1280]
blk.1.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.1.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
blk.2
blk.2.attn_k.weight
BF16
BF16
[1280, 1280]
blk.2.attn_norm.weight
F32
F32
[1280]
blk.2.attn_output.weight
BF16
BF16
[1280, 1280]
blk.2.attn_q.weight
BF16
BF16
[1280, 1280]
blk.2.attn_v.weight
BF16
BF16
[1280, 1280]
blk.2.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.2.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.2.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.2.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.2.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.2.ffn_norm.weight
F32
F32
[1280]
blk.2.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.2.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
blk.3
blk.3.attn_k.weight
BF16
BF16
[1280, 1280]
blk.3.attn_norm.weight
F32
F32
[1280]
blk.3.attn_output.weight
BF16
BF16
[1280, 1280]
blk.3.attn_q.weight
BF16
BF16
[1280, 1280]
blk.3.attn_v.weight
BF16
BF16
[1280, 1280]
blk.3.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.3.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.3.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.3.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.3.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.3.ffn_norm.weight
F32
F32
[1280]
blk.3.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.3.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
blk.4
blk.4.attn_k.weight
BF16
BF16
[1280, 1280]
blk.4.attn_norm.weight
F32
F32
[1280]
blk.4.attn_output.weight
BF16
BF16
[1280, 1280]
blk.4.attn_q.weight
BF16
BF16
[1280, 1280]
blk.4.attn_v.weight
BF16
BF16
[1280, 1280]
blk.4.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.4.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.4.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.4.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.4.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.4.ffn_norm.weight
F32
F32
[1280]
blk.4.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.4.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
blk.5
blk.5.attn_k.weight
BF16
BF16
[1280, 1280]
blk.5.attn_norm.weight
F32
F32
[1280]
blk.5.attn_output.weight
BF16
BF16
[1280, 1280]
blk.5.attn_q.weight
BF16
BF16
[1280, 1280]
blk.5.attn_v.weight
BF16
BF16
[1280, 1280]
blk.5.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.5.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.5.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.5.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.5.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.5.ffn_norm.weight
F32
F32
[1280]
blk.5.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.5.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
blk.6
blk.6.attn_k.weight
BF16
BF16
[1280, 1280]
blk.6.attn_norm.weight
F32
F32
[1280]
blk.6.attn_output.weight
BF16
BF16
[1280, 1280]
blk.6.attn_q.weight
BF16
BF16
[1280, 1280]
blk.6.attn_v.weight
BF16
BF16
[1280, 1280]
blk.6.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.6.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.6.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.6.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.6.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.6.ffn_norm.weight
F32
F32
[1280]
blk.6.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.6.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
blk.7
blk.7.attn_k.weight
BF16
BF16
[1280, 1280]
blk.7.attn_norm.weight
F32
F32
[1280]
blk.7.attn_output.weight
BF16
BF16
[1280, 1280]
blk.7.attn_q.weight
BF16
BF16
[1280, 1280]
blk.7.attn_v.weight
BF16
BF16
[1280, 1280]
blk.7.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.7.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.7.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.7.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.7.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.7.ffn_norm.weight
F32
F32
[1280]
blk.7.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.7.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
blk.8
blk.8.attn_k.weight
BF16
BF16
[1280, 1280]
blk.8.attn_norm.weight
F32
F32
[1280]
blk.8.attn_output.weight
BF16
BF16
[1280, 1280]
blk.8.attn_q.weight
BF16
BF16
[1280, 1280]
blk.8.attn_v.weight
BF16
BF16
[1280, 1280]
blk.8.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.8.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.8.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.8.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.8.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.8.ffn_norm.weight
F32
F32
[1280]
blk.8.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.8.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
blk.9
blk.9.attn_k.weight
BF16
BF16
[1280, 1280]
blk.9.attn_norm.weight
F32
F32
[1280]
blk.9.attn_output.weight
BF16
BF16
[1280, 1280]
blk.9.attn_q.weight
BF16
BF16
[1280, 1280]
blk.9.attn_v.weight
BF16
BF16
[1280, 1280]
blk.9.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.9.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.9.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.9.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.9.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.9.ffn_norm.weight
F32
F32
[1280]
blk.9.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.9.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
blk.10
blk.10.attn_k.weight
BF16
BF16
[1280, 1280]
blk.10.attn_norm.weight
F32
F32
[1280]
blk.10.attn_output.weight
BF16
BF16
[1280, 1280]
blk.10.attn_q.weight
BF16
BF16
[1280, 1280]
blk.10.attn_v.weight
BF16
BF16
[1280, 1280]
blk.10.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.10.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.10.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.10.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.10.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.10.ffn_norm.weight
F32
F32
[1280]
blk.10.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.10.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
blk.11
blk.11.attn_k.weight
BF16
BF16
[1280, 1280]
blk.11.attn_norm.weight
F32
F32
[1280]
blk.11.attn_output.weight
BF16
BF16
[1280, 1280]
blk.11.attn_q.weight
BF16
BF16
[1280, 1280]
blk.11.attn_v.weight
BF16
BF16
[1280, 1280]
blk.11.ffn_down_exps.weight
BF16
BF16
[896, 1280, 64]
blk.11.ffn_down_shexp.weight
BF16
BF16
[1792, 1280]
blk.11.ffn_gate_exps.weight
BF16
BF16
[1280, 896, 64]
blk.11.ffn_gate_inp.weight
F32
F32
[1280, 64]
blk.11.ffn_gate_shexp.weight
BF16
BF16
[1280, 1792]
blk.11.ffn_norm.weight
F32
F32
[1280]
blk.11.ffn_up_exps.weight
BF16
BF16
[1280, 896, 64]
blk.11.ffn_up_shexp.weight
BF16
BF16
[1280, 1792]
mm.image_newline
F32
F32
[1280]
mm.layers.bias
F32
F32
[1280]
mm.layers.weight
BF16
BF16
[2048, 1280]
mm.view_seperator
F32
F32
[1280]
output.weight
BF16
BF16
[1280, 129280]
s.blk.0
s.blk.0.attn.proj.bias
F32
F32
[768]
s.blk.0.attn.proj.weight
F16
F16
[768, 768]
s.blk.0.attn.qkv.bias
F32
F32
[2304]
s.blk.0.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.0.attn.rel_pos_h
F32
F32
[64, 27]
s.blk.0.attn.rel_pos_w
F32
F32
[64, 27]
s.blk.0.mlp.lin1.bias
F32
F32
[3072]
s.blk.0.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.0.mlp.lin2.bias
F32
F32
[768]
s.blk.0.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.0.norm1.bias
F32
F32
[768]
s.blk.0.norm1.weight
F32
F32
[768]
s.blk.0.norm2.bias
F32
F32
[768]
s.blk.0.norm2.weight
F32
F32
[768]
s.blk.1
s.blk.1.attn.proj.bias
F32
F32
[768]
s.blk.1.attn.proj.weight
F16
F16
[768, 768]
s.blk.1.attn.qkv.bias
F32
F32
[2304]
s.blk.1.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.1.attn.rel_pos_h
F32
F32
[64, 27]
s.blk.1.attn.rel_pos_w
F32
F32
[64, 27]
s.blk.1.mlp.lin1.bias
F32
F32
[3072]
s.blk.1.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.1.mlp.lin2.bias
F32
F32
[768]
s.blk.1.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.1.norm1.bias
F32
F32
[768]
s.blk.1.norm1.weight
F32
F32
[768]
s.blk.1.norm2.bias
F32
F32
[768]
s.blk.1.norm2.weight
F32
F32
[768]
s.blk.2
s.blk.2.attn.proj.bias
F32
F32
[768]
s.blk.2.attn.proj.weight
F16
F16
[768, 768]
s.blk.2.attn.qkv.bias
F32
F32
[2304]
s.blk.2.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.2.attn.rel_pos_h
F32
F32
[64, 127]
s.blk.2.attn.rel_pos_w
F32
F32
[64, 127]
s.blk.2.mlp.lin1.bias
F32
F32
[3072]
s.blk.2.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.2.mlp.lin2.bias
F32
F32
[768]
s.blk.2.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.2.norm1.bias
F32
F32
[768]
s.blk.2.norm1.weight
F32
F32
[768]
s.blk.2.norm2.bias
F32
F32
[768]
s.blk.2.norm2.weight
F32
F32
[768]
s.blk.3
s.blk.3.attn.proj.bias
F32
F32
[768]
s.blk.3.attn.proj.weight
F16
F16
[768, 768]
s.blk.3.attn.qkv.bias
F32
F32
[2304]
s.blk.3.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.3.attn.rel_pos_h
F32
F32
[64, 27]
s.blk.3.attn.rel_pos_w
F32
F32
[64, 27]
s.blk.3.mlp.lin1.bias
F32
F32
[3072]
s.blk.3.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.3.mlp.lin2.bias
F32
F32
[768]
s.blk.3.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.3.norm1.bias
F32
F32
[768]
s.blk.3.norm1.weight
F32
F32
[768]
s.blk.3.norm2.bias
F32
F32
[768]
s.blk.3.norm2.weight
F32
F32
[768]
s.blk.4
s.blk.4.attn.proj.bias
F32
F32
[768]
s.blk.4.attn.proj.weight
F16
F16
[768, 768]
s.blk.4.attn.qkv.bias
F32
F32
[2304]
s.blk.4.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.4.attn.rel_pos_h
F32
F32
[64, 27]
s.blk.4.attn.rel_pos_w
F32
F32
[64, 27]
s.blk.4.mlp.lin1.bias
F32
F32
[3072]
s.blk.4.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.4.mlp.lin2.bias
F32
F32
[768]
s.blk.4.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.4.norm1.bias
F32
F32
[768]
s.blk.4.norm1.weight
F32
F32
[768]
s.blk.4.norm2.bias
F32
F32
[768]
s.blk.4.norm2.weight
F32
F32
[768]
s.blk.5
s.blk.5.attn.proj.bias
F32
F32
[768]
s.blk.5.attn.proj.weight
F16
F16
[768, 768]
s.blk.5.attn.qkv.bias
F32
F32
[2304]
s.blk.5.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.5.attn.rel_pos_h
F32
F32
[64, 127]
s.blk.5.attn.rel_pos_w
F32
F32
[64, 127]
s.blk.5.mlp.lin1.bias
F32
F32
[3072]
s.blk.5.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.5.mlp.lin2.bias
F32
F32
[768]
s.blk.5.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.5.norm1.bias
F32
F32
[768]
s.blk.5.norm1.weight
F32
F32
[768]
s.blk.5.norm2.bias
F32
F32
[768]
s.blk.5.norm2.weight
F32
F32
[768]
s.blk.6
s.blk.6.attn.proj.bias
F32
F32
[768]
s.blk.6.attn.proj.weight
F16
F16
[768, 768]
s.blk.6.attn.qkv.bias
F32
F32
[2304]
s.blk.6.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.6.attn.rel_pos_h
F32
F32
[64, 27]
s.blk.6.attn.rel_pos_w
F32
F32
[64, 27]
s.blk.6.mlp.lin1.bias
F32
F32
[3072]
s.blk.6.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.6.mlp.lin2.bias
F32
F32
[768]
s.blk.6.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.6.norm1.bias
F32
F32
[768]
s.blk.6.norm1.weight
F32
F32
[768]
s.blk.6.norm2.bias
F32
F32
[768]
s.blk.6.norm2.weight
F32
F32
[768]
s.blk.7
s.blk.7.attn.proj.bias
F32
F32
[768]
s.blk.7.attn.proj.weight
F16
F16
[768, 768]
s.blk.7.attn.qkv.bias
F32
F32
[2304]
s.blk.7.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.7.attn.rel_pos_h
F32
F32
[64, 27]
s.blk.7.attn.rel_pos_w
F32
F32
[64, 27]
s.blk.7.mlp.lin1.bias
F32
F32
[3072]
s.blk.7.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.7.mlp.lin2.bias
F32
F32
[768]
s.blk.7.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.7.norm1.bias
F32
F32
[768]
s.blk.7.norm1.weight
F32
F32
[768]
s.blk.7.norm2.bias
F32
F32
[768]
s.blk.7.norm2.weight
F32
F32
[768]
s.blk.8
s.blk.8.attn.proj.bias
F32
F32
[768]
s.blk.8.attn.proj.weight
F16
F16
[768, 768]
s.blk.8.attn.qkv.bias
F32
F32
[2304]
s.blk.8.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.8.attn.rel_pos_h
F32
F32
[64, 127]
s.blk.8.attn.rel_pos_w
F32
F32
[64, 127]
s.blk.8.mlp.lin1.bias
F32
F32
[3072]
s.blk.8.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.8.mlp.lin2.bias
F32
F32
[768]
s.blk.8.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.8.norm1.bias
F32
F32
[768]
s.blk.8.norm1.weight
F32
F32
[768]
s.blk.8.norm2.bias
F32
F32
[768]
s.blk.8.norm2.weight
F32
F32
[768]
s.blk.9
s.blk.9.attn.proj.bias
F32
F32
[768]
s.blk.9.attn.proj.weight
F16
F16
[768, 768]
s.blk.9.attn.qkv.bias
F32
F32
[2304]
s.blk.9.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.9.attn.rel_pos_h
F32
F32
[64, 27]
s.blk.9.attn.rel_pos_w
F32
F32
[64, 27]
s.blk.9.mlp.lin1.bias
F32
F32
[3072]
s.blk.9.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.9.mlp.lin2.bias
F32
F32
[768]
s.blk.9.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.9.norm1.bias
F32
F32
[768]
s.blk.9.norm1.weight
F32
F32
[768]
s.blk.9.norm2.bias
F32
F32
[768]
s.blk.9.norm2.weight
F32
F32
[768]
s.blk.10
s.blk.10.attn.proj.bias
F32
F32
[768]
s.blk.10.attn.proj.weight
F16
F16
[768, 768]
s.blk.10.attn.qkv.bias
F32
F32
[2304]
s.blk.10.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.10.attn.rel_pos_h
F32
F32
[64, 27]
s.blk.10.attn.rel_pos_w
F32
F32
[64, 27]
s.blk.10.mlp.lin1.bias
F32
F32
[3072]
s.blk.10.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.10.mlp.lin2.bias
F32
F32
[768]
s.blk.10.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.10.norm1.bias
F32
F32
[768]
s.blk.10.norm1.weight
F32
F32
[768]
s.blk.10.norm2.bias
F32
F32
[768]
s.blk.10.norm2.weight
F32
F32
[768]
s.blk.11
s.blk.11.attn.proj.bias
F32
F32
[768]
s.blk.11.attn.proj.weight
F16
F16
[768, 768]
s.blk.11.attn.qkv.bias
F32
F32
[2304]
s.blk.11.attn.qkv.weight
F16
F16
[768, 2304]
s.blk.11.attn.rel_pos_h
F32
F32
[64, 127]
s.blk.11.attn.rel_pos_w
F32
F32
[64, 127]
s.blk.11.mlp.lin1.bias
F32
F32
[3072]
s.blk.11.mlp.lin1.weight
F16
F16
[768, 3072]
s.blk.11.mlp.lin2.bias
F32
F32
[768]
s.blk.11.mlp.lin2.weight
F16
F16
[3072, 768]
s.blk.11.norm1.bias
F32
F32
[768]
s.blk.11.norm1.weight
F32
F32
[768]
s.blk.11.norm2.bias
F32
F32
[768]
s.blk.11.norm2.weight
F32
F32
[768]
s.neck.0.weight
F16
F16
[1, 1, 768, 256]
s.neck.1.bias
F32
F32
[256]
s.neck.1.weight
F32
F32
[256]
s.neck.2.weight
F16
F16
[3, 3, 256, 256]
s.neck.3.bias
F32
F32
[256]
s.neck.3.weight
F32
F32
[256]
s.net_2.weight
F16
F16
[3, 3, 256, 512]
s.net_3.weight
F16
F16
[3, 3, 512, 1024]
s.patch_embd.bias
F32
F32
[768]
s.patch_embd.weight
F16
F16
[16, 16, 3, 768]
s.position_embd
F32
F32
[768, 64, 64, 1]
v.blk.0
v.blk.0.layer_norm1.bias
F32
F32
[1024]
v.blk.0.layer_norm1.weight
F32
F32
[1024]
v.blk.0.layer_norm2.bias
F32
F32
[1024]
v.blk.0.layer_norm2.weight
F32
F32
[1024]
v.blk.0.mlp.fc1.bias
F32
F32
[4096]
v.blk.0.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.0.mlp.fc2.bias
F32
F32
[1024]
v.blk.0.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.0.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.0.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.0.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.0.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.1
v.blk.1.layer_norm1.bias
F32
F32
[1024]
v.blk.1.layer_norm1.weight
F32
F32
[1024]
v.blk.1.layer_norm2.bias
F32
F32
[1024]
v.blk.1.layer_norm2.weight
F32
F32
[1024]
v.blk.1.mlp.fc1.bias
F32
F32
[4096]
v.blk.1.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.1.mlp.fc2.bias
F32
F32
[1024]
v.blk.1.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.1.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.1.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.1.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.1.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.2
v.blk.2.layer_norm1.bias
F32
F32
[1024]
v.blk.2.layer_norm1.weight
F32
F32
[1024]
v.blk.2.layer_norm2.bias
F32
F32
[1024]
v.blk.2.layer_norm2.weight
F32
F32
[1024]
v.blk.2.mlp.fc1.bias
F32
F32
[4096]
v.blk.2.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.2.mlp.fc2.bias
F32
F32
[1024]
v.blk.2.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.2.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.2.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.2.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.2.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.3
v.blk.3.layer_norm1.bias
F32
F32
[1024]
v.blk.3.layer_norm1.weight
F32
F32
[1024]
v.blk.3.layer_norm2.bias
F32
F32
[1024]
v.blk.3.layer_norm2.weight
F32
F32
[1024]
v.blk.3.mlp.fc1.bias
F32
F32
[4096]
v.blk.3.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.3.mlp.fc2.bias
F32
F32
[1024]
v.blk.3.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.3.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.3.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.3.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.3.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.4
v.blk.4.layer_norm1.bias
F32
F32
[1024]
v.blk.4.layer_norm1.weight
F32
F32
[1024]
v.blk.4.layer_norm2.bias
F32
F32
[1024]
v.blk.4.layer_norm2.weight
F32
F32
[1024]
v.blk.4.mlp.fc1.bias
F32
F32
[4096]
v.blk.4.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.4.mlp.fc2.bias
F32
F32
[1024]
v.blk.4.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.4.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.4.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.4.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.4.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.5
v.blk.5.layer_norm1.bias
F32
F32
[1024]
v.blk.5.layer_norm1.weight
F32
F32
[1024]
v.blk.5.layer_norm2.bias
F32
F32
[1024]
v.blk.5.layer_norm2.weight
F32
F32
[1024]
v.blk.5.mlp.fc1.bias
F32
F32
[4096]
v.blk.5.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.5.mlp.fc2.bias
F32
F32
[1024]
v.blk.5.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.5.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.5.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.5.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.5.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.6
v.blk.6.layer_norm1.bias
F32
F32
[1024]
v.blk.6.layer_norm1.weight
F32
F32
[1024]
v.blk.6.layer_norm2.bias
F32
F32
[1024]
v.blk.6.layer_norm2.weight
F32
F32
[1024]
v.blk.6.mlp.fc1.bias
F32
F32
[4096]
v.blk.6.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.6.mlp.fc2.bias
F32
F32
[1024]
v.blk.6.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.6.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.6.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.6.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.6.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.7
v.blk.7.layer_norm1.bias
F32
F32
[1024]
v.blk.7.layer_norm1.weight
F32
F32
[1024]
v.blk.7.layer_norm2.bias
F32
F32
[1024]
v.blk.7.layer_norm2.weight
F32
F32
[1024]
v.blk.7.mlp.fc1.bias
F32
F32
[4096]
v.blk.7.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.7.mlp.fc2.bias
F32
F32
[1024]
v.blk.7.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.7.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.7.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.7.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.7.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.8
v.blk.8.layer_norm1.bias
F32
F32
[1024]
v.blk.8.layer_norm1.weight
F32
F32
[1024]
v.blk.8.layer_norm2.bias
F32
F32
[1024]
v.blk.8.layer_norm2.weight
F32
F32
[1024]
v.blk.8.mlp.fc1.bias
F32
F32
[4096]
v.blk.8.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.8.mlp.fc2.bias
F32
F32
[1024]
v.blk.8.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.8.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.8.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.8.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.8.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.9
v.blk.9.layer_norm1.bias
F32
F32
[1024]
v.blk.9.layer_norm1.weight
F32
F32
[1024]
v.blk.9.layer_norm2.bias
F32
F32
[1024]
v.blk.9.layer_norm2.weight
F32
F32
[1024]
v.blk.9.mlp.fc1.bias
F32
F32
[4096]
v.blk.9.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.9.mlp.fc2.bias
F32
F32
[1024]
v.blk.9.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.9.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.9.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.9.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.9.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.10
v.blk.10.layer_norm1.bias
F32
F32
[1024]
v.blk.10.layer_norm1.weight
F32
F32
[1024]
v.blk.10.layer_norm2.bias
F32
F32
[1024]
v.blk.10.layer_norm2.weight
F32
F32
[1024]
v.blk.10.mlp.fc1.bias
F32
F32
[4096]
v.blk.10.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.10.mlp.fc2.bias
F32
F32
[1024]
v.blk.10.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.10.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.10.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.10.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.10.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.11
v.blk.11.layer_norm1.bias
F32
F32
[1024]
v.blk.11.layer_norm1.weight
F32
F32
[1024]
v.blk.11.layer_norm2.bias
F32
F32
[1024]
v.blk.11.layer_norm2.weight
F32
F32
[1024]
v.blk.11.mlp.fc1.bias
F32
F32
[4096]
v.blk.11.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.11.mlp.fc2.bias
F32
F32
[1024]
v.blk.11.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.11.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.11.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.11.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.11.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.12
v.blk.12.layer_norm1.bias
F32
F32
[1024]
v.blk.12.layer_norm1.weight
F32
F32
[1024]
v.blk.12.layer_norm2.bias
F32
F32
[1024]
v.blk.12.layer_norm2.weight
F32
F32
[1024]
v.blk.12.mlp.fc1.bias
F32
F32
[4096]
v.blk.12.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.12.mlp.fc2.bias
F32
F32
[1024]
v.blk.12.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.12.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.12.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.12.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.12.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.13
v.blk.13.layer_norm1.bias
F32
F32
[1024]
v.blk.13.layer_norm1.weight
F32
F32
[1024]
v.blk.13.layer_norm2.bias
F32
F32
[1024]
v.blk.13.layer_norm2.weight
F32
F32
[1024]
v.blk.13.mlp.fc1.bias
F32
F32
[4096]
v.blk.13.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.13.mlp.fc2.bias
F32
F32
[1024]
v.blk.13.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.13.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.13.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.13.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.13.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.14
v.blk.14.layer_norm1.bias
F32
F32
[1024]
v.blk.14.layer_norm1.weight
F32
F32
[1024]
v.blk.14.layer_norm2.bias
F32
F32
[1024]
v.blk.14.layer_norm2.weight
F32
F32
[1024]
v.blk.14.mlp.fc1.bias
F32
F32
[4096]
v.blk.14.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.14.mlp.fc2.bias
F32
F32
[1024]
v.blk.14.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.14.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.14.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.14.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.14.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.15
v.blk.15.layer_norm1.bias
F32
F32
[1024]
v.blk.15.layer_norm1.weight
F32
F32
[1024]
v.blk.15.layer_norm2.bias
F32
F32
[1024]
v.blk.15.layer_norm2.weight
F32
F32
[1024]
v.blk.15.mlp.fc1.bias
F32
F32
[4096]
v.blk.15.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.15.mlp.fc2.bias
F32
F32
[1024]
v.blk.15.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.15.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.15.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.15.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.15.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.16
v.blk.16.layer_norm1.bias
F32
F32
[1024]
v.blk.16.layer_norm1.weight
F32
F32
[1024]
v.blk.16.layer_norm2.bias
F32
F32
[1024]
v.blk.16.layer_norm2.weight
F32
F32
[1024]
v.blk.16.mlp.fc1.bias
F32
F32
[4096]
v.blk.16.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.16.mlp.fc2.bias
F32
F32
[1024]
v.blk.16.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.16.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.16.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.16.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.16.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.17
v.blk.17.layer_norm1.bias
F32
F32
[1024]
v.blk.17.layer_norm1.weight
F32
F32
[1024]
v.blk.17.layer_norm2.bias
F32
F32
[1024]
v.blk.17.layer_norm2.weight
F32
F32
[1024]
v.blk.17.mlp.fc1.bias
F32
F32
[4096]
v.blk.17.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.17.mlp.fc2.bias
F32
F32
[1024]
v.blk.17.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.17.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.17.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.17.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.17.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.18
v.blk.18.layer_norm1.bias
F32
F32
[1024]
v.blk.18.layer_norm1.weight
F32
F32
[1024]
v.blk.18.layer_norm2.bias
F32
F32
[1024]
v.blk.18.layer_norm2.weight
F32
F32
[1024]
v.blk.18.mlp.fc1.bias
F32
F32
[4096]
v.blk.18.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.18.mlp.fc2.bias
F32
F32
[1024]
v.blk.18.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.18.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.18.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.18.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.18.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.19
v.blk.19.layer_norm1.bias
F32
F32
[1024]
v.blk.19.layer_norm1.weight
F32
F32
[1024]
v.blk.19.layer_norm2.bias
F32
F32
[1024]
v.blk.19.layer_norm2.weight
F32
F32
[1024]
v.blk.19.mlp.fc1.bias
F32
F32
[4096]
v.blk.19.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.19.mlp.fc2.bias
F32
F32
[1024]
v.blk.19.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.19.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.19.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.19.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.19.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.20
v.blk.20.layer_norm1.bias
F32
F32
[1024]
v.blk.20.layer_norm1.weight
F32
F32
[1024]
v.blk.20.layer_norm2.bias
F32
F32
[1024]
v.blk.20.layer_norm2.weight
F32
F32
[1024]
v.blk.20.mlp.fc1.bias
F32
F32
[4096]
v.blk.20.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.20.mlp.fc2.bias
F32
F32
[1024]
v.blk.20.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.20.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.20.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.20.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.20.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.21
v.blk.21.layer_norm1.bias
F32
F32
[1024]
v.blk.21.layer_norm1.weight
F32
F32
[1024]
v.blk.21.layer_norm2.bias
F32
F32
[1024]
v.blk.21.layer_norm2.weight
F32
F32
[1024]
v.blk.21.mlp.fc1.bias
F32
F32
[4096]
v.blk.21.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.21.mlp.fc2.bias
F32
F32
[1024]
v.blk.21.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.21.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.21.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.21.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.21.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.22
v.blk.22.layer_norm1.bias
F32
F32
[1024]
v.blk.22.layer_norm1.weight
F32
F32
[1024]
v.blk.22.layer_norm2.bias
F32
F32
[1024]
v.blk.22.layer_norm2.weight
F32
F32
[1024]
v.blk.22.mlp.fc1.bias
F32
F32
[4096]
v.blk.22.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.22.mlp.fc2.bias
F32
F32
[1024]
v.blk.22.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.22.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.22.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.22.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.22.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.blk.23
v.blk.23.layer_norm1.bias
F32
F32
[1024]
v.blk.23.layer_norm1.weight
F32
F32
[1024]
v.blk.23.layer_norm2.bias
F32
F32
[1024]
v.blk.23.layer_norm2.weight
F32
F32
[1024]
v.blk.23.mlp.fc1.bias
F32
F32
[4096]
v.blk.23.mlp.fc1.weight
F16
F16
[1024, 4096]
v.blk.23.mlp.fc2.bias
F32
F32
[1024]
v.blk.23.mlp.fc2.weight
F16
F16
[4096, 1024]
v.blk.23.self_attn.out_proj.bias
F32
F32
[1024]
v.blk.23.self_attn.out_proj.weight
F16
F16
[1024, 1024]
v.blk.23.self_attn.qkv_proj.bias
F32
F32
[3072]
v.blk.23.self_attn.qkv_proj.weight
F16
F16
[1024, 3072]
v.class_embd
F32
F32
[1024]
v.patch_embd.weight
F16
F16
[14, 14, 3, 1024]
v.position_embd.weight
F16
F16
[1024, 257]
v.pre_layrnorm.bias
F32
F32
[1024]
v.pre_layrnorm.weight
F32
F32
[1024]
output_norm.weight
F32
F32
[1280]