deepseek-ocr:latest

114 18 minutes ago

DeepSeek-OCR is a vision-language model that can perform token-efficient OCR.

vision 3b
3a18673ff291 · 6.7GB
    Metadata
  • general.architecture
    deepseekocr
  • general.file_type
    F16
  • deepseekocr.attention.head_count
    10
  • deepseekocr.attention.head_count_kv
    10
  • deepseekocr.block_count
    12
  • deepseekocr.context_length
    8192
  • deepseekocr.embedding_length
    1280
  • deepseekocr.expert_count
    64
  • deepseekocr.expert_used_count
    6
  • deepseekocr.feed_forward_length
    6848
  • deepseekocr.leading_dense_block_count
    1
  • deepseekocr.sam.block_count
    12
  • deepseekocr.sam.embedding_length
    768
  • deepseekocr.sam.global_attention_indexes
    [2, 5, 8, 11]
  • deepseekocr.sam.head_count
    12
  • deepseekocr.vision.block_count
    24
  • deepseekocr.vision.embedding_length
    1024
  • deepseekocr.vision.head_count
    16
  • deepseekocr.vision.image_size
    224
  • deepseekocr.vision.patch_size
    14
  • tokenizer.ggml.add_bos_token
    true
  • tokenizer.ggml.add_eos_token
    false
  • tokenizer.ggml.add_padding_token
    false
  • tokenizer.ggml.bos_token_id
    0
  • tokenizer.ggml.eos_token_id
    1
  • tokenizer.ggml.merges
    [Ġ t, Ġ a, i n, Ġ Ġ, h e, ...]
  • tokenizer.ggml.model
    gpt2
  • tokenizer.ggml.padding_token_id
    2
  • tokenizer.ggml.pre
    default
  • tokenizer.ggml.scores
    [0, 1, 2, 3, 4, ...]
  • tokenizer.ggml.token_type
    [3, 3, 3, 1, 1, ...]
  • tokenizer.ggml.tokens
    [<|begin▁of▁sentence|>, <|end▁of▁sentence|>, <|▁pad▁|>, !, ", ...]
  • Tensor
  • token_embd.weight
    BF16
    [1280, 129280]
  • blk.0
  • blk.0.attn_k.weight
    BF16
    [1280, 1280]
  • blk.0.attn_norm.weight
    F32
    [1280]
  • blk.0.attn_output.weight
    BF16
    [1280, 1280]
  • blk.0.attn_q.weight
    BF16
    [1280, 1280]
  • blk.0.attn_v.weight
    BF16
    [1280, 1280]
  • blk.0.ffn_down.weight
    BF16
    [6848, 1280]
  • blk.0.ffn_gate.weight
    BF16
    [1280, 6848]
  • blk.0.ffn_norm.weight
    F32
    [1280]
  • blk.0.ffn_up.weight
    BF16
    [1280, 6848]
  • blk.1
  • blk.1.attn_k.weight
    BF16
    [1280, 1280]
  • blk.1.attn_norm.weight
    F32
    [1280]
  • blk.1.attn_output.weight
    BF16
    [1280, 1280]
  • blk.1.attn_q.weight
    BF16
    [1280, 1280]
  • blk.1.attn_v.weight
    BF16
    [1280, 1280]
  • blk.1.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.1.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.1.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.1.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.1.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.1.ffn_norm.weight
    F32
    [1280]
  • blk.1.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.1.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • blk.2
  • blk.2.attn_k.weight
    BF16
    [1280, 1280]
  • blk.2.attn_norm.weight
    F32
    [1280]
  • blk.2.attn_output.weight
    BF16
    [1280, 1280]
  • blk.2.attn_q.weight
    BF16
    [1280, 1280]
  • blk.2.attn_v.weight
    BF16
    [1280, 1280]
  • blk.2.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.2.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.2.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.2.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.2.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.2.ffn_norm.weight
    F32
    [1280]
  • blk.2.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.2.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • blk.3
  • blk.3.attn_k.weight
    BF16
    [1280, 1280]
  • blk.3.attn_norm.weight
    F32
    [1280]
  • blk.3.attn_output.weight
    BF16
    [1280, 1280]
  • blk.3.attn_q.weight
    BF16
    [1280, 1280]
  • blk.3.attn_v.weight
    BF16
    [1280, 1280]
  • blk.3.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.3.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.3.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.3.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.3.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.3.ffn_norm.weight
    F32
    [1280]
  • blk.3.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.3.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • blk.4
  • blk.4.attn_k.weight
    BF16
    [1280, 1280]
  • blk.4.attn_norm.weight
    F32
    [1280]
  • blk.4.attn_output.weight
    BF16
    [1280, 1280]
  • blk.4.attn_q.weight
    BF16
    [1280, 1280]
  • blk.4.attn_v.weight
    BF16
    [1280, 1280]
  • blk.4.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.4.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.4.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.4.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.4.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.4.ffn_norm.weight
    F32
    [1280]
  • blk.4.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.4.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • blk.5
  • blk.5.attn_k.weight
    BF16
    [1280, 1280]
  • blk.5.attn_norm.weight
    F32
    [1280]
  • blk.5.attn_output.weight
    BF16
    [1280, 1280]
  • blk.5.attn_q.weight
    BF16
    [1280, 1280]
  • blk.5.attn_v.weight
    BF16
    [1280, 1280]
  • blk.5.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.5.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.5.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.5.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.5.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.5.ffn_norm.weight
    F32
    [1280]
  • blk.5.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.5.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • blk.6
  • blk.6.attn_k.weight
    BF16
    [1280, 1280]
  • blk.6.attn_norm.weight
    F32
    [1280]
  • blk.6.attn_output.weight
    BF16
    [1280, 1280]
  • blk.6.attn_q.weight
    BF16
    [1280, 1280]
  • blk.6.attn_v.weight
    BF16
    [1280, 1280]
  • blk.6.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.6.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.6.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.6.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.6.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.6.ffn_norm.weight
    F32
    [1280]
  • blk.6.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.6.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • blk.7
  • blk.7.attn_k.weight
    BF16
    [1280, 1280]
  • blk.7.attn_norm.weight
    F32
    [1280]
  • blk.7.attn_output.weight
    BF16
    [1280, 1280]
  • blk.7.attn_q.weight
    BF16
    [1280, 1280]
  • blk.7.attn_v.weight
    BF16
    [1280, 1280]
  • blk.7.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.7.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.7.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.7.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.7.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.7.ffn_norm.weight
    F32
    [1280]
  • blk.7.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.7.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • blk.8
  • blk.8.attn_k.weight
    BF16
    [1280, 1280]
  • blk.8.attn_norm.weight
    F32
    [1280]
  • blk.8.attn_output.weight
    BF16
    [1280, 1280]
  • blk.8.attn_q.weight
    BF16
    [1280, 1280]
  • blk.8.attn_v.weight
    BF16
    [1280, 1280]
  • blk.8.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.8.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.8.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.8.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.8.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.8.ffn_norm.weight
    F32
    [1280]
  • blk.8.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.8.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • blk.9
  • blk.9.attn_k.weight
    BF16
    [1280, 1280]
  • blk.9.attn_norm.weight
    F32
    [1280]
  • blk.9.attn_output.weight
    BF16
    [1280, 1280]
  • blk.9.attn_q.weight
    BF16
    [1280, 1280]
  • blk.9.attn_v.weight
    BF16
    [1280, 1280]
  • blk.9.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.9.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.9.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.9.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.9.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.9.ffn_norm.weight
    F32
    [1280]
  • blk.9.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.9.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • blk.10
  • blk.10.attn_k.weight
    BF16
    [1280, 1280]
  • blk.10.attn_norm.weight
    F32
    [1280]
  • blk.10.attn_output.weight
    BF16
    [1280, 1280]
  • blk.10.attn_q.weight
    BF16
    [1280, 1280]
  • blk.10.attn_v.weight
    BF16
    [1280, 1280]
  • blk.10.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.10.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.10.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.10.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.10.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.10.ffn_norm.weight
    F32
    [1280]
  • blk.10.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.10.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • blk.11
  • blk.11.attn_k.weight
    BF16
    [1280, 1280]
  • blk.11.attn_norm.weight
    F32
    [1280]
  • blk.11.attn_output.weight
    BF16
    [1280, 1280]
  • blk.11.attn_q.weight
    BF16
    [1280, 1280]
  • blk.11.attn_v.weight
    BF16
    [1280, 1280]
  • blk.11.ffn_down_exps.weight
    BF16
    [896, 1280, 64]
  • blk.11.ffn_down_shexp.weight
    BF16
    [1792, 1280]
  • blk.11.ffn_gate_exps.weight
    BF16
    [1280, 896, 64]
  • blk.11.ffn_gate_inp.weight
    F32
    [1280, 64]
  • blk.11.ffn_gate_shexp.weight
    BF16
    [1280, 1792]
  • blk.11.ffn_norm.weight
    F32
    [1280]
  • blk.11.ffn_up_exps.weight
    BF16
    [1280, 896, 64]
  • blk.11.ffn_up_shexp.weight
    BF16
    [1280, 1792]
  • mm.image_newline
    F32
    [1280]
  • mm.layers.bias
    F32
    [1280]
  • mm.layers.weight
    BF16
    [2048, 1280]
  • mm.view_seperator
    F32
    [1280]
  • output.weight
    BF16
    [1280, 129280]
  • s.blk.0
  • s.blk.0.attn.proj.bias
    F32
    [768]
  • s.blk.0.attn.proj.weight
    F16
    [768, 768]
  • s.blk.0.attn.qkv.bias
    F32
    [2304]
  • s.blk.0.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.0.attn.rel_pos_h
    F32
    [64, 27]
  • s.blk.0.attn.rel_pos_w
    F32
    [64, 27]
  • s.blk.0.mlp.lin1.bias
    F32
    [3072]
  • s.blk.0.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.0.mlp.lin2.bias
    F32
    [768]
  • s.blk.0.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.0.norm1.bias
    F32
    [768]
  • s.blk.0.norm1.weight
    F32
    [768]
  • s.blk.0.norm2.bias
    F32
    [768]
  • s.blk.0.norm2.weight
    F32
    [768]
  • s.blk.1
  • s.blk.1.attn.proj.bias
    F32
    [768]
  • s.blk.1.attn.proj.weight
    F16
    [768, 768]
  • s.blk.1.attn.qkv.bias
    F32
    [2304]
  • s.blk.1.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.1.attn.rel_pos_h
    F32
    [64, 27]
  • s.blk.1.attn.rel_pos_w
    F32
    [64, 27]
  • s.blk.1.mlp.lin1.bias
    F32
    [3072]
  • s.blk.1.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.1.mlp.lin2.bias
    F32
    [768]
  • s.blk.1.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.1.norm1.bias
    F32
    [768]
  • s.blk.1.norm1.weight
    F32
    [768]
  • s.blk.1.norm2.bias
    F32
    [768]
  • s.blk.1.norm2.weight
    F32
    [768]
  • s.blk.2
  • s.blk.2.attn.proj.bias
    F32
    [768]
  • s.blk.2.attn.proj.weight
    F16
    [768, 768]
  • s.blk.2.attn.qkv.bias
    F32
    [2304]
  • s.blk.2.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.2.attn.rel_pos_h
    F32
    [64, 127]
  • s.blk.2.attn.rel_pos_w
    F32
    [64, 127]
  • s.blk.2.mlp.lin1.bias
    F32
    [3072]
  • s.blk.2.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.2.mlp.lin2.bias
    F32
    [768]
  • s.blk.2.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.2.norm1.bias
    F32
    [768]
  • s.blk.2.norm1.weight
    F32
    [768]
  • s.blk.2.norm2.bias
    F32
    [768]
  • s.blk.2.norm2.weight
    F32
    [768]
  • s.blk.3
  • s.blk.3.attn.proj.bias
    F32
    [768]
  • s.blk.3.attn.proj.weight
    F16
    [768, 768]
  • s.blk.3.attn.qkv.bias
    F32
    [2304]
  • s.blk.3.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.3.attn.rel_pos_h
    F32
    [64, 27]
  • s.blk.3.attn.rel_pos_w
    F32
    [64, 27]
  • s.blk.3.mlp.lin1.bias
    F32
    [3072]
  • s.blk.3.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.3.mlp.lin2.bias
    F32
    [768]
  • s.blk.3.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.3.norm1.bias
    F32
    [768]
  • s.blk.3.norm1.weight
    F32
    [768]
  • s.blk.3.norm2.bias
    F32
    [768]
  • s.blk.3.norm2.weight
    F32
    [768]
  • s.blk.4
  • s.blk.4.attn.proj.bias
    F32
    [768]
  • s.blk.4.attn.proj.weight
    F16
    [768, 768]
  • s.blk.4.attn.qkv.bias
    F32
    [2304]
  • s.blk.4.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.4.attn.rel_pos_h
    F32
    [64, 27]
  • s.blk.4.attn.rel_pos_w
    F32
    [64, 27]
  • s.blk.4.mlp.lin1.bias
    F32
    [3072]
  • s.blk.4.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.4.mlp.lin2.bias
    F32
    [768]
  • s.blk.4.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.4.norm1.bias
    F32
    [768]
  • s.blk.4.norm1.weight
    F32
    [768]
  • s.blk.4.norm2.bias
    F32
    [768]
  • s.blk.4.norm2.weight
    F32
    [768]
  • s.blk.5
  • s.blk.5.attn.proj.bias
    F32
    [768]
  • s.blk.5.attn.proj.weight
    F16
    [768, 768]
  • s.blk.5.attn.qkv.bias
    F32
    [2304]
  • s.blk.5.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.5.attn.rel_pos_h
    F32
    [64, 127]
  • s.blk.5.attn.rel_pos_w
    F32
    [64, 127]
  • s.blk.5.mlp.lin1.bias
    F32
    [3072]
  • s.blk.5.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.5.mlp.lin2.bias
    F32
    [768]
  • s.blk.5.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.5.norm1.bias
    F32
    [768]
  • s.blk.5.norm1.weight
    F32
    [768]
  • s.blk.5.norm2.bias
    F32
    [768]
  • s.blk.5.norm2.weight
    F32
    [768]
  • s.blk.6
  • s.blk.6.attn.proj.bias
    F32
    [768]
  • s.blk.6.attn.proj.weight
    F16
    [768, 768]
  • s.blk.6.attn.qkv.bias
    F32
    [2304]
  • s.blk.6.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.6.attn.rel_pos_h
    F32
    [64, 27]
  • s.blk.6.attn.rel_pos_w
    F32
    [64, 27]
  • s.blk.6.mlp.lin1.bias
    F32
    [3072]
  • s.blk.6.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.6.mlp.lin2.bias
    F32
    [768]
  • s.blk.6.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.6.norm1.bias
    F32
    [768]
  • s.blk.6.norm1.weight
    F32
    [768]
  • s.blk.6.norm2.bias
    F32
    [768]
  • s.blk.6.norm2.weight
    F32
    [768]
  • s.blk.7
  • s.blk.7.attn.proj.bias
    F32
    [768]
  • s.blk.7.attn.proj.weight
    F16
    [768, 768]
  • s.blk.7.attn.qkv.bias
    F32
    [2304]
  • s.blk.7.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.7.attn.rel_pos_h
    F32
    [64, 27]
  • s.blk.7.attn.rel_pos_w
    F32
    [64, 27]
  • s.blk.7.mlp.lin1.bias
    F32
    [3072]
  • s.blk.7.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.7.mlp.lin2.bias
    F32
    [768]
  • s.blk.7.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.7.norm1.bias
    F32
    [768]
  • s.blk.7.norm1.weight
    F32
    [768]
  • s.blk.7.norm2.bias
    F32
    [768]
  • s.blk.7.norm2.weight
    F32
    [768]
  • s.blk.8
  • s.blk.8.attn.proj.bias
    F32
    [768]
  • s.blk.8.attn.proj.weight
    F16
    [768, 768]
  • s.blk.8.attn.qkv.bias
    F32
    [2304]
  • s.blk.8.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.8.attn.rel_pos_h
    F32
    [64, 127]
  • s.blk.8.attn.rel_pos_w
    F32
    [64, 127]
  • s.blk.8.mlp.lin1.bias
    F32
    [3072]
  • s.blk.8.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.8.mlp.lin2.bias
    F32
    [768]
  • s.blk.8.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.8.norm1.bias
    F32
    [768]
  • s.blk.8.norm1.weight
    F32
    [768]
  • s.blk.8.norm2.bias
    F32
    [768]
  • s.blk.8.norm2.weight
    F32
    [768]
  • s.blk.9
  • s.blk.9.attn.proj.bias
    F32
    [768]
  • s.blk.9.attn.proj.weight
    F16
    [768, 768]
  • s.blk.9.attn.qkv.bias
    F32
    [2304]
  • s.blk.9.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.9.attn.rel_pos_h
    F32
    [64, 27]
  • s.blk.9.attn.rel_pos_w
    F32
    [64, 27]
  • s.blk.9.mlp.lin1.bias
    F32
    [3072]
  • s.blk.9.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.9.mlp.lin2.bias
    F32
    [768]
  • s.blk.9.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.9.norm1.bias
    F32
    [768]
  • s.blk.9.norm1.weight
    F32
    [768]
  • s.blk.9.norm2.bias
    F32
    [768]
  • s.blk.9.norm2.weight
    F32
    [768]
  • s.blk.10
  • s.blk.10.attn.proj.bias
    F32
    [768]
  • s.blk.10.attn.proj.weight
    F16
    [768, 768]
  • s.blk.10.attn.qkv.bias
    F32
    [2304]
  • s.blk.10.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.10.attn.rel_pos_h
    F32
    [64, 27]
  • s.blk.10.attn.rel_pos_w
    F32
    [64, 27]
  • s.blk.10.mlp.lin1.bias
    F32
    [3072]
  • s.blk.10.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.10.mlp.lin2.bias
    F32
    [768]
  • s.blk.10.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.10.norm1.bias
    F32
    [768]
  • s.blk.10.norm1.weight
    F32
    [768]
  • s.blk.10.norm2.bias
    F32
    [768]
  • s.blk.10.norm2.weight
    F32
    [768]
  • s.blk.11
  • s.blk.11.attn.proj.bias
    F32
    [768]
  • s.blk.11.attn.proj.weight
    F16
    [768, 768]
  • s.blk.11.attn.qkv.bias
    F32
    [2304]
  • s.blk.11.attn.qkv.weight
    F16
    [768, 2304]
  • s.blk.11.attn.rel_pos_h
    F32
    [64, 127]
  • s.blk.11.attn.rel_pos_w
    F32
    [64, 127]
  • s.blk.11.mlp.lin1.bias
    F32
    [3072]
  • s.blk.11.mlp.lin1.weight
    F16
    [768, 3072]
  • s.blk.11.mlp.lin2.bias
    F32
    [768]
  • s.blk.11.mlp.lin2.weight
    F16
    [3072, 768]
  • s.blk.11.norm1.bias
    F32
    [768]
  • s.blk.11.norm1.weight
    F32
    [768]
  • s.blk.11.norm2.bias
    F32
    [768]
  • s.blk.11.norm2.weight
    F32
    [768]
  • s.neck.0.weight
    F16
    [1, 1, 768, 256]
  • s.neck.1.bias
    F32
    [256]
  • s.neck.1.weight
    F32
    [256]
  • s.neck.2.weight
    F16
    [3, 3, 256, 256]
  • s.neck.3.bias
    F32
    [256]
  • s.neck.3.weight
    F32
    [256]
  • s.net_2.weight
    F16
    [3, 3, 256, 512]
  • s.net_3.weight
    F16
    [3, 3, 512, 1024]
  • s.patch_embd.bias
    F32
    [768]
  • s.patch_embd.weight
    F16
    [16, 16, 3, 768]
  • s.position_embd
    F32
    [768, 64, 64, 1]
  • v.blk.0
  • v.blk.0.layer_norm1.bias
    F32
    [1024]
  • v.blk.0.layer_norm1.weight
    F32
    [1024]
  • v.blk.0.layer_norm2.bias
    F32
    [1024]
  • v.blk.0.layer_norm2.weight
    F32
    [1024]
  • v.blk.0.mlp.fc1.bias
    F32
    [4096]
  • v.blk.0.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.0.mlp.fc2.bias
    F32
    [1024]
  • v.blk.0.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.0.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.0.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.0.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.0.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.1
  • v.blk.1.layer_norm1.bias
    F32
    [1024]
  • v.blk.1.layer_norm1.weight
    F32
    [1024]
  • v.blk.1.layer_norm2.bias
    F32
    [1024]
  • v.blk.1.layer_norm2.weight
    F32
    [1024]
  • v.blk.1.mlp.fc1.bias
    F32
    [4096]
  • v.blk.1.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.1.mlp.fc2.bias
    F32
    [1024]
  • v.blk.1.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.1.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.1.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.1.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.1.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.2
  • v.blk.2.layer_norm1.bias
    F32
    [1024]
  • v.blk.2.layer_norm1.weight
    F32
    [1024]
  • v.blk.2.layer_norm2.bias
    F32
    [1024]
  • v.blk.2.layer_norm2.weight
    F32
    [1024]
  • v.blk.2.mlp.fc1.bias
    F32
    [4096]
  • v.blk.2.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.2.mlp.fc2.bias
    F32
    [1024]
  • v.blk.2.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.2.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.2.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.2.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.2.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.3
  • v.blk.3.layer_norm1.bias
    F32
    [1024]
  • v.blk.3.layer_norm1.weight
    F32
    [1024]
  • v.blk.3.layer_norm2.bias
    F32
    [1024]
  • v.blk.3.layer_norm2.weight
    F32
    [1024]
  • v.blk.3.mlp.fc1.bias
    F32
    [4096]
  • v.blk.3.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.3.mlp.fc2.bias
    F32
    [1024]
  • v.blk.3.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.3.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.3.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.3.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.3.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.4
  • v.blk.4.layer_norm1.bias
    F32
    [1024]
  • v.blk.4.layer_norm1.weight
    F32
    [1024]
  • v.blk.4.layer_norm2.bias
    F32
    [1024]
  • v.blk.4.layer_norm2.weight
    F32
    [1024]
  • v.blk.4.mlp.fc1.bias
    F32
    [4096]
  • v.blk.4.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.4.mlp.fc2.bias
    F32
    [1024]
  • v.blk.4.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.4.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.4.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.4.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.4.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.5
  • v.blk.5.layer_norm1.bias
    F32
    [1024]
  • v.blk.5.layer_norm1.weight
    F32
    [1024]
  • v.blk.5.layer_norm2.bias
    F32
    [1024]
  • v.blk.5.layer_norm2.weight
    F32
    [1024]
  • v.blk.5.mlp.fc1.bias
    F32
    [4096]
  • v.blk.5.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.5.mlp.fc2.bias
    F32
    [1024]
  • v.blk.5.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.5.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.5.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.5.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.5.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.6
  • v.blk.6.layer_norm1.bias
    F32
    [1024]
  • v.blk.6.layer_norm1.weight
    F32
    [1024]
  • v.blk.6.layer_norm2.bias
    F32
    [1024]
  • v.blk.6.layer_norm2.weight
    F32
    [1024]
  • v.blk.6.mlp.fc1.bias
    F32
    [4096]
  • v.blk.6.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.6.mlp.fc2.bias
    F32
    [1024]
  • v.blk.6.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.6.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.6.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.6.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.6.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.7
  • v.blk.7.layer_norm1.bias
    F32
    [1024]
  • v.blk.7.layer_norm1.weight
    F32
    [1024]
  • v.blk.7.layer_norm2.bias
    F32
    [1024]
  • v.blk.7.layer_norm2.weight
    F32
    [1024]
  • v.blk.7.mlp.fc1.bias
    F32
    [4096]
  • v.blk.7.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.7.mlp.fc2.bias
    F32
    [1024]
  • v.blk.7.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.7.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.7.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.7.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.7.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.8
  • v.blk.8.layer_norm1.bias
    F32
    [1024]
  • v.blk.8.layer_norm1.weight
    F32
    [1024]
  • v.blk.8.layer_norm2.bias
    F32
    [1024]
  • v.blk.8.layer_norm2.weight
    F32
    [1024]
  • v.blk.8.mlp.fc1.bias
    F32
    [4096]
  • v.blk.8.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.8.mlp.fc2.bias
    F32
    [1024]
  • v.blk.8.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.8.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.8.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.8.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.8.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.9
  • v.blk.9.layer_norm1.bias
    F32
    [1024]
  • v.blk.9.layer_norm1.weight
    F32
    [1024]
  • v.blk.9.layer_norm2.bias
    F32
    [1024]
  • v.blk.9.layer_norm2.weight
    F32
    [1024]
  • v.blk.9.mlp.fc1.bias
    F32
    [4096]
  • v.blk.9.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.9.mlp.fc2.bias
    F32
    [1024]
  • v.blk.9.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.9.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.9.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.9.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.9.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.10
  • v.blk.10.layer_norm1.bias
    F32
    [1024]
  • v.blk.10.layer_norm1.weight
    F32
    [1024]
  • v.blk.10.layer_norm2.bias
    F32
    [1024]
  • v.blk.10.layer_norm2.weight
    F32
    [1024]
  • v.blk.10.mlp.fc1.bias
    F32
    [4096]
  • v.blk.10.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.10.mlp.fc2.bias
    F32
    [1024]
  • v.blk.10.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.10.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.10.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.10.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.10.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.11
  • v.blk.11.layer_norm1.bias
    F32
    [1024]
  • v.blk.11.layer_norm1.weight
    F32
    [1024]
  • v.blk.11.layer_norm2.bias
    F32
    [1024]
  • v.blk.11.layer_norm2.weight
    F32
    [1024]
  • v.blk.11.mlp.fc1.bias
    F32
    [4096]
  • v.blk.11.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.11.mlp.fc2.bias
    F32
    [1024]
  • v.blk.11.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.11.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.11.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.11.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.11.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.12
  • v.blk.12.layer_norm1.bias
    F32
    [1024]
  • v.blk.12.layer_norm1.weight
    F32
    [1024]
  • v.blk.12.layer_norm2.bias
    F32
    [1024]
  • v.blk.12.layer_norm2.weight
    F32
    [1024]
  • v.blk.12.mlp.fc1.bias
    F32
    [4096]
  • v.blk.12.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.12.mlp.fc2.bias
    F32
    [1024]
  • v.blk.12.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.12.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.12.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.12.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.12.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.13
  • v.blk.13.layer_norm1.bias
    F32
    [1024]
  • v.blk.13.layer_norm1.weight
    F32
    [1024]
  • v.blk.13.layer_norm2.bias
    F32
    [1024]
  • v.blk.13.layer_norm2.weight
    F32
    [1024]
  • v.blk.13.mlp.fc1.bias
    F32
    [4096]
  • v.blk.13.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.13.mlp.fc2.bias
    F32
    [1024]
  • v.blk.13.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.13.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.13.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.13.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.13.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.14
  • v.blk.14.layer_norm1.bias
    F32
    [1024]
  • v.blk.14.layer_norm1.weight
    F32
    [1024]
  • v.blk.14.layer_norm2.bias
    F32
    [1024]
  • v.blk.14.layer_norm2.weight
    F32
    [1024]
  • v.blk.14.mlp.fc1.bias
    F32
    [4096]
  • v.blk.14.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.14.mlp.fc2.bias
    F32
    [1024]
  • v.blk.14.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.14.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.14.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.14.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.14.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.15
  • v.blk.15.layer_norm1.bias
    F32
    [1024]
  • v.blk.15.layer_norm1.weight
    F32
    [1024]
  • v.blk.15.layer_norm2.bias
    F32
    [1024]
  • v.blk.15.layer_norm2.weight
    F32
    [1024]
  • v.blk.15.mlp.fc1.bias
    F32
    [4096]
  • v.blk.15.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.15.mlp.fc2.bias
    F32
    [1024]
  • v.blk.15.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.15.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.15.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.15.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.15.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.16
  • v.blk.16.layer_norm1.bias
    F32
    [1024]
  • v.blk.16.layer_norm1.weight
    F32
    [1024]
  • v.blk.16.layer_norm2.bias
    F32
    [1024]
  • v.blk.16.layer_norm2.weight
    F32
    [1024]
  • v.blk.16.mlp.fc1.bias
    F32
    [4096]
  • v.blk.16.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.16.mlp.fc2.bias
    F32
    [1024]
  • v.blk.16.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.16.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.16.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.16.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.16.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.17
  • v.blk.17.layer_norm1.bias
    F32
    [1024]
  • v.blk.17.layer_norm1.weight
    F32
    [1024]
  • v.blk.17.layer_norm2.bias
    F32
    [1024]
  • v.blk.17.layer_norm2.weight
    F32
    [1024]
  • v.blk.17.mlp.fc1.bias
    F32
    [4096]
  • v.blk.17.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.17.mlp.fc2.bias
    F32
    [1024]
  • v.blk.17.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.17.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.17.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.17.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.17.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.18
  • v.blk.18.layer_norm1.bias
    F32
    [1024]
  • v.blk.18.layer_norm1.weight
    F32
    [1024]
  • v.blk.18.layer_norm2.bias
    F32
    [1024]
  • v.blk.18.layer_norm2.weight
    F32
    [1024]
  • v.blk.18.mlp.fc1.bias
    F32
    [4096]
  • v.blk.18.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.18.mlp.fc2.bias
    F32
    [1024]
  • v.blk.18.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.18.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.18.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.18.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.18.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.19
  • v.blk.19.layer_norm1.bias
    F32
    [1024]
  • v.blk.19.layer_norm1.weight
    F32
    [1024]
  • v.blk.19.layer_norm2.bias
    F32
    [1024]
  • v.blk.19.layer_norm2.weight
    F32
    [1024]
  • v.blk.19.mlp.fc1.bias
    F32
    [4096]
  • v.blk.19.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.19.mlp.fc2.bias
    F32
    [1024]
  • v.blk.19.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.19.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.19.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.19.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.19.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.20
  • v.blk.20.layer_norm1.bias
    F32
    [1024]
  • v.blk.20.layer_norm1.weight
    F32
    [1024]
  • v.blk.20.layer_norm2.bias
    F32
    [1024]
  • v.blk.20.layer_norm2.weight
    F32
    [1024]
  • v.blk.20.mlp.fc1.bias
    F32
    [4096]
  • v.blk.20.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.20.mlp.fc2.bias
    F32
    [1024]
  • v.blk.20.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.20.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.20.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.20.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.20.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.21
  • v.blk.21.layer_norm1.bias
    F32
    [1024]
  • v.blk.21.layer_norm1.weight
    F32
    [1024]
  • v.blk.21.layer_norm2.bias
    F32
    [1024]
  • v.blk.21.layer_norm2.weight
    F32
    [1024]
  • v.blk.21.mlp.fc1.bias
    F32
    [4096]
  • v.blk.21.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.21.mlp.fc2.bias
    F32
    [1024]
  • v.blk.21.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.21.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.21.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.21.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.21.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.22
  • v.blk.22.layer_norm1.bias
    F32
    [1024]
  • v.blk.22.layer_norm1.weight
    F32
    [1024]
  • v.blk.22.layer_norm2.bias
    F32
    [1024]
  • v.blk.22.layer_norm2.weight
    F32
    [1024]
  • v.blk.22.mlp.fc1.bias
    F32
    [4096]
  • v.blk.22.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.22.mlp.fc2.bias
    F32
    [1024]
  • v.blk.22.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.22.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.22.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.22.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.22.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.blk.23
  • v.blk.23.layer_norm1.bias
    F32
    [1024]
  • v.blk.23.layer_norm1.weight
    F32
    [1024]
  • v.blk.23.layer_norm2.bias
    F32
    [1024]
  • v.blk.23.layer_norm2.weight
    F32
    [1024]
  • v.blk.23.mlp.fc1.bias
    F32
    [4096]
  • v.blk.23.mlp.fc1.weight
    F16
    [1024, 4096]
  • v.blk.23.mlp.fc2.bias
    F32
    [1024]
  • v.blk.23.mlp.fc2.weight
    F16
    [4096, 1024]
  • v.blk.23.self_attn.out_proj.bias
    F32
    [1024]
  • v.blk.23.self_attn.out_proj.weight
    F16
    [1024, 1024]
  • v.blk.23.self_attn.qkv_proj.bias
    F32
    [3072]
  • v.blk.23.self_attn.qkv_proj.weight
    F16
    [1024, 3072]
  • v.class_embd
    F32
    [1024]
  • v.patch_embd.weight
    F16
    [14, 14, 3, 1024]
  • v.position_embd.weight
    F16
    [1024, 257]
  • v.pre_layrnorm.bias
    F32
    [1024]
  • v.pre_layrnorm.weight
    F32
    [1024]
  • output_norm.weight
    F32
    [1280]