latest
27GB
5 Pulls Updated 4 months ago
beca1bc263b9 · 27GB
-
general.architecturebaichuan
-
baichuan.attention.head_count40
-
baichuan.attention.head_count_kv40
-
baichuan.attention.layer_norm_rms_epsilon1e-06
-
baichuan.block_count40
-
baichuan.context_length4096
-
baichuan.embedding_length5120
-
baichuan.feed_forward_length13696
-
baichuan.rope.dimension_count128
-
baichuan.tensor_data_layoutMeta AI original pth
-
tokenizer.ggml.add_bos_tokenfalse
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id2
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.scores[0 0 0 0 0 ...]
-
tokenizer.ggml.token_type[2 3 3 1 1 ...]
-
tokenizer.ggml.tokens[<unk> <s> </s> <SEP> <CLS> ...]
-
NameTypeShape
-
token_embd.weightF16[5120 64000]
-
blk.0.attn_output.weightF16[5120 5120]
-
blk.0.ffn_gate.weightF16[5120 13696]
-
blk.0.ffn_down.weightF16[13696 5120]
-
blk.0.ffn_up.weightF16[5120 13696]
-
blk.0.attn_norm.weightF32[5120]
-
blk.0.ffn_norm.weightF32[5120]
-
blk.0.attn_q.weightF16[5120 5120]
-
blk.0.attn_k.weightF16[5120 5120]
-
blk.0.attn_v.weightF16[5120 5120]
-
blk.1.attn_output.weightF16[5120 5120]
-
blk.1.ffn_gate.weightF16[5120 13696]
-
blk.1.ffn_down.weightF16[13696 5120]
-
blk.1.ffn_up.weightF16[5120 13696]
-
blk.1.attn_norm.weightF32[5120]
-
blk.1.ffn_norm.weightF32[5120]
-
blk.1.attn_q.weightF16[5120 5120]
-
blk.1.attn_k.weightF16[5120 5120]
-
blk.1.attn_v.weightF16[5120 5120]
-
blk.2.attn_output.weightF16[5120 5120]
-
blk.2.ffn_gate.weightF16[5120 13696]
-
blk.2.ffn_down.weightF16[13696 5120]
-
blk.2.ffn_up.weightF16[5120 13696]
-
blk.2.attn_norm.weightF32[5120]
-
blk.2.ffn_norm.weightF32[5120]
-
blk.2.attn_q.weightF16[5120 5120]
-
blk.2.attn_k.weightF16[5120 5120]
-
blk.2.attn_v.weightF16[5120 5120]
-
blk.3.attn_output.weightF16[5120 5120]
-
blk.3.ffn_gate.weightF16[5120 13696]
-
blk.3.ffn_down.weightF16[13696 5120]
-
blk.3.ffn_up.weightF16[5120 13696]
-
blk.3.attn_norm.weightF32[5120]
-
blk.3.ffn_norm.weightF32[5120]
-
blk.3.attn_q.weightF16[5120 5120]
-
blk.3.attn_k.weightF16[5120 5120]
-
blk.3.attn_v.weightF16[5120 5120]
-
blk.4.attn_output.weightF16[5120 5120]
-
blk.4.ffn_gate.weightF16[5120 13696]
-
blk.4.ffn_down.weightF16[13696 5120]
-
blk.4.ffn_up.weightF16[5120 13696]
-
blk.4.attn_norm.weightF32[5120]
-
blk.4.ffn_norm.weightF32[5120]
-
blk.4.attn_q.weightF16[5120 5120]
-
blk.4.attn_k.weightF16[5120 5120]
-
blk.4.attn_v.weightF16[5120 5120]
-
blk.5.attn_output.weightF16[5120 5120]
-
blk.5.ffn_gate.weightF16[5120 13696]
-
blk.5.ffn_down.weightF16[13696 5120]
-
blk.5.ffn_up.weightF16[5120 13696]
-
blk.5.attn_norm.weightF32[5120]
-
blk.5.ffn_norm.weightF32[5120]
-
blk.5.attn_q.weightF16[5120 5120]
-
blk.5.attn_k.weightF16[5120 5120]
-
blk.5.attn_v.weightF16[5120 5120]
-
blk.6.attn_output.weightF16[5120 5120]
-
blk.6.ffn_gate.weightF16[5120 13696]
-
blk.6.ffn_down.weightF16[13696 5120]
-
blk.6.ffn_up.weightF16[5120 13696]
-
blk.6.attn_norm.weightF32[5120]
-
blk.6.ffn_norm.weightF32[5120]
-
blk.6.attn_q.weightF16[5120 5120]
-
blk.6.attn_k.weightF16[5120 5120]
-
blk.6.attn_v.weightF16[5120 5120]
-
blk.7.attn_output.weightF16[5120 5120]
-
blk.7.ffn_gate.weightF16[5120 13696]
-
blk.7.ffn_down.weightF16[13696 5120]
-
blk.7.ffn_up.weightF16[5120 13696]
-
blk.7.attn_norm.weightF32[5120]
-
blk.7.ffn_norm.weightF32[5120]
-
blk.7.attn_q.weightF16[5120 5120]
-
blk.7.attn_k.weightF16[5120 5120]
-
blk.7.attn_v.weightF16[5120 5120]
-
blk.8.attn_output.weightF16[5120 5120]
-
blk.8.ffn_gate.weightF16[5120 13696]
-
blk.8.ffn_down.weightF16[13696 5120]
-
blk.8.ffn_up.weightF16[5120 13696]
-
blk.8.attn_norm.weightF32[5120]
-
blk.8.ffn_norm.weightF32[5120]
-
blk.8.attn_q.weightF16[5120 5120]
-
blk.8.attn_k.weightF16[5120 5120]
-
blk.8.attn_v.weightF16[5120 5120]
-
blk.9.attn_output.weightF16[5120 5120]
-
blk.9.ffn_gate.weightF16[5120 13696]
-
blk.9.ffn_down.weightF16[13696 5120]
-
blk.9.ffn_up.weightF16[5120 13696]
-
blk.9.attn_norm.weightF32[5120]
-
blk.9.ffn_norm.weightF32[5120]
-
blk.9.attn_q.weightF16[5120 5120]
-
blk.9.attn_k.weightF16[5120 5120]
-
blk.9.attn_v.weightF16[5120 5120]
-
blk.10.attn_output.weightF16[5120 5120]
-
blk.10.ffn_gate.weightF16[5120 13696]
-
blk.10.ffn_down.weightF16[13696 5120]
-
blk.10.ffn_up.weightF16[5120 13696]
-
blk.10.attn_norm.weightF32[5120]
-
blk.10.ffn_norm.weightF32[5120]
-
blk.10.attn_q.weightF16[5120 5120]
-
blk.10.attn_k.weightF16[5120 5120]
-
blk.10.attn_v.weightF16[5120 5120]
-
blk.11.attn_output.weightF16[5120 5120]
-
blk.11.ffn_gate.weightF16[5120 13696]
-
blk.11.ffn_down.weightF16[13696 5120]
-
blk.11.ffn_up.weightF16[5120 13696]
-
blk.11.attn_norm.weightF32[5120]
-
blk.11.ffn_norm.weightF32[5120]
-
blk.11.attn_q.weightF16[5120 5120]
-
blk.11.attn_k.weightF16[5120 5120]
-
blk.11.attn_v.weightF16[5120 5120]
-
blk.12.attn_output.weightF16[5120 5120]
-
blk.12.ffn_gate.weightF16[5120 13696]
-
blk.12.ffn_down.weightF16[13696 5120]
-
blk.12.ffn_up.weightF16[5120 13696]
-
blk.12.attn_norm.weightF32[5120]
-
blk.12.ffn_norm.weightF32[5120]
-
blk.12.attn_q.weightF16[5120 5120]
-
blk.12.attn_k.weightF16[5120 5120]
-
blk.12.attn_v.weightF16[5120 5120]
-
blk.13.attn_output.weightF16[5120 5120]
-
blk.13.ffn_gate.weightF16[5120 13696]
-
blk.13.ffn_down.weightF16[13696 5120]
-
blk.13.ffn_up.weightF16[5120 13696]
-
blk.13.attn_norm.weightF32[5120]
-
blk.13.ffn_norm.weightF32[5120]
-
blk.13.attn_q.weightF16[5120 5120]
-
blk.13.attn_k.weightF16[5120 5120]
-
blk.13.attn_v.weightF16[5120 5120]
-
blk.14.attn_output.weightF16[5120 5120]
-
blk.14.ffn_gate.weightF16[5120 13696]
-
blk.14.ffn_down.weightF16[13696 5120]
-
blk.14.ffn_up.weightF16[5120 13696]
-
blk.14.attn_norm.weightF32[5120]
-
blk.14.ffn_norm.weightF32[5120]
-
blk.14.attn_q.weightF16[5120 5120]
-
blk.14.attn_k.weightF16[5120 5120]
-
blk.14.attn_v.weightF16[5120 5120]
-
blk.15.attn_output.weightF16[5120 5120]
-
blk.15.ffn_gate.weightF16[5120 13696]
-
blk.15.ffn_down.weightF16[13696 5120]
-
blk.15.ffn_up.weightF16[5120 13696]
-
blk.15.attn_norm.weightF32[5120]
-
blk.15.ffn_norm.weightF32[5120]
-
blk.15.attn_q.weightF16[5120 5120]
-
blk.15.attn_k.weightF16[5120 5120]
-
blk.15.attn_v.weightF16[5120 5120]
-
blk.16.attn_output.weightF16[5120 5120]
-
blk.16.ffn_gate.weightF16[5120 13696]
-
blk.16.ffn_down.weightF16[13696 5120]
-
blk.16.ffn_up.weightF16[5120 13696]
-
blk.16.attn_norm.weightF32[5120]
-
blk.16.ffn_norm.weightF32[5120]
-
blk.16.attn_q.weightF16[5120 5120]
-
blk.16.attn_k.weightF16[5120 5120]
-
blk.16.attn_v.weightF16[5120 5120]
-
blk.17.attn_output.weightF16[5120 5120]
-
blk.17.ffn_gate.weightF16[5120 13696]
-
blk.17.ffn_down.weightF16[13696 5120]
-
blk.17.ffn_up.weightF16[5120 13696]
-
blk.17.attn_norm.weightF32[5120]
-
blk.17.ffn_norm.weightF32[5120]
-
blk.17.attn_q.weightF16[5120 5120]
-
blk.17.attn_k.weightF16[5120 5120]
-
blk.17.attn_v.weightF16[5120 5120]
-
blk.18.attn_output.weightF16[5120 5120]
-
blk.18.ffn_gate.weightF16[5120 13696]
-
blk.18.ffn_down.weightF16[13696 5120]
-
blk.18.ffn_up.weightF16[5120 13696]
-
blk.18.attn_norm.weightF32[5120]
-
blk.18.ffn_norm.weightF32[5120]
-
blk.18.attn_q.weightF16[5120 5120]
-
blk.18.attn_k.weightF16[5120 5120]
-
blk.18.attn_v.weightF16[5120 5120]
-
blk.19.attn_output.weightF16[5120 5120]
-
blk.19.ffn_gate.weightF16[5120 13696]
-
blk.19.ffn_down.weightF16[13696 5120]
-
blk.19.ffn_up.weightF16[5120 13696]
-
blk.19.attn_norm.weightF32[5120]
-
blk.19.ffn_norm.weightF32[5120]
-
blk.19.attn_q.weightF16[5120 5120]
-
blk.19.attn_k.weightF16[5120 5120]
-
blk.19.attn_v.weightF16[5120 5120]
-
blk.20.attn_output.weightF16[5120 5120]
-
blk.20.ffn_gate.weightF16[5120 13696]
-
blk.20.ffn_down.weightF16[13696 5120]
-
blk.20.ffn_up.weightF16[5120 13696]
-
blk.20.attn_norm.weightF32[5120]
-
blk.20.ffn_norm.weightF32[5120]
-
blk.20.attn_q.weightF16[5120 5120]
-
blk.20.attn_k.weightF16[5120 5120]
-
blk.20.attn_v.weightF16[5120 5120]
-
blk.21.attn_output.weightF16[5120 5120]
-
blk.21.ffn_gate.weightF16[5120 13696]
-
blk.21.ffn_down.weightF16[13696 5120]
-
blk.21.ffn_up.weightF16[5120 13696]
-
blk.21.attn_norm.weightF32[5120]
-
blk.21.ffn_norm.weightF32[5120]
-
blk.21.attn_q.weightF16[5120 5120]
-
blk.21.attn_k.weightF16[5120 5120]
-
blk.21.attn_v.weightF16[5120 5120]
-
blk.22.attn_output.weightF16[5120 5120]
-
blk.22.ffn_gate.weightF16[5120 13696]
-
blk.22.ffn_down.weightF16[13696 5120]
-
blk.22.ffn_up.weightF16[5120 13696]
-
blk.22.attn_norm.weightF32[5120]
-
blk.22.ffn_norm.weightF32[5120]
-
blk.22.attn_q.weightF16[5120 5120]
-
blk.22.attn_k.weightF16[5120 5120]
-
blk.22.attn_v.weightF16[5120 5120]
-
blk.23.attn_output.weightF16[5120 5120]
-
blk.23.ffn_gate.weightF16[5120 13696]
-
blk.23.ffn_down.weightF16[13696 5120]
-
blk.23.ffn_up.weightF16[5120 13696]
-
blk.23.attn_norm.weightF32[5120]
-
blk.23.ffn_norm.weightF32[5120]
-
blk.23.attn_q.weightF16[5120 5120]
-
blk.23.attn_k.weightF16[5120 5120]
-
blk.23.attn_v.weightF16[5120 5120]
-
blk.24.attn_output.weightF16[5120 5120]
-
blk.24.ffn_gate.weightF16[5120 13696]
-
blk.24.ffn_down.weightF16[13696 5120]
-
blk.24.ffn_up.weightF16[5120 13696]
-
blk.24.attn_norm.weightF32[5120]
-
blk.24.ffn_norm.weightF32[5120]
-
blk.24.attn_q.weightF16[5120 5120]
-
blk.24.attn_k.weightF16[5120 5120]
-
blk.24.attn_v.weightF16[5120 5120]
-
blk.25.attn_output.weightF16[5120 5120]
-
blk.25.ffn_gate.weightF16[5120 13696]
-
blk.25.ffn_down.weightF16[13696 5120]
-
blk.25.ffn_up.weightF16[5120 13696]
-
blk.25.attn_norm.weightF32[5120]
-
blk.25.ffn_norm.weightF32[5120]
-
blk.25.attn_q.weightF16[5120 5120]
-
blk.25.attn_k.weightF16[5120 5120]
-
blk.25.attn_v.weightF16[5120 5120]
-
blk.26.attn_output.weightF16[5120 5120]
-
blk.26.ffn_gate.weightF16[5120 13696]
-
blk.26.ffn_down.weightF16[13696 5120]
-
blk.26.ffn_up.weightF16[5120 13696]
-
blk.26.attn_norm.weightF32[5120]
-
blk.26.ffn_norm.weightF32[5120]
-
blk.26.attn_q.weightF16[5120 5120]
-
blk.26.attn_k.weightF16[5120 5120]
-
blk.26.attn_v.weightF16[5120 5120]
-
blk.27.attn_output.weightF16[5120 5120]
-
blk.27.ffn_gate.weightF16[5120 13696]
-
blk.27.ffn_down.weightF16[13696 5120]
-
blk.27.ffn_up.weightF16[5120 13696]
-
blk.27.attn_norm.weightF32[5120]
-
blk.27.ffn_norm.weightF32[5120]
-
blk.27.attn_q.weightF16[5120 5120]
-
blk.27.attn_k.weightF16[5120 5120]
-
blk.27.attn_v.weightF16[5120 5120]
-
blk.28.attn_output.weightF16[5120 5120]
-
blk.28.ffn_gate.weightF16[5120 13696]
-
blk.28.ffn_down.weightF16[13696 5120]
-
blk.28.ffn_up.weightF16[5120 13696]
-
blk.28.attn_norm.weightF32[5120]
-
blk.28.ffn_norm.weightF32[5120]
-
blk.28.attn_q.weightF16[5120 5120]
-
blk.28.attn_k.weightF16[5120 5120]
-
blk.28.attn_v.weightF16[5120 5120]
-
blk.29.attn_output.weightF16[5120 5120]
-
blk.29.ffn_gate.weightF16[5120 13696]
-
blk.29.ffn_down.weightF16[13696 5120]
-
blk.29.ffn_up.weightF16[5120 13696]
-
blk.29.attn_norm.weightF32[5120]
-
blk.29.ffn_norm.weightF32[5120]
-
blk.29.attn_q.weightF16[5120 5120]
-
blk.29.attn_k.weightF16[5120 5120]
-
blk.29.attn_v.weightF16[5120 5120]
-
blk.30.attn_output.weightF16[5120 5120]
-
blk.30.ffn_gate.weightF16[5120 13696]
-
blk.30.ffn_down.weightF16[13696 5120]
-
blk.30.ffn_up.weightF16[5120 13696]
-
blk.30.attn_norm.weightF32[5120]
-
blk.30.ffn_norm.weightF32[5120]
-
blk.30.attn_q.weightF16[5120 5120]
-
blk.30.attn_k.weightF16[5120 5120]
-
blk.30.attn_v.weightF16[5120 5120]
-
blk.31.attn_output.weightF16[5120 5120]
-
blk.31.ffn_gate.weightF16[5120 13696]
-
blk.31.ffn_down.weightF16[13696 5120]
-
blk.31.ffn_up.weightF16[5120 13696]
-
blk.31.attn_norm.weightF32[5120]
-
blk.31.ffn_norm.weightF32[5120]
-
blk.31.attn_q.weightF16[5120 5120]
-
blk.31.attn_k.weightF16[5120 5120]
-
blk.31.attn_v.weightF16[5120 5120]
-
blk.32.attn_output.weightF16[5120 5120]
-
blk.32.ffn_gate.weightF16[5120 13696]
-
blk.32.ffn_down.weightF16[13696 5120]
-
blk.32.ffn_up.weightF16[5120 13696]
-
blk.32.attn_norm.weightF32[5120]
-
blk.32.ffn_norm.weightF32[5120]
-
blk.32.attn_q.weightF16[5120 5120]
-
blk.32.attn_k.weightF16[5120 5120]
-
blk.32.attn_v.weightF16[5120 5120]
-
blk.33.attn_output.weightF16[5120 5120]
-
blk.33.ffn_gate.weightF16[5120 13696]
-
blk.33.ffn_down.weightF16[13696 5120]
-
blk.33.ffn_up.weightF16[5120 13696]
-
blk.33.attn_norm.weightF32[5120]
-
blk.33.ffn_norm.weightF32[5120]
-
blk.33.attn_q.weightF16[5120 5120]
-
blk.33.attn_k.weightF16[5120 5120]
-
blk.33.attn_v.weightF16[5120 5120]
-
blk.34.attn_output.weightF16[5120 5120]
-
blk.34.ffn_gate.weightF16[5120 13696]
-
blk.34.ffn_down.weightF16[13696 5120]
-
blk.34.ffn_up.weightF16[5120 13696]
-
blk.34.attn_norm.weightF32[5120]
-
blk.34.ffn_norm.weightF32[5120]
-
blk.34.attn_q.weightF16[5120 5120]
-
blk.34.attn_k.weightF16[5120 5120]
-
blk.34.attn_v.weightF16[5120 5120]
-
blk.35.attn_output.weightF16[5120 5120]
-
blk.35.ffn_gate.weightF16[5120 13696]
-
blk.35.ffn_down.weightF16[13696 5120]
-
blk.35.ffn_up.weightF16[5120 13696]
-
blk.35.attn_norm.weightF32[5120]
-
blk.35.ffn_norm.weightF32[5120]
-
blk.35.attn_q.weightF16[5120 5120]
-
blk.35.attn_k.weightF16[5120 5120]
-
blk.35.attn_v.weightF16[5120 5120]
-
blk.36.attn_output.weightF16[5120 5120]
-
blk.36.ffn_gate.weightF16[5120 13696]
-
blk.36.ffn_down.weightF16[13696 5120]
-
blk.36.ffn_up.weightF16[5120 13696]
-
blk.36.attn_norm.weightF32[5120]
-
blk.36.ffn_norm.weightF32[5120]
-
blk.36.attn_q.weightF16[5120 5120]
-
blk.36.attn_k.weightF16[5120 5120]
-
blk.36.attn_v.weightF16[5120 5120]
-
blk.37.attn_output.weightF16[5120 5120]
-
blk.37.ffn_gate.weightF16[5120 13696]
-
blk.37.ffn_down.weightF16[13696 5120]
-
blk.37.ffn_up.weightF16[5120 13696]
-
blk.37.attn_norm.weightF32[5120]
-
blk.37.ffn_norm.weightF32[5120]
-
blk.37.attn_q.weightF16[5120 5120]
-
blk.37.attn_k.weightF16[5120 5120]
-
blk.37.attn_v.weightF16[5120 5120]
-
blk.38.attn_output.weightF16[5120 5120]
-
blk.38.ffn_gate.weightF16[5120 13696]
-
blk.38.ffn_down.weightF16[13696 5120]
-
blk.38.ffn_up.weightF16[5120 13696]
-
blk.38.attn_norm.weightF32[5120]
-
blk.38.ffn_norm.weightF32[5120]
-
blk.38.attn_q.weightF16[5120 5120]
-
blk.38.attn_k.weightF16[5120 5120]
-
blk.38.attn_v.weightF16[5120 5120]
-
blk.39.attn_output.weightF16[5120 5120]
-
blk.39.ffn_gate.weightF16[5120 13696]
-
blk.39.ffn_down.weightF16[13696 5120]
-
blk.39.ffn_up.weightF16[5120 13696]
-
blk.39.attn_norm.weightF32[5120]
-
blk.39.ffn_norm.weightF32[5120]
-
blk.39.attn_q.weightF16[5120 5120]
-
blk.39.attn_k.weightF16[5120 5120]
-
blk.39.attn_v.weightF16[5120 5120]
-
output_norm.weightF32[5120]
-
output.weightF16[5120 64000]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39