An open-source Mixture-of-Experts code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks.

16B 236B

111.5K Pulls Updated 12 days ago

{ "metadata": { "deepseek2.attention.head_count": 16, "deepseek2.attention.head_count_kv": 16, "deepseek2.attention.key_length": 192, "deepseek2.attention.kv_lora_rank": 512, "deepseek2.attention.layer_norm_rms_epsilon": 0.000001, "deepseek2.attention.value_length": 128, "deepseek2.block_count": 27, "deepseek2.context_length": 163840, "deepseek2.embedding_length": 2048, "deepseek2.expert_count": 64, "deepseek2.expert_feed_forward_length": 1408, "deepseek2.expert_shared_count": 2, "deepseek2.expert_used_count": 6, "deepseek2.expert_weights_scale": 1, "deepseek2.feed_forward_length": 10944, "deepseek2.leading_dense_block_count": 1, "deepseek2.rope.dimension_count": 64, "deepseek2.rope.freq_base": 10000, "deepseek2.rope.scaling.factor": 40, "deepseek2.rope.scaling.original_context_length": 4096, "deepseek2.rope.scaling.type": "yarn", "deepseek2.rope.scaling.yarn_log_multiplier": 0.0707, "deepseek2.vocab_size": 102400, "general.architecture": "deepseek2", "general.file_type": 1, "general.name": "DeepSeek-Coder-V2-Lite-Instruct", "general.quantization_version": 2, "tokenizer.ggml.add_bos_token": true, "tokenizer.ggml.add_eos_token": false, "tokenizer.ggml.bos_token_id": 100000, "tokenizer.ggml.eos_token_id": 100001, "tokenizer.ggml.merges": "... (99757 values)", "tokenizer.ggml.model": "gpt2", "tokenizer.ggml.padding_token_id": 100001, "tokenizer.ggml.pre": "deepseek-llm", "tokenizer.ggml.token_type": "... (102400 values)", "tokenizer.ggml.tokens": "... (102400 values)" }, "num_params": 15706484224, "tensors": [ { "name": "token_embd.weight", "offset": 419430400, "shape": [ 2048, 102400 ], "size": 419430400, "type": 1 }, { "name": "blk.0.attn_norm.weight", "offset": 838860800, "shape": [ 2048 ], "size": 419430400, "type": 0 }, { "name": "blk.0.ffn_down.weight", "offset": 838868992, "shape": [ 10944, 2048 ], "size": 8192, "type": 1 }, { "name": "blk.0.ffn_gate.weight", "offset": 883695616, "shape": [ 2048, 10944 ], "size": 44826624, "type": 1 }, { "name": "blk.0.ffn_up.weight", "offset": 928522240, "shape": [ 2048, 10944 ], "size": 44826624, "type": 1 }, { "name": "blk.0.ffn_norm.weight", "offset": 973348864, "shape": [ 2048 ], "size": 44826624, "type": 0 }, { "name": "blk.0.attn_kv_a_norm.weight", "offset": 973357056, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.0.attn_kv_a_mqa.weight", "offset": 973359104, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.0.attn_kv_b.weight", "offset": 975718400, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.0.attn_output.weight", "offset": 979912704, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.0.attn_q.weight", "offset": 988301312, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.1.attn_norm.weight", "offset": 1000884224, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.1.ffn_down_exps.weight", "offset": 1000892416, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.1.ffn_gate_exps.weight", "offset": 1369991168, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.1.ffn_up_exps.weight", "offset": 1739089920, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.1.ffn_gate_inp.weight", "offset": 2108188672, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.1.ffn_down_shexp.weight", "offset": 2108712960, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.1.ffn_gate_shexp.weight", "offset": 2120247296, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.1.ffn_up_shexp.weight", "offset": 2131781632, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.1.ffn_norm.weight", "offset": 2143315968, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.1.attn_kv_a_norm.weight", "offset": 2143324160, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.1.attn_kv_a_mqa.weight", "offset": 2143326208, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.1.attn_kv_b.weight", "offset": 2145685504, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.1.attn_output.weight", "offset": 2149879808, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.1.attn_q.weight", "offset": 2158268416, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.2.attn_norm.weight", "offset": 2170851328, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.2.ffn_down_exps.weight", "offset": 2170859520, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.2.ffn_gate_exps.weight", "offset": 2539958272, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.2.ffn_up_exps.weight", "offset": 2909057024, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.2.ffn_gate_inp.weight", "offset": 3278155776, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.2.ffn_down_shexp.weight", "offset": 3278680064, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.2.ffn_gate_shexp.weight", "offset": 3290214400, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.2.ffn_up_shexp.weight", "offset": 3301748736, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.2.ffn_norm.weight", "offset": 3313283072, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.2.attn_kv_a_norm.weight", "offset": 3313291264, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.2.attn_kv_a_mqa.weight", "offset": 3313293312, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.2.attn_kv_b.weight", "offset": 3315652608, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.2.attn_output.weight", "offset": 3319846912, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.2.attn_q.weight", "offset": 3328235520, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.3.attn_norm.weight", "offset": 3340818432, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.3.ffn_down_exps.weight", "offset": 3340826624, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.3.ffn_gate_exps.weight", "offset": 3709925376, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.3.ffn_up_exps.weight", "offset": 4079024128, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.3.ffn_gate_inp.weight", "offset": 4448122880, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.3.ffn_down_shexp.weight", "offset": 4448647168, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.3.ffn_gate_shexp.weight", "offset": 4460181504, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.3.ffn_up_shexp.weight", "offset": 4471715840, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.3.ffn_norm.weight", "offset": 4483250176, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.3.attn_kv_a_norm.weight", "offset": 4483258368, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.3.attn_kv_a_mqa.weight", "offset": 4483260416, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.3.attn_kv_b.weight", "offset": 4485619712, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.3.attn_output.weight", "offset": 4489814016, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.3.attn_q.weight", "offset": 4498202624, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.4.attn_norm.weight", "offset": 4510785536, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.4.ffn_down_exps.weight", "offset": 4510793728, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.4.ffn_gate_exps.weight", "offset": 4879892480, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.4.ffn_up_exps.weight", "offset": 5248991232, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.4.ffn_gate_inp.weight", "offset": 5618089984, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.4.ffn_down_shexp.weight", "offset": 5618614272, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.4.ffn_gate_shexp.weight", "offset": 5630148608, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.4.ffn_up_shexp.weight", "offset": 5641682944, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.4.ffn_norm.weight", "offset": 5653217280, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.4.attn_kv_a_norm.weight", "offset": 5653225472, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.4.attn_kv_a_mqa.weight", "offset": 5653227520, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.4.attn_kv_b.weight", "offset": 5655586816, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.4.attn_output.weight", "offset": 5659781120, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.4.attn_q.weight", "offset": 5668169728, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.5.attn_norm.weight", "offset": 5680752640, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.5.ffn_down_exps.weight", "offset": 5680760832, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.5.ffn_gate_exps.weight", "offset": 6049859584, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.5.ffn_up_exps.weight", "offset": 6418958336, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.5.ffn_gate_inp.weight", "offset": 6788057088, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.5.ffn_down_shexp.weight", "offset": 6788581376, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.5.ffn_gate_shexp.weight", "offset": 6800115712, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.5.ffn_up_shexp.weight", "offset": 6811650048, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.5.ffn_norm.weight", "offset": 6823184384, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.5.attn_kv_a_norm.weight", "offset": 6823192576, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.5.attn_kv_a_mqa.weight", "offset": 6823194624, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.5.attn_kv_b.weight", "offset": 6825553920, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.5.attn_output.weight", "offset": 6829748224, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.5.attn_q.weight", "offset": 6838136832, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.6.attn_norm.weight", "offset": 6850719744, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.6.ffn_down_exps.weight", "offset": 6850727936, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.6.ffn_gate_exps.weight", "offset": 7219826688, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.6.ffn_up_exps.weight", "offset": 7588925440, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.6.ffn_gate_inp.weight", "offset": 7958024192, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.6.ffn_down_shexp.weight", "offset": 7958548480, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.6.ffn_gate_shexp.weight", "offset": 7970082816, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.6.ffn_up_shexp.weight", "offset": 7981617152, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.6.ffn_norm.weight", "offset": 7993151488, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.6.attn_kv_a_norm.weight", "offset": 7993159680, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.6.attn_kv_a_mqa.weight", "offset": 7993161728, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.6.attn_kv_b.weight", "offset": 7995521024, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.6.attn_output.weight", "offset": 7999715328, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.6.attn_q.weight", "offset": 8008103936, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.7.ffn_gate_inp.weight", "offset": 8020686848, "shape": [ 2048, 64 ], "size": 12582912, "type": 0 }, { "name": "blk.7.ffn_down_shexp.weight", "offset": 8021211136, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.7.ffn_gate_shexp.weight", "offset": 8032745472, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.7.ffn_up_shexp.weight", "offset": 8044279808, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.7.attn_kv_a_norm.weight", "offset": 8055814144, "shape": [ 512 ], "size": 11534336, "type": 0 }, { "name": "blk.7.attn_kv_a_mqa.weight", "offset": 8055816192, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.7.attn_kv_b.weight", "offset": 8058175488, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.7.attn_output.weight", "offset": 8062369792, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.7.attn_q.weight", "offset": 8070758400, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "output_norm.weight", "offset": 8083341312, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.10.attn_norm.weight", "offset": 8083349504, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.10.ffn_down_exps.weight", "offset": 8083357696, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.10.ffn_gate_exps.weight", "offset": 8452456448, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.10.ffn_up_exps.weight", "offset": 8821555200, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.10.ffn_gate_inp.weight", "offset": 9190653952, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.10.ffn_down_shexp.weight", "offset": 9191178240, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.10.ffn_gate_shexp.weight", "offset": 9202712576, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.10.ffn_up_shexp.weight", "offset": 9214246912, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.10.ffn_norm.weight", "offset": 9225781248, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.10.attn_kv_a_norm.weight", "offset": 9225789440, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.10.attn_kv_a_mqa.weight", "offset": 9225791488, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.10.attn_kv_b.weight", "offset": 9228150784, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.10.attn_output.weight", "offset": 9232345088, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.10.attn_q.weight", "offset": 9240733696, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.11.attn_norm.weight", "offset": 9253316608, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.11.ffn_down_exps.weight", "offset": 9253324800, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.11.ffn_gate_exps.weight", "offset": 9622423552, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.11.ffn_up_exps.weight", "offset": 9991522304, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.11.ffn_gate_inp.weight", "offset": 10360621056, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.11.ffn_down_shexp.weight", "offset": 10361145344, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.11.ffn_gate_shexp.weight", "offset": 10372679680, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.11.ffn_up_shexp.weight", "offset": 10384214016, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.11.ffn_norm.weight", "offset": 10395748352, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.11.attn_kv_a_norm.weight", "offset": 10395756544, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.11.attn_kv_a_mqa.weight", "offset": 10395758592, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.11.attn_kv_b.weight", "offset": 10398117888, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.11.attn_output.weight", "offset": 10402312192, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.11.attn_q.weight", "offset": 10410700800, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.12.attn_norm.weight", "offset": 10423283712, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.12.ffn_down_exps.weight", "offset": 10423291904, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.12.ffn_gate_exps.weight", "offset": 10792390656, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.12.ffn_up_exps.weight", "offset": 11161489408, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.12.ffn_gate_inp.weight", "offset": 11530588160, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.12.ffn_down_shexp.weight", "offset": 11531112448, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.12.ffn_gate_shexp.weight", "offset": 11542646784, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.12.ffn_up_shexp.weight", "offset": 11554181120, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.12.ffn_norm.weight", "offset": 11565715456, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.12.attn_kv_a_norm.weight", "offset": 11565723648, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.12.attn_kv_a_mqa.weight", "offset": 11565725696, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.12.attn_kv_b.weight", "offset": 11568084992, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.12.attn_output.weight", "offset": 11572279296, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.12.attn_q.weight", "offset": 11580667904, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.13.attn_norm.weight", "offset": 11593250816, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.13.ffn_down_exps.weight", "offset": 11593259008, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.13.ffn_gate_exps.weight", "offset": 11962357760, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.13.ffn_up_exps.weight", "offset": 12331456512, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.13.ffn_gate_inp.weight", "offset": 12700555264, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.13.ffn_down_shexp.weight", "offset": 12701079552, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.13.ffn_gate_shexp.weight", "offset": 12712613888, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.13.ffn_up_shexp.weight", "offset": 12724148224, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.13.ffn_norm.weight", "offset": 12735682560, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.13.attn_kv_a_norm.weight", "offset": 12735690752, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.13.attn_kv_a_mqa.weight", "offset": 12735692800, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.13.attn_kv_b.weight", "offset": 12738052096, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.13.attn_output.weight", "offset": 12742246400, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.13.attn_q.weight", "offset": 12750635008, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.14.ffn_gate_inp.weight", "offset": 12763217920, "shape": [ 2048, 64 ], "size": 12582912, "type": 0 }, { "name": "blk.14.ffn_down_shexp.weight", "offset": 12763742208, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.14.ffn_gate_shexp.weight", "offset": 12775276544, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.14.ffn_up_shexp.weight", "offset": 12786810880, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.14.attn_kv_a_norm.weight", "offset": 12798345216, "shape": [ 512 ], "size": 11534336, "type": 0 }, { "name": "blk.14.attn_kv_a_mqa.weight", "offset": 12798347264, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.14.attn_kv_b.weight", "offset": 12800706560, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.14.attn_output.weight", "offset": 12804900864, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.14.attn_q.weight", "offset": 12813289472, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.7.attn_norm.weight", "offset": 12825872384, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.7.ffn_down_exps.weight", "offset": 12825880576, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.7.ffn_gate_exps.weight", "offset": 13194979328, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.7.ffn_up_exps.weight", "offset": 13564078080, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.7.ffn_norm.weight", "offset": 13933176832, "shape": [ 2048 ], "size": 369098752, "type": 0 }, { "name": "blk.8.attn_norm.weight", "offset": 13933185024, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.8.ffn_down_exps.weight", "offset": 13933193216, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.8.ffn_gate_exps.weight", "offset": 14302291968, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.8.ffn_up_exps.weight", "offset": 14671390720, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.8.ffn_gate_inp.weight", "offset": 15040489472, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.8.ffn_down_shexp.weight", "offset": 15041013760, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.8.ffn_gate_shexp.weight", "offset": 15052548096, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.8.ffn_up_shexp.weight", "offset": 15064082432, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.8.ffn_norm.weight", "offset": 15075616768, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.8.attn_kv_a_norm.weight", "offset": 15075624960, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.8.attn_kv_a_mqa.weight", "offset": 15075627008, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.8.attn_kv_b.weight", "offset": 15077986304, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.8.attn_output.weight", "offset": 15082180608, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.8.attn_q.weight", "offset": 15090569216, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.9.attn_norm.weight", "offset": 15103152128, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.9.ffn_down_exps.weight", "offset": 15103160320, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.9.ffn_gate_exps.weight", "offset": 15472259072, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.9.ffn_up_exps.weight", "offset": 15841357824, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.9.ffn_gate_inp.weight", "offset": 16210456576, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.9.ffn_down_shexp.weight", "offset": 16210980864, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.9.ffn_gate_shexp.weight", "offset": 16222515200, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.9.ffn_up_shexp.weight", "offset": 16234049536, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.9.ffn_norm.weight", "offset": 16245583872, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.9.attn_kv_a_norm.weight", "offset": 16245592064, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.9.attn_kv_a_mqa.weight", "offset": 16245594112, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.9.attn_kv_b.weight", "offset": 16247953408, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.9.attn_output.weight", "offset": 16252147712, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.9.attn_q.weight", "offset": 16260536320, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.14.attn_norm.weight", "offset": 16273119232, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.14.ffn_down_exps.weight", "offset": 16273127424, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.14.ffn_gate_exps.weight", "offset": 16642226176, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.14.ffn_up_exps.weight", "offset": 17011324928, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.14.ffn_norm.weight", "offset": 17380423680, "shape": [ 2048 ], "size": 369098752, "type": 0 }, { "name": "blk.15.attn_norm.weight", "offset": 17380431872, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.15.ffn_down_exps.weight", "offset": 17380440064, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.15.ffn_gate_exps.weight", "offset": 17749538816, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.15.ffn_up_exps.weight", "offset": 18118637568, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.15.ffn_gate_inp.weight", "offset": 18487736320, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.15.ffn_down_shexp.weight", "offset": 18488260608, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.15.ffn_gate_shexp.weight", "offset": 18499794944, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.15.ffn_up_shexp.weight", "offset": 18511329280, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.15.ffn_norm.weight", "offset": 18522863616, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.15.attn_kv_a_norm.weight", "offset": 18522871808, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.15.attn_kv_a_mqa.weight", "offset": 18522873856, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.15.attn_kv_b.weight", "offset": 18525233152, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.15.attn_output.weight", "offset": 18529427456, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.15.attn_q.weight", "offset": 18537816064, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.16.attn_norm.weight", "offset": 18550398976, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.16.ffn_down_exps.weight", "offset": 18550407168, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.16.ffn_gate_exps.weight", "offset": 18919505920, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.16.ffn_up_exps.weight", "offset": 19288604672, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.16.ffn_gate_inp.weight", "offset": 19657703424, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.16.ffn_down_shexp.weight", "offset": 19658227712, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.16.ffn_gate_shexp.weight", "offset": 19669762048, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.16.ffn_up_shexp.weight", "offset": 19681296384, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.16.ffn_norm.weight", "offset": 19692830720, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.16.attn_kv_a_norm.weight", "offset": 19692838912, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.16.attn_kv_a_mqa.weight", "offset": 19692840960, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.16.attn_kv_b.weight", "offset": 19695200256, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.16.attn_output.weight", "offset": 19699394560, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.16.attn_q.weight", "offset": 19707783168, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.17.attn_norm.weight", "offset": 19720366080, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.17.ffn_down_exps.weight", "offset": 19720374272, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.17.ffn_gate_exps.weight", "offset": 20089473024, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.17.ffn_up_exps.weight", "offset": 20458571776, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.17.ffn_gate_inp.weight", "offset": 20827670528, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.17.ffn_down_shexp.weight", "offset": 20828194816, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.17.ffn_gate_shexp.weight", "offset": 20839729152, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.17.ffn_up_shexp.weight", "offset": 20851263488, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.17.ffn_norm.weight", "offset": 20862797824, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.17.attn_kv_a_norm.weight", "offset": 20862806016, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.17.attn_kv_a_mqa.weight", "offset": 20862808064, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.17.attn_kv_b.weight", "offset": 20865167360, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.17.attn_output.weight", "offset": 20869361664, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.17.attn_q.weight", "offset": 20877750272, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.18.attn_norm.weight", "offset": 20890333184, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.18.ffn_down_exps.weight", "offset": 20890341376, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.18.ffn_gate_exps.weight", "offset": 21259440128, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.18.ffn_up_exps.weight", "offset": 21628538880, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.18.ffn_gate_inp.weight", "offset": 21997637632, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.18.ffn_down_shexp.weight", "offset": 21998161920, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.18.ffn_gate_shexp.weight", "offset": 22009696256, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.18.ffn_up_shexp.weight", "offset": 22021230592, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.18.ffn_norm.weight", "offset": 22032764928, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.18.attn_kv_a_norm.weight", "offset": 22032773120, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.18.attn_kv_a_mqa.weight", "offset": 22032775168, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.18.attn_kv_b.weight", "offset": 22035134464, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.18.attn_output.weight", "offset": 22039328768, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.18.attn_q.weight", "offset": 22047717376, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.19.attn_norm.weight", "offset": 22060300288, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.19.ffn_down_exps.weight", "offset": 22060308480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.19.ffn_gate_exps.weight", "offset": 22429407232, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.19.ffn_up_exps.weight", "offset": 22798505984, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.19.ffn_gate_inp.weight", "offset": 23167604736, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.19.ffn_down_shexp.weight", "offset": 23168129024, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.19.ffn_gate_shexp.weight", "offset": 23179663360, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.19.ffn_up_shexp.weight", "offset": 23191197696, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.19.ffn_norm.weight", "offset": 23202732032, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.19.attn_kv_a_norm.weight", "offset": 23202740224, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "p\ufffd\ufffd%\ufffd\ufffd\u0018\ufffdX.ذH\ufffd\ufffd\ufffd00\u0000\ufffd\ufffd\ufffd`\u001e\ufffd\ufffd\ufffd", "offset": 23202742272, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.19.attn_kv_b.weight", "offset": 23205101568, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.19.attn_output.weight", "offset": 23209295872, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.19.attn_q.weight", "offset": 23217684480, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.20.attn_norm.weight", "offset": 23230267392, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.20.ffn_down_exps.weight", "offset": 23230275584, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.20.ffn_gate_exps.weight", "offset": 23599374336, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.20.ffn_up_exps.weight", "offset": 23968473088, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.20.ffn_gate_inp.weight", "offset": 24337571840, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.20.ffn_down_shexp.weight", "offset": 24338096128, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.20.ffn_gate_shexp.weight", "offset": 24349630464, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.20.ffn_up_shexp.weight", "offset": 24361164800, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.20.ffn_norm.weight", "offset": 24372699136, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.20.attn_kv_a_norm.weight", "offset": 24372707328, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.20.attn_kv_a_mqa.weight", "offset": 24372709376, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.20.attn_kv_b.weight", "offset": 24375068672, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.20.attn_output.weight", "offset": 24379262976, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.20.attn_q.weight", "offset": 24387651584, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.21.attn_norm.weight", "offset": 24400234496, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.21.ffn_down_exps.weight", "offset": 24400242688, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.21.ffn_gate_exps.weight", "offset": 24769341440, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.21.ffn_up_exps.weight", "offset": 25138440192, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.21.ffn_gate_inp.weight", "offset": 25507538944, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.21.ffn_down_shexp.weight", "offset": 25508063232, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.21.ffn_gate_shexp.weight", "offset": 25519597568, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.21.ffn_up_shexp.weight", "offset": 25531131904, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.21.ffn_norm.weight", "offset": 25542666240, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.21.attn_kv_a_norm.weight", "offset": 25542674432, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.21.attn_kv_a_mqa.weight", "offset": 25542676480, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.21.attn_kv_b.weight", "offset": 25545035776, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.21.attn_output.weight", "offset": 25549230080, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.21.attn_q.weight", "offset": 25557618688, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.22.ffn_gate_inp.weight", "offset": 25570201600, "shape": [ 2048, 64 ], "size": 12582912, "type": 0 }, { "name": "blk.22.ffn_down_shexp.weight", "offset": 25570725888, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.22.ffn_gate_shexp.weight", "offset": 25582260224, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.22.ffn_up_shexp.weight", "offset": 25593794560, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.22.attn_kv_a_norm.weight", "offset": 25605328896, "shape": [ 512 ], "size": 11534336, "type": 0 }, { "name": "blk.22.attn_kv_a_mqa.weight", "offset": 25605330944, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.22.attn_kv_b.weight", "offset": 25607690240, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.22.attn_output.weight", "offset": 25611884544, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.22.attn_q.weight", "offset": 25620273152, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.22.attn_norm.weight", "offset": 25632856064, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.22.ffn_down_exps.weight", "offset": 25632864256, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.22.ffn_gate_exps.weight", "offset": 26001963008, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.22.ffn_up_exps.weight", "offset": 26371061760, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.22.ffn_norm.weight", "offset": 26740160512, "shape": [ 2048 ], "size": 369098752, "type": 0 }, { "name": "blk.23.attn_norm.weight", "offset": 26740168704, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.23.ffn_down_exps.weight", "offset": 26740176896, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.23.ffn_gate_exps.weight", "offset": 27109275648, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.23.ffn_up_exps.weight", "offset": 27478374400, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.23.ffn_gate_inp.weight", "offset": 27847473152, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.23.ffn_down_shexp.weight", "offset": 27847997440, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.23.ffn_gate_shexp.weight", "offset": 27859531776, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.23.ffn_up_shexp.weight", "offset": 27871066112, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.23.ffn_norm.weight", "offset": 27882600448, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.23.attn_kv_a_norm.weight", "offset": 27882608640, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.23.attn_kv_a_mqa.weight", "offset": 27882610688, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.23.attn_kv_b.weight", "offset": 27884969984, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.23.attn_output.weight", "offset": 27889164288, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.23.attn_q.weight", "offset": 27897552896, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.24.attn_norm.weight", "offset": 27910135808, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.24.ffn_down_exps.weight", "offset": 27910144000, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.24.ffn_gate_exps.weight", "offset": 28279242752, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.24.ffn_up_exps.weight", "offset": 28648341504, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.24.ffn_gate_inp.weight", "offset": 29017440256, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.24.ffn_down_shexp.weight", "offset": 29017964544, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.24.ffn_gate_shexp.weight", "offset": 29029498880, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.24.ffn_up_shexp.weight", "offset": 29041033216, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.24.ffn_norm.weight", "offset": 29052567552, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.24.attn_kv_a_norm.weight", "offset": 29052575744, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.24.attn_kv_a_mqa.weight", "offset": 29052577792, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.24.attn_kv_b.weight", "offset": 29054937088, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.24.attn_output.weight", "offset": 29059131392, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.24.attn_q.weight", "offset": 29067520000, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.25.attn_norm.weight", "offset": 29080102912, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.25.ffn_down_exps.weight", "offset": 29080111104, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.25.ffn_gate_exps.weight", "offset": 29449209856, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.25.ffn_up_exps.weight", "offset": 29818308608, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.25.ffn_gate_inp.weight", "offset": 30187407360, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.25.ffn_down_shexp.weight", "offset": 30187931648, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.25.ffn_gate_shexp.weight", "offset": 30199465984, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.25.ffn_up_shexp.weight", "offset": 30211000320, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.25.ffn_norm.weight", "offset": 30222534656, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.25.attn_kv_a_norm.weight", "offset": 30222542848, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.25.attn_kv_a_mqa.weight", "offset": 30222544896, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.25.attn_kv_b.weight", "offset": 30224904192, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.25.attn_output.weight", "offset": 30229098496, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.25.attn_q.weight", "offset": 30237487104, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 }, { "name": "blk.26.attn_norm.weight", "offset": 30250070016, "shape": [ 2048 ], "size": 12582912, "type": 0 }, { "name": "blk.26.ffn_down_exps.weight", "offset": 30250078208, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 1 }, { "name": "blk.26.ffn_gate_exps.weight", "offset": 30619176960, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.26.ffn_up_exps.weight", "offset": 30988275712, "shape": [ 2048, 1408, 64 ], "size": 369098752, "type": 1 }, { "name": "blk.26.ffn_gate_inp.weight", "offset": 31357374464, "shape": [ 2048, 64 ], "size": 369098752, "type": 0 }, { "name": "blk.26.ffn_down_shexp.weight", "offset": 31357898752, "shape": [ 2816, 2048 ], "size": 524288, "type": 1 }, { "name": "blk.26.ffn_gate_shexp.weight", "offset": 31369433088, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.26.ffn_up_shexp.weight", "offset": 31380967424, "shape": [ 2048, 2816 ], "size": 11534336, "type": 1 }, { "name": "blk.26.ffn_norm.weight", "offset": 31392501760, "shape": [ 2048 ], "size": 11534336, "type": 0 }, { "name": "blk.26.attn_kv_a_norm.weight", "offset": 31392509952, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.26.attn_kv_a_mqa.weight", "offset": 31392512000, "shape": [ 2048, 576 ], "size": 2048, "type": 1 }, { "name": "blk.26.attn_kv_b.weight", "offset": 31394871296, "shape": [ 512, 4096 ], "size": 2359296, "type": 1 }, { "name": "blk.26.attn_output.weight", "offset": 31399065600, "shape": [ 2048, 2048 ], "size": 4194304, "type": 1 }, { "name": "blk.26.attn_q.weight", "offset": 31407454208, "shape": [ 2048, 3072 ], "size": 8388608, "type": 1 } ], "version": 3 }