An open-source Mixture-of-Experts code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks.

16B 236B

111.5K Pulls Updated 12 days ago

{ "metadata": { "deepseek2.attention.head_count": 16, "deepseek2.attention.head_count_kv": 16, "deepseek2.attention.key_length": 192, "deepseek2.attention.kv_lora_rank": 512, "deepseek2.attention.layer_norm_rms_epsilon": 0.000001, "deepseek2.attention.value_length": 128, "deepseek2.block_count": 27, "deepseek2.context_length": 163840, "deepseek2.embedding_length": 2048, "deepseek2.expert_count": 64, "deepseek2.expert_feed_forward_length": 1408, "deepseek2.expert_shared_count": 2, "deepseek2.expert_used_count": 6, "deepseek2.expert_weights_scale": 1, "deepseek2.feed_forward_length": 10944, "deepseek2.leading_dense_block_count": 1, "deepseek2.rope.dimension_count": 64, "deepseek2.rope.freq_base": 10000, "deepseek2.rope.scaling.factor": 40, "deepseek2.rope.scaling.original_context_length": 4096, "deepseek2.rope.scaling.type": "yarn", "deepseek2.rope.scaling.yarn_log_multiplier": 0.0707, "deepseek2.vocab_size": 102400, "general.architecture": "deepseek2", "general.file_type": 14, "general.name": "DeepSeek-Coder-V2-Lite-Instruct", "general.quantization_version": 2, "tokenizer.ggml.add_bos_token": true, "tokenizer.ggml.add_eos_token": false, "tokenizer.ggml.bos_token_id": 100000, "tokenizer.ggml.eos_token_id": 100001, "tokenizer.ggml.merges": "... (99757 values)", "tokenizer.ggml.model": "gpt2", "tokenizer.ggml.padding_token_id": 100001, "tokenizer.ggml.pre": "deepseek-llm", "tokenizer.ggml.token_type": "... (102400 values)", "tokenizer.ggml.tokens": "... (102400 values)" }, "num_params": 15706484224, "tensors": [ { "name": "token_embd.weight", "offset": 172032000, "shape": [ 2048, 102400 ], "size": 172032000, "type": 12 }, { "name": "blk.0.attn_norm.weight", "offset": 289996800, "shape": [ 2048 ], "size": 117964800, "type": 0 }, { "name": "blk.0.ffn_down.weight", "offset": 290004992, "shape": [ 10944, 2048 ], "size": 8192, "type": 7 }, { "name": "blk.0.ffn_gate.weight", "offset": 306814976, "shape": [ 2048, 10944 ], "size": 16809984, "type": 12 }, { "name": "blk.0.ffn_up.weight", "offset": 319422464, "shape": [ 2048, 10944 ], "size": 12607488, "type": 12 }, { "name": "blk.0.ffn_norm.weight", "offset": 332029952, "shape": [ 2048 ], "size": 12607488, "type": 0 }, { "name": "blk.0.attn_kv_a_norm.weight", "offset": 332038144, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.0.attn_kv_a_mqa.weight", "offset": 332040192, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.0.attn_kv_b.weight", "offset": 332703744, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.0.attn_output.weight", "offset": 333883392, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.0.attn_q.weight", "offset": 336242688, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.1.attn_norm.weight", "offset": 339781632, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.1.ffn_down_exps.weight", "offset": 339789824, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 7 }, { "name": "blk.1.ffn_gate_exps.weight", "offset": 478201856, "shape": [ 2048, 1408, 64 ], "size": 138412032, "type": 12 }, { "name": "blk.1.ffn_up_exps.weight", "offset": 582010880, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.1.ffn_gate_inp.weight", "offset": 685819904, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.1.ffn_down_shexp.weight", "offset": 686344192, "shape": [ 2816, 2048 ], "size": 524288, "type": 13 }, { "name": "blk.1.ffn_gate_shexp.weight", "offset": 690309120, "shape": [ 2048, 2816 ], "size": 3964928, "type": 12 }, { "name": "blk.1.ffn_up_shexp.weight", "offset": 693553152, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.1.ffn_norm.weight", "offset": 696797184, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.1.attn_kv_a_norm.weight", "offset": 696805376, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.1.attn_kv_a_mqa.weight", "offset": 696807424, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.1.attn_kv_b.weight", "offset": 697470976, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.1.attn_output.weight", "offset": 698650624, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.1.attn_q.weight", "offset": 701009920, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.2.attn_norm.weight", "offset": 704548864, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.2.ffn_down_exps.weight", "offset": 704557056, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 7 }, { "name": "blk.2.ffn_gate_exps.weight", "offset": 842969088, "shape": [ 2048, 1408, 64 ], "size": 138412032, "type": 12 }, { "name": "blk.2.ffn_up_exps.weight", "offset": 946778112, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.2.ffn_gate_inp.weight", "offset": 1050587136, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.2.ffn_down_shexp.weight", "offset": 1051111424, "shape": [ 2816, 2048 ], "size": 524288, "type": 13 }, { "name": "blk.2.ffn_gate_shexp.weight", "offset": 1055076352, "shape": [ 2048, 2816 ], "size": 3964928, "type": 12 }, { "name": "blk.2.ffn_up_shexp.weight", "offset": 1058320384, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.2.ffn_norm.weight", "offset": 1061564416, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.2.attn_kv_a_norm.weight", "offset": 1061572608, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.2.attn_kv_a_mqa.weight", "offset": 1061574656, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.2.attn_kv_b.weight", "offset": 1062238208, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.2.attn_output.weight", "offset": 1063417856, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.2.attn_q.weight", "offset": 1065777152, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.3.attn_norm.weight", "offset": 1069316096, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.3.ffn_down_exps.weight", "offset": 1069324288, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.3.ffn_gate_exps.weight", "offset": 1196201984, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.3.ffn_up_exps.weight", "offset": 1300011008, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.3.ffn_gate_inp.weight", "offset": 1403820032, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.3.ffn_down_shexp.weight", "offset": 1404344320, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.3.ffn_gate_shexp.weight", "offset": 1407588352, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.3.ffn_up_shexp.weight", "offset": 1410832384, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.3.ffn_norm.weight", "offset": 1414076416, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.3.attn_kv_a_norm.weight", "offset": 1414084608, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.3.attn_kv_a_mqa.weight", "offset": 1414086656, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.3.attn_kv_b.weight", "offset": 1414750208, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.3.attn_output.weight", "offset": 1415929856, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.3.attn_q.weight", "offset": 1418289152, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.4.attn_norm.weight", "offset": 1421828096, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.4.ffn_down_exps.weight", "offset": 1421836288, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.4.ffn_gate_exps.weight", "offset": 1548713984, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.4.ffn_up_exps.weight", "offset": 1652523008, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.4.ffn_gate_inp.weight", "offset": 1756332032, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.4.ffn_down_shexp.weight", "offset": 1756856320, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.4.ffn_gate_shexp.weight", "offset": 1760100352, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.4.ffn_up_shexp.weight", "offset": 1763344384, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.4.ffn_norm.weight", "offset": 1766588416, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.4.attn_kv_a_norm.weight", "offset": 1766596608, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.4.attn_kv_a_mqa.weight", "offset": 1766598656, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.4.attn_kv_b.weight", "offset": 1767262208, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.4.attn_output.weight", "offset": 1768441856, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.4.attn_q.weight", "offset": 1770801152, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.5.attn_norm.weight", "offset": 1774340096, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.5.ffn_down_exps.weight", "offset": 1774348288, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.5.ffn_gate_exps.weight", "offset": 1901225984, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.5.ffn_up_exps.weight", "offset": 2005035008, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.5.ffn_gate_inp.weight", "offset": 2108844032, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.5.ffn_down_shexp.weight", "offset": 2109368320, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.5.ffn_gate_shexp.weight", "offset": 2112612352, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.5.ffn_up_shexp.weight", "offset": 2115856384, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.5.ffn_norm.weight", "offset": 2119100416, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.5.attn_kv_a_norm.weight", "offset": 2119108608, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.5.attn_kv_a_mqa.weight", "offset": 2119110656, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.5.attn_kv_b.weight", "offset": 2119774208, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.5.attn_output.weight", "offset": 2120953856, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.5.attn_q.weight", "offset": 2123313152, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.6.attn_norm.weight", "offset": 2126852096, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.6.ffn_down_exps.weight", "offset": 2126860288, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.6.ffn_gate_exps.weight", "offset": 2253737984, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.6.ffn_up_exps.weight", "offset": 2357547008, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.6.ffn_gate_inp.weight", "offset": 2461356032, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.6.ffn_down_shexp.weight", "offset": 2461880320, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.6.ffn_gate_shexp.weight", "offset": 2465124352, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.6.ffn_up_shexp.weight", "offset": 2468368384, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.6.ffn_norm.weight", "offset": 2471612416, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.6.attn_kv_a_norm.weight", "offset": 2471620608, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.6.attn_kv_a_mqa.weight", "offset": 2471622656, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.6.attn_kv_b.weight", "offset": 2472286208, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.6.attn_output.weight", "offset": 2473465856, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.6.attn_q.weight", "offset": 2475825152, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.7.ffn_gate_inp.weight", "offset": 2479364096, "shape": [ 2048, 64 ], "size": 3538944, "type": 0 }, { "name": "blk.7.ffn_down_shexp.weight", "offset": 2479888384, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.7.ffn_gate_shexp.weight", "offset": 2483132416, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.7.ffn_up_shexp.weight", "offset": 2486376448, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.7.attn_kv_a_norm.weight", "offset": 2489620480, "shape": [ 512 ], "size": 3244032, "type": 0 }, { "name": "blk.7.attn_kv_a_mqa.weight", "offset": 2489622528, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.7.attn_kv_b.weight", "offset": 2490286080, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.7.attn_output.weight", "offset": 2491465728, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.7.attn_q.weight", "offset": 2493825024, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "output_norm.weight", "offset": 2497363968, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.10.attn_norm.weight", "offset": 2497372160, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.10.ffn_down_exps.weight", "offset": 2497380352, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.10.ffn_gate_exps.weight", "offset": 2624258048, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.10.ffn_up_exps.weight", "offset": 2728067072, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.10.ffn_gate_inp.weight", "offset": 2831876096, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.10.ffn_down_shexp.weight", "offset": 2832400384, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.10.ffn_gate_shexp.weight", "offset": 2835644416, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.10.ffn_up_shexp.weight", "offset": 2838888448, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.10.ffn_norm.weight", "offset": 2842132480, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.10.attn_kv_a_norm.weight", "offset": 2842140672, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.10.attn_kv_a_mqa.weight", "offset": 2842142720, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.10.attn_kv_b.weight", "offset": 2842806272, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.10.attn_output.weight", "offset": 2843985920, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.10.attn_q.weight", "offset": 2846345216, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.11.attn_norm.weight", "offset": 2849884160, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.11.ffn_down_exps.weight", "offset": 2849892352, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.11.ffn_gate_exps.weight", "offset": 2976770048, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.11.ffn_up_exps.weight", "offset": 3080579072, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.11.ffn_gate_inp.weight", "offset": 3184388096, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.11.ffn_down_shexp.weight", "offset": 3184912384, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.11.ffn_gate_shexp.weight", "offset": 3188156416, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.11.ffn_up_shexp.weight", "offset": 3191400448, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.11.ffn_norm.weight", "offset": 3194644480, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.11.attn_kv_a_norm.weight", "offset": 3194652672, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.11.attn_kv_a_mqa.weight", "offset": 3194654720, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.11.attn_kv_b.weight", "offset": 3195318272, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.11.attn_output.weight", "offset": 3196497920, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.11.attn_q.weight", "offset": 3198857216, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.12.attn_norm.weight", "offset": 3202396160, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.12.ffn_down_exps.weight", "offset": 3202404352, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.12.ffn_gate_exps.weight", "offset": 3329282048, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.12.ffn_up_exps.weight", "offset": 3433091072, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.12.ffn_gate_inp.weight", "offset": 3536900096, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.12.ffn_down_shexp.weight", "offset": 3537424384, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.12.ffn_gate_shexp.weight", "offset": 3540668416, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.12.ffn_up_shexp.weight", "offset": 3543912448, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.12.ffn_norm.weight", "offset": 3547156480, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.12.attn_kv_a_norm.weight", "offset": 3547164672, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.12.attn_kv_a_mqa.weight", "offset": 3547166720, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.12.attn_kv_b.weight", "offset": 3547830272, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.12.attn_output.weight", "offset": 3549009920, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.12.attn_q.weight", "offset": 3551369216, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.13.attn_norm.weight", "offset": 3554908160, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.13.ffn_down_exps.weight", "offset": 3554916352, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.13.ffn_gate_exps.weight", "offset": 3681794048, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.13.ffn_up_exps.weight", "offset": 3785603072, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.13.ffn_gate_inp.weight", "offset": 3889412096, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.13.ffn_down_shexp.weight", "offset": 3889936384, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.13.ffn_gate_shexp.weight", "offset": 3893180416, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.13.ffn_up_shexp.weight", "offset": 3896424448, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.13.ffn_norm.weight", "offset": 3899668480, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.13.attn_kv_a_norm.weight", "offset": 3899676672, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.13.attn_kv_a_mqa.weight", "offset": 3899678720, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.13.attn_kv_b.weight", "offset": 3900342272, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.13.attn_output.weight", "offset": 3901521920, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.13.attn_q.weight", "offset": 3903881216, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.14.ffn_gate_inp.weight", "offset": 3907420160, "shape": [ 2048, 64 ], "size": 3538944, "type": 0 }, { "name": "blk.14.ffn_down_shexp.weight", "offset": 3907944448, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.14.ffn_gate_shexp.weight", "offset": 3911188480, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.14.ffn_up_shexp.weight", "offset": 3914432512, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.14.attn_kv_a_norm.weight", "offset": 3917676544, "shape": [ 512 ], "size": 3244032, "type": 0 }, { "name": "blk.14.attn_kv_a_mqa.weight", "offset": 3917678592, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.14.attn_kv_b.weight", "offset": 3918342144, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.14.attn_output.weight", "offset": 3919521792, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.14.attn_q.weight", "offset": 3921881088, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.7.attn_norm.weight", "offset": 3925420032, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.7.ffn_down_exps.weight", "offset": 3925428224, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.7.ffn_gate_exps.weight", "offset": 4052305920, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.7.ffn_up_exps.weight", "offset": 4156114944, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.7.ffn_norm.weight", "offset": 4259923968, "shape": [ 2048 ], "size": 103809024, "type": 0 }, { "name": "blk.8.attn_norm.weight", "offset": 4259932160, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.8.ffn_down_exps.weight", "offset": 4259940352, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.8.ffn_gate_exps.weight", "offset": 4386818048, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.8.ffn_up_exps.weight", "offset": 4490627072, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.8.ffn_gate_inp.weight", "offset": 4594436096, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.8.ffn_down_shexp.weight", "offset": 4594960384, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.8.ffn_gate_shexp.weight", "offset": 4598204416, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.8.ffn_up_shexp.weight", "offset": 4601448448, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.8.ffn_norm.weight", "offset": 4604692480, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.8.attn_kv_a_norm.weight", "offset": 4604700672, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.8.attn_kv_a_mqa.weight", "offset": 4604702720, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.8.attn_kv_b.weight", "offset": 4605366272, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.8.attn_output.weight", "offset": 4606545920, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.8.attn_q.weight", "offset": 4608905216, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.9.attn_norm.weight", "offset": 4612444160, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.9.ffn_down_exps.weight", "offset": 4612452352, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.9.ffn_gate_exps.weight", "offset": 4739330048, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.9.ffn_up_exps.weight", "offset": 4843139072, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.9.ffn_gate_inp.weight", "offset": 4946948096, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.9.ffn_down_shexp.weight", "offset": 4947472384, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.9.ffn_gate_shexp.weight", "offset": 4950716416, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.9.ffn_up_shexp.weight", "offset": 4953960448, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.9.ffn_norm.weight", "offset": 4957204480, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.9.attn_kv_a_norm.weight", "offset": 4957212672, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.9.attn_kv_a_mqa.weight", "offset": 4957214720, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.9.attn_kv_b.weight", "offset": 4957878272, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.9.attn_output.weight", "offset": 4959057920, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.9.attn_q.weight", "offset": 4961417216, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.14.attn_norm.weight", "offset": 4964956160, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.14.ffn_down_exps.weight", "offset": 4964964352, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.14.ffn_gate_exps.weight", "offset": 5091842048, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.14.ffn_up_exps.weight", "offset": 5195651072, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.14.ffn_norm.weight", "offset": 5299460096, "shape": [ 2048 ], "size": 103809024, "type": 0 }, { "name": "blk.15.attn_norm.weight", "offset": 5299468288, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.15.ffn_down_exps.weight", "offset": 5299476480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.15.ffn_gate_exps.weight", "offset": 5426354176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.15.ffn_up_exps.weight", "offset": 5530163200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.15.ffn_gate_inp.weight", "offset": 5633972224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.15.ffn_down_shexp.weight", "offset": 5634496512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.15.ffn_gate_shexp.weight", "offset": 5637740544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.15.ffn_up_shexp.weight", "offset": 5640984576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.15.ffn_norm.weight", "offset": 5644228608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.15.attn_kv_a_norm.weight", "offset": 5644236800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.15.attn_kv_a_mqa.weight", "offset": 5644238848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.15.attn_kv_b.weight", "offset": 5644902400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.15.attn_output.weight", "offset": 5646082048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.15.attn_q.weight", "offset": 5648441344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.16.attn_norm.weight", "offset": 5651980288, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.16.ffn_down_exps.weight", "offset": 5651988480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.16.ffn_gate_exps.weight", "offset": 5778866176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.16.ffn_up_exps.weight", "offset": 5882675200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.16.ffn_gate_inp.weight", "offset": 5986484224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.16.ffn_down_shexp.weight", "offset": 5987008512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.16.ffn_gate_shexp.weight", "offset": 5990252544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.16.ffn_up_shexp.weight", "offset": 5993496576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.16.ffn_norm.weight", "offset": 5996740608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.16.attn_kv_a_norm.weight", "offset": 5996748800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.16.attn_kv_a_mqa.weight", "offset": 5996750848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.16.attn_kv_b.weight", "offset": 5997414400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.16.attn_output.weight", "offset": 5998594048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.16.attn_q.weight", "offset": 6000953344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.17.attn_norm.weight", "offset": 6004492288, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.17.ffn_down_exps.weight", "offset": 6004500480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.17.ffn_gate_exps.weight", "offset": 6131378176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.17.ffn_up_exps.weight", "offset": 6235187200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.17.ffn_gate_inp.weight", "offset": 6338996224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.17.ffn_down_shexp.weight", "offset": 6339520512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.17.ffn_gate_shexp.weight", "offset": 6342764544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.17.ffn_up_shexp.weight", "offset": 6346008576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.17.ffn_norm.weight", "offset": 6349252608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.17.attn_kv_a_norm.weight", "offset": 6349260800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.17.attn_kv_a_mqa.weight", "offset": 6349262848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.17.attn_kv_b.weight", "offset": 6349926400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.17.attn_output.weight", "offset": 6351106048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.17.attn_q.weight", "offset": 6353465344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.18.attn_norm.weight", "offset": 6357004288, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.18.ffn_down_exps.weight", "offset": 6357012480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.18.ffn_gate_exps.weight", "offset": 6483890176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.18.ffn_up_exps.weight", "offset": 6587699200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.18.ffn_gate_inp.weight", "offset": 6691508224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.18.ffn_down_shexp.weight", "offset": 6692032512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.18.ffn_gate_shexp.weight", "offset": 6695276544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.18.ffn_up_shexp.weight", "offset": 6698520576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.18.ffn_norm.weight", "offset": 6701764608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.18.attn_kv_a_norm.weight", "offset": 6701772800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.18.attn_kv_a_mqa.weight", "offset": 6701774848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.18.attn_kv_b.weight", "offset": 6702438400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.18.attn_output.weight", "offset": 6703618048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.18.attn_q.weight", "offset": 6705977344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.19.attn_norm.weight", "offset": 6709516288, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.19.ffn_down_exps.weight", "offset": 6709524480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.19.ffn_gate_exps.weight", "offset": 6836402176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.19.ffn_up_exps.weight", "offset": 6940211200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.19.ffn_gate_inp.weight", "offset": 7044020224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.19.ffn_down_shexp.weight", "offset": 7044544512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.19.ffn_gate_shexp.weight", "offset": 7047788544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.19.ffn_up_shexp.weight", "offset": 7051032576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.19.ffn_norm.weight", "offset": 7054276608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.19.attn_kv_a_norm.weight", "offset": 7054284800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.19.attn_kv_a_mqa.weight", "offset": 7054286848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.19.attn_kv_b.weight", "offset": 7054950400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.19.attn_output.weight", "offset": 7056130048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.19.attn_q.weight", "offset": 7058489344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.20.attn_norm.weight", "offset": 7062028288, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.20.ffn_down_exps.weight", "offset": 7062036480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.20.ffn_gate_exps.weight", "offset": 7188914176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.20.ffn_up_exps.weight", "offset": 7292723200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.20.ffn_gate_inp.weight", "offset": 7396532224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.20.ffn_down_shexp.weight", "offset": 7397056512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.20.ffn_gate_shexp.weight", "offset": 7400300544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.20.ffn_up_shexp.weight", "offset": 7403544576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.20.ffn_norm.weight", "offset": 7406788608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.20.attn_kv_a_norm.weight", "offset": 7406796800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.20.attn_kv_a_mqa.weight", "offset": 7406798848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.20.attn_kv_b.weight", "offset": 7407462400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.20.attn_output.weight", "offset": 7408642048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.20.attn_q.weight", "offset": 7411001344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.21.attn_norm.weight", "offset": 7414540288, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.21.ffn_down_exps.weight", "offset": 7414548480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.21.ffn_gate_exps.weight", "offset": 7541426176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.21.ffn_up_exps.weight", "offset": 7645235200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.21.ffn_gate_inp.weight", "offset": 7749044224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.21.ffn_down_shexp.weight", "offset": 7749568512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.21.ffn_gate_shexp.weight", "offset": 7752812544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.21.ffn_up_shexp.weight", "offset": 7756056576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.21.ffn_norm.weight", "offset": 7759300608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.21.attn_kv_a_norm.weight", "offset": 7759308800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.21.attn_kv_a_mqa.weight", "offset": 7759310848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.21.attn_kv_b.weight", "offset": 7759974400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.21.attn_output.weight", "offset": 7761154048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.21.attn_q.weight", "offset": 7763513344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.22.ffn_gate_inp.weight", "offset": 7767052288, "shape": [ 2048, 64 ], "size": 3538944, "type": 0 }, { "name": "blk.22.ffn_down_shexp.weight", "offset": 7767576576, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.22.ffn_gate_shexp.weight", "offset": 7770820608, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.22.ffn_up_shexp.weight", "offset": 7774064640, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.22.attn_kv_a_norm.weight", "offset": 7777308672, "shape": [ 512 ], "size": 3244032, "type": 0 }, { "name": "blk.22.attn_kv_a_mqa.weight", "offset": 7777310720, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.22.attn_kv_b.weight", "offset": 7777974272, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.22.attn_output.weight", "offset": 7779153920, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.22.attn_q.weight", "offset": 7781513216, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.22.attn_norm.weight", "offset": 7785052160, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.22.ffn_down_exps.weight", "offset": 7785060352, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.22.ffn_gate_exps.weight", "offset": 7911938048, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.22.ffn_up_exps.weight", "offset": 8015747072, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.22.ffn_norm.weight", "offset": 8119556096, "shape": [ 2048 ], "size": 103809024, "type": 0 }, { "name": "blk.23.attn_norm.weight", "offset": 8119564288, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.23.ffn_down_exps.weight", "offset": 8119572480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.23.ffn_gate_exps.weight", "offset": 8246450176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.23.ffn_up_exps.weight", "offset": 8350259200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.23.ffn_gate_inp.weight", "offset": 8454068224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.23.ffn_down_shexp.weight", "offset": 8454592512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.23.ffn_gate_shexp.weight", "offset": 8457836544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.23.ffn_up_shexp.weight", "offset": 8461080576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.23.ffn_norm.weight", "offset": 8464324608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.23.attn_kv_a_norm.weight", "offset": 8464332800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.23.attn_kv_a_mqa.weight", "offset": 8464334848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.23.attn_kv_b.weight", "offset": 8464998400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.23.attn_output.weight", "offset": 8466178048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.23.attn_q.weight", "offset": 8468537344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.24.attn_norm.weight", "offset": 8472076288, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.24.ffn_down_exps.weight", "offset": 8472084480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.24.ffn_gate_exps.weight", "offset": 8598962176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.24.ffn_up_exps.weight", "offset": 8702771200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.24.ffn_gate_inp.weight", "offset": 8806580224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.24.ffn_down_shexp.weight", "offset": 8807104512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.24.ffn_gate_shexp.weight", "offset": 8810348544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.24.ffn_up_shexp.weight", "offset": 8813592576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.24.ffn_norm.weight", "offset": 8816836608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.24.attn_kv_a_norm.weight", "offset": 8816844800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.24.attn_kv_a_mqa.weight", "offset": 8816846848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.24.attn_kv_b.weight", "offset": 8817510400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.24.attn_output.weight", "offset": 8818690048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.24.attn_q.weight", "offset": 8821049344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.25.attn_norm.weight", "offset": 8824588288, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.25.ffn_down_exps.weight", "offset": 8824596480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.25.ffn_gate_exps.weight", "offset": 8951474176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.25.ffn_up_exps.weight", "offset": 9055283200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.25.ffn_gate_inp.weight", "offset": 9159092224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.25.ffn_down_shexp.weight", "offset": 9159616512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.25.ffn_gate_shexp.weight", "offset": 9162860544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.25.ffn_up_shexp.weight", "offset": 9166104576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.25.ffn_norm.weight", "offset": 9169348608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.25.attn_kv_a_norm.weight", "offset": 9169356800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.25.attn_kv_a_mqa.weight", "offset": 9169358848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.25.attn_kv_b.weight", "offset": 9170022400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.25.attn_output.weight", "offset": 9171202048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.25.attn_q.weight", "offset": 9173561344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.26.attn_norm.weight", "offset": 9177100288, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.26.ffn_down_exps.weight", "offset": 9177108480, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.26.ffn_gate_exps.weight", "offset": 9303986176, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.26.ffn_up_exps.weight", "offset": 9407795200, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.26.ffn_gate_inp.weight", "offset": 9511604224, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.26.ffn_down_shexp.weight", "offset": 9512128512, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.26.ffn_gate_shexp.weight", "offset": 9515372544, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.26.ffn_up_shexp.weight", "offset": 9518616576, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.26.ffn_norm.weight", "offset": 9521860608, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.26.attn_kv_a_norm.weight", "offset": 9521868800, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.26.attn_kv_a_mqa.weight", "offset": 9521870848, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.26.attn_kv_b.weight", "offset": 9522534400, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.26.attn_output.weight", "offset": 9523714048, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.26.attn_q.weight", "offset": 9526073344, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 } ], "version": 3 }