An open-source Mixture-of-Experts code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks.

16B 236B

106.8K Pulls Updated 10 days ago

5ff0abeeac1d · 8.9GB
{ "metadata": { "deepseek2.attention.head_count": 16, "deepseek2.attention.head_count_kv": 16, "deepseek2.attention.key_length": 192, "deepseek2.attention.kv_lora_rank": 512, "deepseek2.attention.layer_norm_rms_epsilon": 0.000001, "deepseek2.attention.value_length": 128, "deepseek2.block_count": 27, "deepseek2.context_length": 163840, "deepseek2.embedding_length": 2048, "deepseek2.expert_count": 64, "deepseek2.expert_feed_forward_length": 1408, "deepseek2.expert_shared_count": 2, "deepseek2.expert_used_count": 6, "deepseek2.expert_weights_scale": 1, "deepseek2.feed_forward_length": 10944, "deepseek2.leading_dense_block_count": 1, "deepseek2.rope.dimension_count": 64, "deepseek2.rope.freq_base": 10000, "deepseek2.rope.scaling.factor": 40, "deepseek2.rope.scaling.original_context_length": 4096, "deepseek2.rope.scaling.type": "yarn", "deepseek2.rope.scaling.yarn_log_multiplier": 0.0707, "deepseek2.vocab_size": 102400, "general.architecture": "deepseek2", "general.file_type": 2, "general.name": "DeepSeek-Coder-V2-Lite-Instruct", "general.quantization_version": 2, "tokenizer.ggml.add_bos_token": true, "tokenizer.ggml.add_eos_token": false, "tokenizer.ggml.bos_token_id": 100000, "tokenizer.ggml.eos_token_id": 100001, "tokenizer.ggml.merges": "... (99757 values)", "tokenizer.ggml.model": "gpt2", "tokenizer.ggml.padding_token_id": 100001, "tokenizer.ggml.pre": "deepseek-llm", "tokenizer.ggml.token_type": "... (102400 values)", "tokenizer.ggml.tokens": "... 
(102400 values)" }, "num_params": 15706484224, "tensors": [ { "name": "token_embd.weight", "offset": 172032000, "shape": [ 2048, 102400 ], "size": 172032000, "type": 2 }, { "name": "blk.0.attn_norm.weight", "offset": 289996800, "shape": [ 2048 ], "size": 117964800, "type": 0 }, { "name": "blk.0.ffn_down.weight", "offset": 290004992, "shape": [ 10944, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.0.ffn_gate.weight", "offset": 302612480, "shape": [ 2048, 10944 ], "size": 12607488, "type": 2 }, { "name": "blk.0.ffn_up.weight", "offset": 315219968, "shape": [ 2048, 10944 ], "size": 12607488, "type": 2 }, { "name": "blk.0.ffn_norm.weight", "offset": 327827456, "shape": [ 2048 ], "size": 12607488, "type": 0 }, { "name": "blk.0.attn_kv_a_norm.weight", "offset": 327835648, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.0.attn_kv_a_mqa.weight", "offset": 327837696, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.0.attn_kv_b.weight", "offset": 328501248, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.0.attn_output.weight", "offset": 329680896, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.0.attn_q.weight", "offset": 332040192, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.1.attn_norm.weight", "offset": 335579136, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.1.ffn_down_exps.weight", "offset": 335587328, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.1.ffn_gate_exps.weight", "offset": 439396352, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.1.ffn_up_exps.weight", "offset": 543205376, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.1.ffn_gate_inp.weight", "offset": 647014400, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.1.ffn_down_shexp.weight", "offset": 647538688, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": 
"blk.1.ffn_gate_shexp.weight", "offset": 650782720, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.1.ffn_up_shexp.weight", "offset": 654026752, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.1.ffn_norm.weight", "offset": 657270784, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.1.attn_kv_a_norm.weight", "offset": 657278976, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.1.attn_kv_a_mqa.weight", "offset": 657281024, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.1.attn_kv_b.weight", "offset": 657944576, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.1.attn_output.weight", "offset": 659124224, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.1.attn_q.weight", "offset": 661483520, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.2.attn_norm.weight", "offset": 665022464, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.2.ffn_down_exps.weight", "offset": 665030656, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.2.ffn_gate_exps.weight", "offset": 768839680, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.2.ffn_up_exps.weight", "offset": 872648704, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.2.ffn_gate_inp.weight", "offset": 976457728, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.2.ffn_down_shexp.weight", "offset": 976982016, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.2.ffn_gate_shexp.weight", "offset": 980226048, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.2.ffn_up_shexp.weight", "offset": 983470080, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.2.ffn_norm.weight", "offset": 986714112, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.2.attn_kv_a_norm.weight", "offset": 986722304, "shape": [ 512 ], "size": 
8192, "type": 0 }, { "name": "blk.2.attn_kv_a_mqa.weight", "offset": 986724352, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.2.attn_kv_b.weight", "offset": 987387904, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.2.attn_output.weight", "offset": 988567552, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.2.attn_q.weight", "offset": 990926848, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.3.attn_norm.weight", "offset": 994465792, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.3.ffn_down_exps.weight", "offset": 994473984, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.3.ffn_gate_exps.weight", "offset": 1098283008, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.3.ffn_up_exps.weight", "offset": 1202092032, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.3.ffn_gate_inp.weight", "offset": 1305901056, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.3.ffn_down_shexp.weight", "offset": 1306425344, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.3.ffn_gate_shexp.weight", "offset": 1309669376, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.3.ffn_up_shexp.weight", "offset": 1312913408, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.3.ffn_norm.weight", "offset": 1316157440, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.3.attn_kv_a_norm.weight", "offset": 1316165632, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.3.attn_kv_a_mqa.weight", "offset": 1316167680, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.3.attn_kv_b.weight", "offset": 1316831232, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.3.attn_output.weight", "offset": 1318010880, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.3.attn_q.weight", "offset": 
1320370176, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.4.attn_norm.weight", "offset": 1323909120, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.4.ffn_down_exps.weight", "offset": 1323917312, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.4.ffn_gate_exps.weight", "offset": 1427726336, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.4.ffn_up_exps.weight", "offset": 1531535360, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.4.ffn_gate_inp.weight", "offset": 1635344384, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.4.ffn_down_shexp.weight", "offset": 1635868672, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.4.ffn_gate_shexp.weight", "offset": 1639112704, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.4.ffn_up_shexp.weight", "offset": 1642356736, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.4.ffn_norm.weight", "offset": 1645600768, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.4.attn_kv_a_norm.weight", "offset": 1645608960, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.4.attn_kv_a_mqa.weight", "offset": 1645611008, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.4.attn_kv_b.weight", "offset": 1646274560, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.4.attn_output.weight", "offset": 1647454208, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.4.attn_q.weight", "offset": 1649813504, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.5.attn_norm.weight", "offset": 1653352448, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.5.ffn_down_exps.weight", "offset": 1653360640, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.5.ffn_gate_exps.weight", "offset": 1757169664, "shape": [ 2048, 1408, 64 ], "size": 
103809024, "type": 2 }, { "name": "blk.5.ffn_up_exps.weight", "offset": 1860978688, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.5.ffn_gate_inp.weight", "offset": 1964787712, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.5.ffn_down_shexp.weight", "offset": 1965312000, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.5.ffn_gate_shexp.weight", "offset": 1968556032, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.5.ffn_up_shexp.weight", "offset": 1971800064, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.5.ffn_norm.weight", "offset": 1975044096, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.5.attn_kv_a_norm.weight", "offset": 1975052288, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.5.attn_kv_a_mqa.weight", "offset": 1975054336, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.5.attn_kv_b.weight", "offset": 1975717888, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.5.attn_output.weight", "offset": 1976897536, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.5.attn_q.weight", "offset": 1979256832, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.6.attn_norm.weight", "offset": 1982795776, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.6.ffn_down_exps.weight", "offset": 1982803968, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.6.ffn_gate_exps.weight", "offset": 2086612992, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.6.ffn_up_exps.weight", "offset": 2190422016, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.6.ffn_gate_inp.weight", "offset": 2294231040, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": 
"blk.6.ffn_down_shexp.weight", "offset": 2294755328, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.6.ffn_gate_shexp.weight", "offset": 2297999360, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.6.ffn_up_shexp.weight", "offset": 2301243392, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.6.ffn_norm.weight", "offset": 2304487424, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.6.attn_kv_a_norm.weight", "offset": 2304495616, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.6.attn_kv_a_mqa.weight", "offset": 2304497664, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.6.attn_kv_b.weight", "offset": 2305161216, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.6.attn_output.weight", "offset": 2306340864, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.6.attn_q.weight", "offset": 2308700160, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.7.ffn_gate_inp.weight", "offset": 2312239104, "shape": [ 2048, 64 ], "size": 3538944, "type": 0 }, { "name": "blk.7.ffn_down_shexp.weight", "offset": 2312763392, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.7.ffn_gate_shexp.weight", "offset": 2316007424, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.7.ffn_up_shexp.weight", "offset": 2319251456, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.7.attn_kv_a_norm.weight", "offset": 2322495488, "shape": [ 512 ], "size": 3244032, "type": 0 }, { "name": "blk.7.attn_kv_a_mqa.weight", "offset": 2322497536, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.7.attn_kv_b.weight", "offset": 2323161088, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.7.attn_output.weight", "offset": 2324340736, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.7.attn_q.weight", "offset": 2326700032, "shape": [ 2048, 3072 ], 
"size": 2359296, "type": 2 }, { "name": "output_norm.weight", "offset": 2330238976, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.10.attn_norm.weight", "offset": 2330247168, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.10.ffn_down_exps.weight", "offset": 2330255360, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.10.ffn_gate_exps.weight", "offset": 2434064384, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.10.ffn_up_exps.weight", "offset": 2537873408, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.10.ffn_gate_inp.weight", "offset": 2641682432, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.10.ffn_down_shexp.weight", "offset": 2642206720, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.10.ffn_gate_shexp.weight", "offset": 2645450752, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.10.ffn_up_shexp.weight", "offset": 2648694784, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.10.ffn_norm.weight", "offset": 2651938816, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.10.attn_kv_a_norm.weight", "offset": 2651947008, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.10.attn_kv_a_mqa.weight", "offset": 2651949056, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.10.attn_kv_b.weight", "offset": 2652612608, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.10.attn_output.weight", "offset": 2653792256, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.10.attn_q.weight", "offset": 2656151552, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.11.attn_norm.weight", "offset": 2659690496, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.11.ffn_down_exps.weight", "offset": 2659698688, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": 
"blk.11.ffn_gate_exps.weight", "offset": 2763507712, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.11.ffn_up_exps.weight", "offset": 2867316736, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.11.ffn_gate_inp.weight", "offset": 2971125760, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.11.ffn_down_shexp.weight", "offset": 2971650048, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.11.ffn_gate_shexp.weight", "offset": 2974894080, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.11.ffn_up_shexp.weight", "offset": 2978138112, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.11.ffn_norm.weight", "offset": 2981382144, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.11.attn_kv_a_norm.weight", "offset": 2981390336, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.11.attn_kv_a_mqa.weight", "offset": 2981392384, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.11.attn_kv_b.weight", "offset": 2982055936, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.11.attn_output.weight", "offset": 2983235584, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.11.attn_q.weight", "offset": 2985594880, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.12.attn_norm.weight", "offset": 2989133824, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.12.ffn_down_exps.weight", "offset": 2989142016, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.12.ffn_gate_exps.weight", "offset": 3092951040, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.12.ffn_up_exps.weight", "offset": 3196760064, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.12.ffn_gate_inp.weight", "offset": 3300569088, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": 
"blk.12.ffn_down_shexp.weight", "offset": 3301093376, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.12.ffn_gate_shexp.weight", "offset": 3304337408, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.12.ffn_up_shexp.weight", "offset": 3307581440, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.12.ffn_norm.weight", "offset": 3310825472, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.12.attn_kv_a_norm.weight", "offset": 3310833664, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.12.attn_kv_a_mqa.weight", "offset": 3310835712, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.12.attn_kv_b.weight", "offset": 3311499264, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.12.attn_output.weight", "offset": 3312678912, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.12.attn_q.weight", "offset": 3315038208, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.13.attn_norm.weight", "offset": 3318577152, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.13.ffn_down_exps.weight", "offset": 3318585344, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.13.ffn_gate_exps.weight", "offset": 3422394368, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.13.ffn_up_exps.weight", "offset": 3526203392, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.13.ffn_gate_inp.weight", "offset": 3630012416, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.13.ffn_down_shexp.weight", "offset": 3630536704, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.13.ffn_gate_shexp.weight", "offset": 3633780736, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.13.ffn_up_shexp.weight", "offset": 3637024768, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.13.ffn_norm.weight", 
"offset": 3640268800, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.13.attn_kv_a_norm.weight", "offset": 3640276992, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.13.attn_kv_a_mqa.weight", "offset": 3640279040, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.13.attn_kv_b.weight", "offset": 3640942592, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.13.attn_output.weight", "offset": 3642122240, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.13.attn_q.weight", "offset": 3644481536, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.14.ffn_gate_inp.weight", "offset": 3648020480, "shape": [ 2048, 64 ], "size": 3538944, "type": 0 }, { "name": "blk.14.ffn_down_shexp.weight", "offset": 3648544768, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.14.ffn_gate_shexp.weight", "offset": 3651788800, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.14.ffn_up_shexp.weight", "offset": 3655032832, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.14.attn_kv_a_norm.weight", "offset": 3658276864, "shape": [ 512 ], "size": 3244032, "type": 0 }, { "name": "blk.14.attn_kv_a_mqa.weight", "offset": 3658278912, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.14.attn_kv_b.weight", "offset": 3658942464, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.14.attn_output.weight", "offset": 3660122112, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.14.attn_q.weight", "offset": 3662481408, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.7.attn_norm.weight", "offset": 3666020352, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.7.ffn_down_exps.weight", "offset": 3666028544, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.7.ffn_gate_exps.weight", "offset": 3769837568, "shape": [ 2048, 1408, 64 ], "size": 
103809024, "type": 2 }, { "name": "blk.7.ffn_up_exps.weight", "offset": 3873646592, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.7.ffn_norm.weight", "offset": 3977455616, "shape": [ 2048 ], "size": 103809024, "type": 0 }, { "name": "blk.8.attn_norm.weight", "offset": 3977463808, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.8.ffn_down_exps.weight", "offset": 3977472000, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.8.ffn_gate_exps.weight", "offset": 4081281024, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.8.ffn_up_exps.weight", "offset": 4185090048, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.8.ffn_gate_inp.weight", "offset": 4288899072, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.8.ffn_down_shexp.weight", "offset": 4289423360, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.8.ffn_gate_shexp.weight", "offset": 4292667392, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.8.ffn_up_shexp.weight", "offset": 4295911424, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.8.ffn_norm.weight", "offset": 4299155456, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.8.attn_kv_a_norm.weight", "offset": 4299163648, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.8.attn_kv_a_mqa.weight", "offset": 4299165696, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.8.attn_kv_b.weight", "offset": 4299829248, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.8.attn_output.weight", "offset": 4301008896, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.8.attn_q.weight", "offset": 4303368192, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.9.attn_norm.weight", "offset": 4306907136, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.9.ffn_down_exps.weight", 
"offset": 4306915328, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.9.ffn_gate_exps.weight", "offset": 4410724352, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.9.ffn_up_exps.weight", "offset": 4514533376, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.9.ffn_gate_inp.weight", "offset": 4618342400, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.9.ffn_down_shexp.weight", "offset": 4618866688, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.9.ffn_gate_shexp.weight", "offset": 4622110720, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.9.ffn_up_shexp.weight", "offset": 4625354752, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.9.ffn_norm.weight", "offset": 4628598784, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.9.attn_kv_a_norm.weight", "offset": 4628606976, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.9.attn_kv_a_mqa.weight", "offset": 4628609024, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.9.attn_kv_b.weight", "offset": 4629272576, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.9.attn_output.weight", "offset": 4630452224, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.9.attn_q.weight", "offset": 4632811520, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.14.attn_norm.weight", "offset": 4636350464, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.14.ffn_down_exps.weight", "offset": 4636358656, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.14.ffn_gate_exps.weight", "offset": 4740167680, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.14.ffn_up_exps.weight", "offset": 4843976704, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.14.ffn_norm.weight", "offset": 4947785728, "shape": [ 2048 
], "size": 103809024, "type": 0 }, { "name": "blk.15.attn_norm.weight", "offset": 4947793920, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.15.ffn_down_exps.weight", "offset": 4947802112, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.15.ffn_gate_exps.weight", "offset": 5051611136, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.15.ffn_up_exps.weight", "offset": 5155420160, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.15.ffn_gate_inp.weight", "offset": 5259229184, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.15.ffn_down_shexp.weight", "offset": 5259753472, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.15.ffn_gate_shexp.weight", "offset": 5262997504, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.15.ffn_up_shexp.weight", "offset": 5266241536, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.15.ffn_norm.weight", "offset": 5269485568, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.15.attn_kv_a_norm.weight", "offset": 5269493760, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.15.attn_kv_a_mqa.weight", "offset": 5269495808, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.15.attn_kv_b.weight", "offset": 5270159360, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.15.attn_output.weight", "offset": 5271339008, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.15.attn_q.weight", "offset": 5273698304, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.16.attn_norm.weight", "offset": 5277237248, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.16.ffn_down_exps.weight", "offset": 5277245440, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": 
"blk.16.ffn_gate_exps.weight", "offset": 5381054464, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.16.ffn_up_exps.weight", "offset": 5484863488, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.16.ffn_gate_inp.weight", "offset": 5588672512, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.16.ffn_down_shexp.weight", "offset": 5589196800, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.16.ffn_gate_shexp.weight", "offset": 5592440832, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.16.ffn_up_shexp.weight", "offset": 5595684864, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.16.ffn_norm.weight", "offset": 5598928896, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.16.attn_kv_a_norm.weight", "offset": 5598937088, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.16.attn_kv_a_mqa.weight", "offset": 5598939136, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.16.attn_kv_b.weight", "offset": 5599602688, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.16.attn_output.weight", "offset": 5600782336, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.16.attn_q.weight", "offset": 5603141632, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.17.attn_norm.weight", "offset": 5606680576, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.17.ffn_down_exps.weight", "offset": 5606688768, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.17.ffn_gate_exps.weight", "offset": 5710497792, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.17.ffn_up_exps.weight", "offset": 5814306816, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.17.ffn_gate_inp.weight", "offset": 5918115840, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": 
"blk.17.ffn_down_shexp.weight", "offset": 5918640128, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.17.ffn_gate_shexp.weight", "offset": 5921884160, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.17.ffn_up_shexp.weight", "offset": 5925128192, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.17.ffn_norm.weight", "offset": 5928372224, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.17.attn_kv_a_norm.weight", "offset": 5928380416, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.17.attn_kv_a_mqa.weight", "offset": 5928382464, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.17.attn_kv_b.weight", "offset": 5929046016, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.17.attn_output.weight", "offset": 5930225664, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.17.attn_q.weight", "offset": 5932584960, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.18.attn_norm.weight", "offset": 5936123904, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.18.ffn_down_exps.weight", "offset": 5936132096, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.18.ffn_gate_exps.weight", "offset": 6039941120, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.18.ffn_up_exps.weight", "offset": 6143750144, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.18.ffn_gate_inp.weight", "offset": 6247559168, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.18.ffn_down_shexp.weight", "offset": 6248083456, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.18.ffn_gate_shexp.weight", "offset": 6251327488, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.18.ffn_up_shexp.weight", "offset": 6254571520, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.18.ffn_norm.weight", 
"offset": 6257815552, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.18.attn_kv_a_norm.weight", "offset": 6257823744, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.18.attn_kv_a_mqa.weight", "offset": 6257825792, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.18.attn_kv_b.weight", "offset": 6258489344, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.18.attn_output.weight", "offset": 6259668992, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.18.attn_q.weight", "offset": 6262028288, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.19.attn_norm.weight", "offset": 6265567232, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.19.ffn_down_exps.weight", "offset": 6265575424, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.19.ffn_gate_exps.weight", "offset": 6369384448, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.19.ffn_up_exps.weight", "offset": 6473193472, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.19.ffn_gate_inp.weight", "offset": 6577002496, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.19.ffn_down_shexp.weight", "offset": 6577526784, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.19.ffn_gate_shexp.weight", "offset": 6580770816, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.19.ffn_up_shexp.weight", "offset": 6584014848, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.19.ffn_norm.weight", "offset": 6587258880, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.19.attn_kv_a_norm.weight", "offset": 6587267072, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.19.attn_kv_a_mqa.weight", "offset": 6587269120, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.19.attn_kv_b.weight", "offset": 6587932672, "shape": [ 512, 4096 ], "size": 
663552, "type": 2 }, { "name": "blk.19.attn_output.weight", "offset": 6589112320, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.19.attn_q.weight", "offset": 6591471616, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.20.attn_norm.weight", "offset": 6595010560, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.20.ffn_down_exps.weight", "offset": 6595018752, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.20.ffn_gate_exps.weight", "offset": 6698827776, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.20.ffn_up_exps.weight", "offset": 6802636800, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.20.ffn_gate_inp.weight", "offset": 6906445824, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.20.ffn_down_shexp.weight", "offset": 6906970112, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.20.ffn_gate_shexp.weight", "offset": 6910214144, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.20.ffn_up_shexp.weight", "offset": 6913458176, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.20.ffn_norm.weight", "offset": 6916702208, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.20.attn_kv_a_norm.weight", "offset": 6916710400, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.20.attn_kv_a_mqa.weight", "offset": 6916712448, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.20.attn_kv_b.weight", "offset": 6917376000, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.20.attn_output.weight", "offset": 6918555648, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.20.attn_q.weight", "offset": 6920914944, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.21.attn_norm.weight", "offset": 6924453888, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": 
"blk.21.ffn_down_exps.weight", "offset": 6924462080, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.21.ffn_gate_exps.weight", "offset": 7028271104, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.21.ffn_up_exps.weight", "offset": 7132080128, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.21.ffn_gate_inp.weight", "offset": 7235889152, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.21.ffn_down_shexp.weight", "offset": 7236413440, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.21.ffn_gate_shexp.weight", "offset": 7239657472, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.21.ffn_up_shexp.weight", "offset": 7242901504, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.21.ffn_norm.weight", "offset": 7246145536, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.21.attn_kv_a_norm.weight", "offset": 7246153728, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.21.attn_kv_a_mqa.weight", "offset": 7246155776, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.21.attn_kv_b.weight", "offset": 7246819328, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.21.attn_output.weight", "offset": 7247998976, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.21.attn_q.weight", "offset": 7250358272, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.22.ffn_gate_inp.weight", "offset": 7253897216, "shape": [ 2048, 64 ], "size": 3538944, "type": 0 }, { "name": "blk.22.ffn_down_shexp.weight", "offset": 7254421504, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.22.ffn_gate_shexp.weight", "offset": 7257665536, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.22.ffn_up_shexp.weight", "offset": 7260909568, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": 
"blk.22.attn_kv_a_norm.weight", "offset": 7264153600, "shape": [ 512 ], "size": 3244032, "type": 0 }, { "name": "blk.22.attn_kv_a_mqa.weight", "offset": 7264155648, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.22.attn_kv_b.weight", "offset": 7264819200, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.22.attn_output.weight", "offset": 7265998848, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.22.attn_q.weight", "offset": 7268358144, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.22.attn_norm.weight", "offset": 7271897088, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.22.ffn_down_exps.weight", "offset": 7271905280, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.22.ffn_gate_exps.weight", "offset": 7375714304, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.22.ffn_up_exps.weight", "offset": 7479523328, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.22.ffn_norm.weight", "offset": 7583332352, "shape": [ 2048 ], "size": 103809024, "type": 0 }, { "name": "blk.23.attn_norm.weight", "offset": 7583340544, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.23.ffn_down_exps.weight", "offset": 7583348736, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.23.ffn_gate_exps.weight", "offset": 7687157760, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.23.ffn_up_exps.weight", "offset": 7790966784, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.23.ffn_gate_inp.weight", "offset": 7894775808, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.23.ffn_down_shexp.weight", "offset": 7895300096, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.23.ffn_gate_shexp.weight", "offset": 7898544128, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": 
"blk.23.ffn_up_shexp.weight", "offset": 7901788160, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.23.ffn_norm.weight", "offset": 7905032192, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.23.attn_kv_a_norm.weight", "offset": 7905040384, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.23.attn_kv_a_mqa.weight", "offset": 7905042432, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.23.attn_kv_b.weight", "offset": 7905705984, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.23.attn_output.weight", "offset": 7906885632, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.23.attn_q.weight", "offset": 7909244928, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.24.attn_norm.weight", "offset": 7912783872, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.24.ffn_down_exps.weight", "offset": 7912792064, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.24.ffn_gate_exps.weight", "offset": 8016601088, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.24.ffn_up_exps.weight", "offset": 8120410112, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.24.ffn_gate_inp.weight", "offset": 8224219136, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.24.ffn_down_shexp.weight", "offset": 8224743424, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.24.ffn_gate_shexp.weight", "offset": 8227987456, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.24.ffn_up_shexp.weight", "offset": 8231231488, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.24.ffn_norm.weight", "offset": 8234475520, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.24.attn_kv_a_norm.weight", "offset": 8234483712, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.24.attn_kv_a_mqa.weight", "offset": 8234485760, 
"shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.24.attn_kv_b.weight", "offset": 8235149312, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.24.attn_output.weight", "offset": 8236328960, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.24.attn_q.weight", "offset": 8238688256, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 }, { "name": "blk.25.attn_norm.weight", "offset": 8242227200, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.25.ffn_down_exps.weight", "offset": 8242235392, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.25.ffn_gate_exps.weight", "offset": 8346044416, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.25.ffn_up_exps.weight", "offset": 8449853440, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.25.ffn_gate_inp.weight", "offset": 8553662464, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.25.ffn_down_shexp.weight", "offset": 8554186752, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.25.ffn_gate_shexp.weight", "offset": 8557430784, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.25.ffn_up_shexp.weight", "offset": 8560674816, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.25.ffn_norm.weight", "offset": 8563918848, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.25.attn_kv_a_norm.weight", "offset": 8563927040, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.25.attn_kv_a_mqa.weight", "offset": 8563929088, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.25.attn_kv_b.weight", "offset": 8564592640, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.25.attn_output.weight", "offset": 8565772288, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.25.attn_q.weight", "offset": 8568131584, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 
}, { "name": "blk.26.attn_norm.weight", "offset": 8571670528, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.26.ffn_down_exps.weight", "offset": 8571678720, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 2 }, { "name": "blk.26.ffn_gate_exps.weight", "offset": 8675487744, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.26.ffn_up_exps.weight", "offset": 8779296768, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 2 }, { "name": "blk.26.ffn_gate_inp.weight", "offset": 8883105792, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.26.ffn_down_shexp.weight", "offset": 8883630080, "shape": [ 2816, 2048 ], "size": 524288, "type": 2 }, { "name": "blk.26.ffn_gate_shexp.weight", "offset": 8886874112, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.26.ffn_up_shexp.weight", "offset": 8890118144, "shape": [ 2048, 2816 ], "size": 3244032, "type": 2 }, { "name": "blk.26.ffn_norm.weight", "offset": 8893362176, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.26.attn_kv_a_norm.weight", "offset": 8893370368, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.26.attn_kv_a_mqa.weight", "offset": 8893372416, "shape": [ 2048, 576 ], "size": 2048, "type": 2 }, { "name": "blk.26.attn_kv_b.weight", "offset": 8894035968, "shape": [ 512, 4096 ], "size": 663552, "type": 2 }, { "name": "blk.26.attn_output.weight", "offset": 8895215616, "shape": [ 2048, 2048 ], "size": 1179648, "type": 2 }, { "name": "blk.26.attn_q.weight", "offset": 8897574912, "shape": [ 2048, 3072 ], "size": 2359296, "type": 2 } ], "version": 3 }