An open-source Mixture-of-Experts code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks.

16B 236B

111.5K Pulls Updated 12 days ago

ece89e24aa98 · 142GB
{ "metadata": { "deepseek2.attention.head_count": 128, "deepseek2.attention.head_count_kv": 128, "deepseek2.attention.key_length": 192, "deepseek2.attention.kv_lora_rank": 512, "deepseek2.attention.layer_norm_rms_epsilon": 0.000001, "deepseek2.attention.q_lora_rank": 1536, "deepseek2.attention.value_length": 128, "deepseek2.block_count": 60, "deepseek2.context_length": 163840, "deepseek2.embedding_length": 5120, "deepseek2.expert_count": 160, "deepseek2.expert_feed_forward_length": 1536, "deepseek2.expert_shared_count": 2, "deepseek2.expert_used_count": 6, "deepseek2.expert_weights_scale": 16, "deepseek2.feed_forward_length": 12288, "deepseek2.leading_dense_block_count": 1, "deepseek2.rope.dimension_count": 64, "deepseek2.rope.freq_base": 10000, "deepseek2.rope.scaling.factor": 40, "deepseek2.rope.scaling.original_context_length": 4096, "deepseek2.rope.scaling.type": "yarn", "deepseek2.rope.scaling.yarn_log_multiplier": 0.1, "deepseek2.vocab_size": 102400, "general.architecture": "deepseek2", "general.file_type": 15, "general.name": "DeepSeek-Coder-V2-Instruct", "general.quantization_version": 2, "tokenizer.ggml.add_bos_token": true, "tokenizer.ggml.add_eos_token": false, "tokenizer.ggml.bos_token_id": 100000, "tokenizer.ggml.eos_token_id": 100001, "tokenizer.ggml.merges": "... (99757 values)", "tokenizer.ggml.model": "gpt2", "tokenizer.ggml.padding_token_id": 100001, "tokenizer.ggml.pre": "deepseek-llm", "tokenizer.ggml.token_type": "... (102400 values)", "tokenizer.ggml.tokens": "... 
(102400 values)" }, "num_params": 235741434880, "tensors": [ { "name": "token_embd.weight", "offset": 430080000, "shape": [ 5120, 102400 ], "size": 430080000, "type": 12 }, { "name": "blk.0.attn_norm.weight", "offset": 724992000, "shape": [ 5120 ], "size": 294912000, "type": 0 }, { "name": "blk.0.ffn_down.weight", "offset": 725012480, "shape": [ 12288, 5120 ], "size": 20480, "type": 14 }, { "name": "blk.0.ffn_gate.weight", "offset": 776622080, "shape": [ 5120, 12288 ], "size": 51609600, "type": 12 }, { "name": "blk.0.ffn_up.weight", "offset": 812011520, "shape": [ 5120, 12288 ], "size": 35389440, "type": 12 }, { "name": "blk.0.ffn_norm.weight", "offset": 847400960, "shape": [ 5120 ], "size": 35389440, "type": 0 }, { "name": "blk.0.attn_kv_a_norm.weight", "offset": 847421440, "shape": [ 512 ], "size": 20480, "type": 0 }, { "name": "blk.0.attn_kv_a_mqa.weight", "offset": 847423488, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.0.attn_kv_b.weight", "offset": 849082368, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.0.attn_output.weight", "offset": 858519552, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.0.attn_q_a_norm.weight", "offset": 905705472, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.0.attn_q_a.weight", "offset": 905711616, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.0.attn_q_b.weight", "offset": 910135296, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.1.ffn_gate_inp.weight", "offset": 931368960, "shape": [ 5120, 160 ], "size": 21233664, "type": 0 }, { "name": "blk.1.ffn_down_shexp.weight", "offset": 934645760, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.1.ffn_gate_shexp.weight", "offset": 947548160, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.1.ffn_up_shexp.weight", "offset": 956395520, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": 
"blk.1.attn_kv_a_norm.weight", "offset": 965242880, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.1.attn_kv_a_mqa.weight", "offset": 965244928, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.1.attn_kv_b.weight", "offset": 966903808, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.1.attn_output.weight", "offset": 976340992, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.1.attn_q_a_norm.weight", "offset": 1023526912, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.1.attn_q_a.weight", "offset": 1023533056, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.1.attn_q_b.weight", "offset": 1027956736, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "output_norm.weight", "offset": 1049190400, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.1.attn_norm.weight", "offset": 1049210880, "shape": [ 5120 ], "size": 20480, "type": 0 }, { "name": "blk.1.ffn_down_exps.weight", "offset": 1049231360, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.1.ffn_gate_exps.weight", "offset": 2081423360, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.1.ffn_up_exps.weight", "offset": 2789212160, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.1.ffn_norm.weight", "offset": 3497000960, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.2.ffn_gate_inp.weight", "offset": 3497021440, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.2.ffn_down_shexp.weight", "offset": 3500298240, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.2.ffn_gate_shexp.weight", "offset": 3513200640, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.2.ffn_up_shexp.weight", "offset": 3522048000, "shape": [ 5120, 
3072 ], "size": 8847360, "type": 12 }, { "name": "blk.2.attn_kv_a_norm.weight", "offset": 3530895360, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.2.attn_kv_a_mqa.weight", "offset": 3530897408, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.2.attn_kv_b.weight", "offset": 3532556288, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.2.attn_output.weight", "offset": 3541993472, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.2.attn_q_a_norm.weight", "offset": 3589179392, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.2.attn_q_a.weight", "offset": 3589185536, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.2.attn_q_b.weight", "offset": 3593609216, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.2.attn_norm.weight", "offset": 3614842880, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.2.ffn_down_exps.weight", "offset": 3614863360, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.2.ffn_gate_exps.weight", "offset": 4647055360, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.2.ffn_up_exps.weight", "offset": 5354844160, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.2.ffn_norm.weight", "offset": 6062632960, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.3.ffn_gate_inp.weight", "offset": 6062653440, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.3.ffn_down_shexp.weight", "offset": 6065930240, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.3.ffn_gate_shexp.weight", "offset": 6078832640, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.3.ffn_up_shexp.weight", "offset": 6087680000, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.3.attn_kv_a_norm.weight", "offset": 6096527360, "shape": [ 512 ], "size": 8847360, "type": 0 }, { 
"name": "blk.3.attn_kv_a_mqa.weight", "offset": 6096529408, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.3.attn_kv_b.weight", "offset": 6098188288, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.3.attn_output.weight", "offset": 6107625472, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.3.attn_q_a_norm.weight", "offset": 6154811392, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.3.attn_q_a.weight", "offset": 6154817536, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.3.attn_q_b.weight", "offset": 6159241216, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.3.attn_norm.weight", "offset": 6180474880, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.3.ffn_down_exps.weight", "offset": 6180495360, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.3.ffn_gate_exps.weight", "offset": 7212687360, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.3.ffn_up_exps.weight", "offset": 7920476160, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.3.ffn_norm.weight", "offset": 8628264960, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.4.ffn_gate_inp.weight", "offset": 8628285440, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.4.ffn_down_shexp.weight", "offset": 8631562240, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.4.ffn_gate_shexp.weight", "offset": 8644464640, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.4.ffn_up_shexp.weight", "offset": 8653312000, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.4.attn_kv_a_norm.weight", "offset": 8662159360, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.4.attn_kv_a_mqa.weight", "offset": 8662161408, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.4.attn_kv_b.weight", 
"offset": 8663820288, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.4.attn_output.weight", "offset": 8673257472, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.4.attn_q_a_norm.weight", "offset": 8720443392, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.4.attn_q_a.weight", "offset": 8720449536, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.4.attn_q_b.weight", "offset": 8724873216, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.4.attn_norm.weight", "offset": 8746106880, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.4.ffn_down_exps.weight", "offset": 8746127360, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.4.ffn_gate_exps.weight", "offset": 9778319360, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.4.ffn_up_exps.weight", "offset": 10486108160, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.4.ffn_norm.weight", "offset": 11193896960, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.5.attn_norm.weight", "offset": 11193917440, "shape": [ 5120 ], "size": 20480, "type": 0 }, { "name": "blk.5.ffn_down_exps.weight", "offset": 11193937920, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.5.ffn_gate_exps.weight", "offset": 12226129920, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.5.ffn_up_exps.weight", "offset": 12933918720, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.5.ffn_gate_inp.weight", "offset": 13641707520, "shape": [ 5120, 160 ], "size": 707788800, "type": 0 }, { "name": "blk.5.ffn_down_shexp.weight", "offset": 13644984320, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.5.ffn_gate_shexp.weight", "offset": 13657886720, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.5.ffn_up_shexp.weight", 
"offset": 13666734080, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.5.ffn_norm.weight", "offset": 13675581440, "shape": [ 5120 ], "size": 8847360, "type": 0 }, { "name": "blk.5.attn_kv_a_norm.weight", "offset": 13675601920, "shape": [ 512 ], "size": 20480, "type": 0 }, { "name": "blk.5.attn_kv_a_mqa.weight", "offset": 13675603968, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.5.attn_kv_b.weight", "offset": 13677262848, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.5.attn_output.weight", "offset": 13686700032, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.5.attn_q_a_norm.weight", "offset": 13733885952, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.5.attn_q_a.weight", "offset": 13733892096, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.5.attn_q_b.weight", "offset": 13738315776, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.6.ffn_gate_inp.weight", "offset": 13759549440, "shape": [ 5120, 160 ], "size": 21233664, "type": 0 }, { "name": "blk.6.ffn_down_shexp.weight", "offset": 13762826240, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.6.ffn_gate_shexp.weight", "offset": 13775728640, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.6.ffn_up_shexp.weight", "offset": 13784576000, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.6.attn_kv_a_norm.weight", "offset": 13793423360, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.6.attn_kv_a_mqa.weight", "offset": 13793425408, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.6.attn_kv_b.weight", "offset": 13795084288, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.6.attn_output.weight", "offset": 13804521472, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.6.attn_q_a_norm.weight", "offset": 13851707392, "shape": [ 1536 
], "size": 47185920, "type": 0 }, { "name": "blk.6.attn_q_a.weight", "offset": 13851713536, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.6.attn_q_b.weight", "offset": 13856137216, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.6.attn_norm.weight", "offset": 13877370880, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.6.ffn_down_exps.weight", "offset": 13877391360, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.6.ffn_gate_exps.weight", "offset": 14909583360, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.6.ffn_up_exps.weight", "offset": 15617372160, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.6.ffn_norm.weight", "offset": 16325160960, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.7.ffn_gate_inp.weight", "offset": 16325181440, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.7.ffn_down_shexp.weight", "offset": 16328458240, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.7.ffn_gate_shexp.weight", "offset": 16337305600, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.7.ffn_up_shexp.weight", "offset": 16346152960, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.7.attn_kv_a_norm.weight", "offset": 16355000320, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.7.attn_kv_a_mqa.weight", "offset": 16355002368, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.7.attn_kv_b.weight", "offset": 16356661248, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.7.attn_output.weight", "offset": 16366098432, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.7.attn_q_a_norm.weight", "offset": 16413284352, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.7.attn_q_a.weight", "offset": 16413290496, "shape": [ 5120, 1536 ], "size": 6144, 
"type": 12 }, { "name": "blk.7.attn_q_b.weight", "offset": 16417714176, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.7.attn_norm.weight", "offset": 16438947840, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.7.ffn_down_exps.weight", "offset": 16438968320, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.7.ffn_gate_exps.weight", "offset": 17146757120, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.7.ffn_up_exps.weight", "offset": 17854545920, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.7.ffn_norm.weight", "offset": 18562334720, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.8.ffn_gate_inp.weight", "offset": 18562355200, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.8.ffn_down_shexp.weight", "offset": 18565632000, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.8.ffn_gate_shexp.weight", "offset": 18574479360, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.8.ffn_up_shexp.weight", "offset": 18583326720, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.8.attn_kv_a_norm.weight", "offset": 18592174080, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.8.attn_kv_a_mqa.weight", "offset": 18592176128, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.8.attn_kv_b.weight", "offset": 18593835008, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.8.attn_output.weight", "offset": 18603272192, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.8.attn_q_a_norm.weight", "offset": 18650458112, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.8.attn_q_a.weight", "offset": 18650464256, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.8.attn_q_b.weight", "offset": 18654887936, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": 
"blk.8.attn_norm.weight", "offset": 18676121600, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.8.ffn_down_exps.weight", "offset": 18676142080, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.8.ffn_gate_exps.weight", "offset": 19383930880, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.8.ffn_up_exps.weight", "offset": 20091719680, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.8.ffn_norm.weight", "offset": 20799508480, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.9.ffn_gate_inp.weight", "offset": 20799528960, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.9.ffn_down_shexp.weight", "offset": 20802805760, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.9.ffn_gate_shexp.weight", "offset": 20815708160, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.9.ffn_up_shexp.weight", "offset": 20824555520, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.9.attn_kv_a_norm.weight", "offset": 20833402880, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.9.attn_kv_a_mqa.weight", "offset": 20833404928, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.9.attn_kv_b.weight", "offset": 20835063808, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.9.attn_output.weight", "offset": 20844500992, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.9.attn_q_a_norm.weight", "offset": 20891686912, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.9.attn_q_a.weight", "offset": 20891693056, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.9.attn_q_b.weight", "offset": 20896116736, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.10.ffn_gate_inp.weight", "offset": 20917350400, "shape": [ 5120, 160 ], "size": 21233664, "type": 0 }, { "name": 
"blk.10.ffn_down_shexp.weight", "offset": 20920627200, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.10.ffn_gate_shexp.weight", "offset": 20929474560, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.10.ffn_up_shexp.weight", "offset": 20938321920, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.10.attn_kv_a_norm.weight", "offset": 20947169280, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.10.attn_kv_a_mqa.weight", "offset": 20947171328, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.10.attn_kv_b.weight", "offset": 20948830208, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.10.attn_output.weight", "offset": 20958267392, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.10.attn_q_a_norm.weight", "offset": 21005453312, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.10.attn_q_a.weight", "offset": 21005459456, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.10.attn_q_b.weight", "offset": 21009883136, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.9.attn_norm.weight", "offset": 21031116800, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.9.ffn_down_exps.weight", "offset": 21031137280, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.9.ffn_gate_exps.weight", "offset": 22063329280, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.9.ffn_up_exps.weight", "offset": 22771118080, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.9.ffn_norm.weight", "offset": 23478906880, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.10.attn_norm.weight", "offset": 23478927360, "shape": [ 5120 ], "size": 20480, "type": 0 }, { "name": "blk.10.ffn_down_exps.weight", "offset": 23478947840, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": 
"blk.10.ffn_gate_exps.weight", "offset": 24186736640, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.10.ffn_up_exps.weight", "offset": 24894525440, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.10.ffn_norm.weight", "offset": 25602314240, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.11.ffn_gate_inp.weight", "offset": 25602334720, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.11.ffn_down_shexp.weight", "offset": 25605611520, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.11.ffn_gate_shexp.weight", "offset": 25614458880, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.11.ffn_up_shexp.weight", "offset": 25623306240, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.11.attn_kv_a_norm.weight", "offset": 25632153600, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.11.attn_kv_a_mqa.weight", "offset": 25632155648, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.11.attn_kv_b.weight", "offset": 25633814528, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.11.attn_output.weight", "offset": 25643251712, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.11.attn_q_a_norm.weight", "offset": 25690437632, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.11.attn_q_a.weight", "offset": 25690443776, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.11.attn_q_b.weight", "offset": 25694867456, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.11.attn_norm.weight", "offset": 25716101120, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.11.ffn_down_exps.weight", "offset": 25716121600, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.11.ffn_gate_exps.weight", "offset": 26423910400, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { 
"name": "blk.11.ffn_up_exps.weight", "offset": 27131699200, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.11.ffn_norm.weight", "offset": 27839488000, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.12.ffn_gate_inp.weight", "offset": 27839508480, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.12.ffn_down_shexp.weight", "offset": 27842785280, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.12.ffn_gate_shexp.weight", "offset": 27855687680, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.12.ffn_up_shexp.weight", "offset": 27864535040, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.12.attn_kv_a_norm.weight", "offset": 27873382400, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.12.attn_kv_a_mqa.weight", "offset": 27873384448, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.12.attn_kv_b.weight", "offset": 27875043328, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.12.attn_output.weight", "offset": 27884480512, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.12.attn_q_a_norm.weight", "offset": 27931666432, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.12.attn_q_a.weight", "offset": 27931672576, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.12.attn_q_b.weight", "offset": 27936096256, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.12.attn_norm.weight", "offset": 27957329920, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.12.ffn_down_exps.weight", "offset": 27957350400, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.12.ffn_gate_exps.weight", "offset": 28989542400, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.12.ffn_up_exps.weight", "offset": 29697331200, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 
}, { "name": "blk.12.ffn_norm.weight", "offset": 30405120000, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.13.ffn_gate_inp.weight", "offset": 30405140480, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.13.ffn_down_shexp.weight", "offset": 30408417280, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.13.ffn_gate_shexp.weight", "offset": 30417264640, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.13.ffn_up_shexp.weight", "offset": 30426112000, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.13.attn_kv_a_norm.weight", "offset": 30434959360, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.13.attn_kv_a_mqa.weight", "offset": 30434961408, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.13.attn_kv_b.weight", "offset": 30436620288, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.13.attn_output.weight", "offset": 30446057472, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.13.attn_q_a_norm.weight", "offset": 30493243392, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.13.attn_q_a.weight", "offset": 30493249536, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.13.attn_q_b.weight", "offset": 30497673216, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.13.attn_norm.weight", "offset": 30518906880, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.13.ffn_down_exps.weight", "offset": 30518927360, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.13.ffn_gate_exps.weight", "offset": 31226716160, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.13.ffn_up_exps.weight", "offset": 31934504960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.13.ffn_norm.weight", "offset": 32642293760, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": 
"blk.14.ffn_gate_inp.weight", "offset": 32642314240, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.14.ffn_down_shexp.weight", "offset": 32645591040, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.14.ffn_gate_shexp.weight", "offset": 32654438400, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.14.ffn_up_shexp.weight", "offset": 32663285760, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.14.attn_kv_a_norm.weight", "offset": 32672133120, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.14.attn_kv_a_mqa.weight", "offset": 32672135168, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.14.attn_kv_b.weight", "offset": 32673794048, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.14.attn_output.weight", "offset": 32683231232, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.14.attn_q_a_norm.weight", "offset": 32730417152, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.14.attn_q_a.weight", "offset": 32730423296, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.14.attn_q_b.weight", "offset": 32734846976, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.14.attn_norm.weight", "offset": 32756080640, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.14.ffn_down_exps.weight", "offset": 32756101120, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.14.ffn_gate_exps.weight", "offset": 33463889920, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.14.ffn_up_exps.weight", "offset": 34171678720, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.14.ffn_norm.weight", "offset": 34879467520, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.15.ffn_gate_inp.weight", "offset": 34879488000, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": 
"blk.15.ffn_down_shexp.weight", "offset": 34882764800, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.15.ffn_gate_shexp.weight", "offset": 34895667200, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.15.ffn_up_shexp.weight", "offset": 34904514560, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.15.attn_kv_a_norm.weight", "offset": 34913361920, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.15.attn_kv_a_mqa.weight", "offset": 34913363968, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.15.attn_kv_b.weight", "offset": 34915022848, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.15.attn_output.weight", "offset": 34924460032, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.15.attn_q_a_norm.weight", "offset": 34971645952, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.15.attn_q_a.weight", "offset": 34971652096, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.15.attn_q_b.weight", "offset": 34976075776, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.15.attn_norm.weight", "offset": 34997309440, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.15.ffn_down_exps.weight", "offset": 34997329920, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.15.ffn_gate_exps.weight", "offset": 36029521920, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.15.ffn_up_exps.weight", "offset": 36737310720, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.15.ffn_norm.weight", "offset": 37445099520, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.16.ffn_gate_inp.weight", "offset": 37445120000, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.16.ffn_down_shexp.weight", "offset": 37448396800, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": 
"blk.16.ffn_gate_shexp.weight", "offset": 37457244160, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.16.ffn_up_shexp.weight", "offset": 37466091520, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.16.attn_kv_a_norm.weight", "offset": 37474938880, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.16.attn_kv_a_mqa.weight", "offset": 37474940928, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.16.attn_kv_b.weight", "offset": 37476599808, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.16.attn_output.weight", "offset": 37486036992, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.16.attn_q_a_norm.weight", "offset": 37533222912, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.16.attn_q_a.weight", "offset": 37533229056, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.16.attn_q_b.weight", "offset": 37537652736, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.16.attn_norm.weight", "offset": 37558886400, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.16.ffn_down_exps.weight", "offset": 37558906880, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.16.ffn_gate_exps.weight", "offset": 38266695680, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.16.ffn_up_exps.weight", "offset": 38974484480, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.16.ffn_norm.weight", "offset": 39682273280, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.17.ffn_gate_inp.weight", "offset": 39682293760, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.17.ffn_down_shexp.weight", "offset": 39685570560, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.17.ffn_gate_shexp.weight", "offset": 39694417920, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": 
"blk.17.ffn_up_shexp.weight", "offset": 39703265280, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.17.attn_kv_a_norm.weight", "offset": 39712112640, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.17.attn_kv_a_mqa.weight", "offset": 39712114688, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.17.attn_kv_b.weight", "offset": 39713773568, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.17.attn_output.weight", "offset": 39723210752, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.17.attn_q_a_norm.weight", "offset": 39770396672, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.17.attn_q_a.weight", "offset": 39770402816, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.17.attn_q_b.weight", "offset": 39774826496, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.17.attn_norm.weight", "offset": 39796060160, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.17.ffn_down_exps.weight", "offset": 39796080640, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.17.ffn_gate_exps.weight", "offset": 40503869440, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.17.ffn_up_exps.weight", "offset": 41211658240, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.17.ffn_norm.weight", "offset": 41919447040, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.18.attn_norm.weight", "offset": 41919467520, "shape": [ 5120 ], "size": 20480, "type": 0 }, { "name": "blk.18.ffn_down_exps.weight", "offset": 41919488000, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.18.ffn_gate_exps.weight", "offset": 42951680000, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.18.ffn_up_exps.weight", "offset": 43659468800, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { 
"name": "blk.18.ffn_gate_inp.weight", "offset": 44367257600, "shape": [ 5120, 160 ], "size": 707788800, "type": 0 }, { "name": "blk.18.ffn_down_shexp.weight", "offset": 44370534400, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.18.ffn_gate_shexp.weight", "offset": 44383436800, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.18.ffn_up_shexp.weight", "offset": 44392284160, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.18.ffn_norm.weight", "offset": 44401131520, "shape": [ 5120 ], "size": 8847360, "type": 0 }, { "name": "blk.18.attn_kv_a_norm.weight", "offset": 44401152000, "shape": [ 512 ], "size": 20480, "type": 0 }, { "name": "blk.18.attn_kv_a_mqa.weight", "offset": 44401154048, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.18.attn_kv_b.weight", "offset": 44402812928, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.18.attn_output.weight", "offset": 44412250112, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.18.attn_q_a_norm.weight", "offset": 44459436032, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.18.attn_q_a.weight", "offset": 44459442176, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.18.attn_q_b.weight", "offset": 44463865856, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.19.ffn_gate_inp.weight", "offset": 44485099520, "shape": [ 5120, 160 ], "size": 21233664, "type": 0 }, { "name": "blk.19.ffn_down_shexp.weight", "offset": 44488376320, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.19.ffn_gate_shexp.weight", "offset": 44497223680, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.19.ffn_up_shexp.weight", "offset": 44506071040, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.19.attn_kv_a_norm.weight", "offset": 44514918400, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": 
"blk.19.attn_kv_a_mqa.weight", "offset": 44514920448, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.19.attn_kv_b.weight", "offset": 44516579328, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.19.attn_output.weight", "offset": 44526016512, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.19.attn_q_a_norm.weight", "offset": 44573202432, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.19.attn_q_a.weight", "offset": 44573208576, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.19.attn_q_b.weight", "offset": 44577632256, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.19.attn_norm.weight", "offset": 44598865920, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.19.ffn_down_exps.weight", "offset": 44598886400, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.19.ffn_gate_exps.weight", "offset": 45306675200, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.19.ffn_up_exps.weight", "offset": 46014464000, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.19.ffn_norm.weight", "offset": 46722252800, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.20.ffn_gate_inp.weight", "offset": 46722273280, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.20.ffn_down_shexp.weight", "offset": 46725550080, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.20.ffn_gate_shexp.weight", "offset": 46734397440, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.20.ffn_up_shexp.weight", "offset": 46743244800, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.20.attn_kv_a_norm.weight", "offset": 46752092160, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.20.attn_kv_a_mqa.weight", "offset": 46752094208, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": 
"blk.20.attn_kv_b.weight", "offset": 46753753088, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.20.attn_output.weight", "offset": 46763190272, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.20.attn_q_a_norm.weight", "offset": 46810376192, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.20.attn_q_a.weight", "offset": 46810382336, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.20.attn_q_b.weight", "offset": 46814806016, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.20.attn_norm.weight", "offset": 46836039680, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.20.ffn_down_exps.weight", "offset": 46836060160, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.20.ffn_gate_exps.weight", "offset": 47543848960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.20.ffn_up_exps.weight", "offset": 48251637760, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.20.ffn_norm.weight", "offset": 48959426560, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.21.ffn_gate_inp.weight", "offset": 48959447040, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.21.ffn_down_shexp.weight", "offset": 48962723840, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.21.ffn_gate_shexp.weight", "offset": 48975626240, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.21.ffn_up_shexp.weight", "offset": 48984473600, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.21.attn_kv_a_norm.weight", "offset": 48993320960, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.21.attn_kv_a_mqa.weight", "offset": 48993323008, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.21.attn_kv_b.weight", "offset": 48994981888, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": 
"blk.21.attn_output.weight", "offset": 49004419072, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.21.attn_q_a_norm.weight", "offset": 49051604992, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.21.attn_q_a.weight", "offset": 49051611136, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.21.attn_q_b.weight", "offset": 49056034816, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.21.attn_norm.weight", "offset": 49077268480, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.21.ffn_down_exps.weight", "offset": 49077288960, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.21.ffn_gate_exps.weight", "offset": 50109480960, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.21.ffn_up_exps.weight", "offset": 50817269760, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.21.ffn_norm.weight", "offset": 51525058560, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.22.ffn_gate_inp.weight", "offset": 51525079040, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.22.ffn_down_shexp.weight", "offset": 51528355840, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.22.ffn_gate_shexp.weight", "offset": 51537203200, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.22.ffn_up_shexp.weight", "offset": 51546050560, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.22.attn_kv_a_norm.weight", "offset": 51554897920, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.22.attn_kv_a_mqa.weight", "offset": 51554899968, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.22.attn_kv_b.weight", "offset": 51556558848, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.22.attn_output.weight", "offset": 51565996032, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": 
"blk.22.attn_q_a_norm.weight", "offset": 51613181952, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.22.attn_q_a.weight", "offset": 51613188096, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.22.attn_q_b.weight", "offset": 51617611776, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.22.attn_norm.weight", "offset": 51638845440, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.22.ffn_down_exps.weight", "offset": 51638865920, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.22.ffn_gate_exps.weight", "offset": 52346654720, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.22.ffn_up_exps.weight", "offset": 53054443520, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.22.ffn_norm.weight", "offset": 53762232320, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.23.ffn_gate_inp.weight", "offset": 53762252800, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.23.ffn_down_shexp.weight", "offset": 53765529600, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.23.ffn_gate_shexp.weight", "offset": 53774376960, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.23.ffn_up_shexp.weight", "offset": 53783224320, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.23.attn_kv_a_norm.weight", "offset": 53792071680, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.23.attn_kv_a_mqa.weight", "offset": 53792073728, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.23.attn_kv_b.weight", "offset": 53793732608, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.23.attn_output.weight", "offset": 53803169792, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.23.attn_q_a_norm.weight", "offset": 53850355712, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": 
"blk.23.attn_q_a.weight", "offset": 53850361856, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.23.attn_q_b.weight", "offset": 53854785536, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.23.attn_norm.weight", "offset": 53876019200, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.23.ffn_down_exps.weight", "offset": 53876039680, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.23.ffn_gate_exps.weight", "offset": 54583828480, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.23.ffn_up_exps.weight", "offset": 55291617280, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.23.ffn_norm.weight", "offset": 55999406080, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.24.ffn_gate_inp.weight", "offset": 55999426560, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.24.ffn_down_shexp.weight", "offset": 56002703360, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.24.ffn_gate_shexp.weight", "offset": 56015605760, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.24.ffn_up_shexp.weight", "offset": 56024453120, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.24.attn_kv_a_norm.weight", "offset": 56033300480, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.24.attn_kv_a_mqa.weight", "offset": 56033302528, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.24.attn_kv_b.weight", "offset": 56034961408, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.24.attn_output.weight", "offset": 56044398592, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.24.attn_q_a_norm.weight", "offset": 56091584512, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.24.attn_q_a.weight", "offset": 56091590656, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": 
"blk.24.attn_q_b.weight", "offset": 56096014336, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.24.attn_norm.weight", "offset": 56117248000, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.24.ffn_down_exps.weight", "offset": 56117268480, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.24.ffn_gate_exps.weight", "offset": 57149460480, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.24.ffn_up_exps.weight", "offset": 57857249280, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.24.ffn_norm.weight", "offset": 58565038080, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.25.ffn_gate_inp.weight", "offset": 58565058560, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.25.ffn_down_shexp.weight", "offset": 58568335360, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.25.ffn_gate_shexp.weight", "offset": 58577182720, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.25.ffn_up_shexp.weight", "offset": 58586030080, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.25.attn_kv_a_norm.weight", "offset": 58594877440, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.25.attn_kv_a_mqa.weight", "offset": 58594879488, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.25.attn_kv_b.weight", "offset": 58596538368, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.25.attn_output.weight", "offset": 58605975552, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.25.attn_q_a_norm.weight", "offset": 58653161472, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.25.attn_q_a.weight", "offset": 58653167616, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.25.attn_q_b.weight", "offset": 58657591296, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": 
"blk.25.attn_norm.weight", "offset": 58678824960, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.25.ffn_down_exps.weight", "offset": 58678845440, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.25.ffn_gate_exps.weight", "offset": 59386634240, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.25.ffn_up_exps.weight", "offset": 60094423040, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.25.ffn_norm.weight", "offset": 60802211840, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.26.ffn_gate_inp.weight", "offset": 60802232320, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.26.ffn_down_shexp.weight", "offset": 60805509120, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.26.ffn_gate_shexp.weight", "offset": 60814356480, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.26.ffn_up_shexp.weight", "offset": 60823203840, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.26.attn_kv_a_norm.weight", "offset": 60832051200, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.26.attn_kv_a_mqa.weight", "offset": 60832053248, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.26.attn_kv_b.weight", "offset": 60833712128, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.26.attn_output.weight", "offset": 60843149312, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.26.attn_q_a_norm.weight", "offset": 60890335232, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.26.attn_q_a.weight", "offset": 60890341376, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.26.attn_q_b.weight", "offset": 60894765056, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.26.attn_norm.weight", "offset": 60915998720, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": 
"blk.26.ffn_down_exps.weight", "offset": 60916019200, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.26.ffn_gate_exps.weight", "offset": 61623808000, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.26.ffn_up_exps.weight", "offset": 62331596800, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.26.ffn_norm.weight", "offset": 63039385600, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.27.ffn_gate_inp.weight", "offset": 63039406080, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.27.ffn_down_shexp.weight", "offset": 63042682880, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.27.ffn_gate_shexp.weight", "offset": 63055585280, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.27.ffn_up_shexp.weight", "offset": 63064432640, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.27.attn_kv_a_norm.weight", "offset": 63073280000, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.27.attn_kv_a_mqa.weight", "offset": 63073282048, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.27.attn_kv_b.weight", "offset": 63074940928, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.27.attn_output.weight", "offset": 63084378112, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.27.attn_q_a_norm.weight", "offset": 63131564032, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.27.attn_q_a.weight", "offset": 63131570176, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.27.attn_q_b.weight", "offset": 63135993856, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.27.attn_norm.weight", "offset": 63157227520, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.27.ffn_down_exps.weight", "offset": 63157248000, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { 
"name": "blk.27.ffn_gate_exps.weight", "offset": 64189440000, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.27.ffn_up_exps.weight", "offset": 64897228800, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.27.ffn_norm.weight", "offset": 65605017600, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.28.ffn_gate_inp.weight", "offset": 65605038080, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.28.ffn_down_shexp.weight", "offset": 65608314880, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.28.ffn_gate_shexp.weight", "offset": 65617162240, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.28.ffn_up_shexp.weight", "offset": 65626009600, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.28.attn_kv_a_norm.weight", "offset": 65634856960, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.28.attn_kv_a_mqa.weight", "offset": 65634859008, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.28.attn_kv_b.weight", "offset": 65636517888, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.28.attn_output.weight", "offset": 65645955072, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.28.attn_q_a_norm.weight", "offset": 65693140992, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.28.attn_q_a.weight", "offset": 65693147136, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.28.attn_q_b.weight", "offset": 65697570816, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.28.attn_norm.weight", "offset": 65718804480, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.28.ffn_down_exps.weight", "offset": 65718824960, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.28.ffn_gate_exps.weight", "offset": 66426613760, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 
}, { "name": "blk.28.ffn_up_exps.weight", "offset": 67134402560, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.28.ffn_norm.weight", "offset": 67842191360, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.29.ffn_gate_inp.weight", "offset": 67842211840, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.29.ffn_down_shexp.weight", "offset": 67845488640, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.29.ffn_gate_shexp.weight", "offset": 67854336000, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.29.ffn_up_shexp.weight", "offset": 67863183360, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.29.attn_kv_a_norm.weight", "offset": 67872030720, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.29.attn_kv_a_mqa.weight", "offset": 67872032768, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.29.attn_kv_b.weight", "offset": 67873691648, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.29.attn_output.weight", "offset": 67883128832, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.29.attn_q_a_norm.weight", "offset": 67930314752, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.29.attn_q_a.weight", "offset": 67930320896, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.29.attn_q_b.weight", "offset": 67934744576, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.29.attn_norm.weight", "offset": 67955978240, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.29.ffn_down_exps.weight", "offset": 67955998720, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.29.ffn_gate_exps.weight", "offset": 68663787520, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.29.ffn_up_exps.weight", "offset": 69371576320, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 
12 }, { "name": "blk.29.ffn_norm.weight", "offset": 70079365120, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.30.ffn_gate_inp.weight", "offset": 70079385600, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.30.ffn_down_shexp.weight", "offset": 70082662400, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.30.ffn_gate_shexp.weight", "offset": 70095564800, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.30.ffn_up_shexp.weight", "offset": 70104412160, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.30.attn_kv_a_norm.weight", "offset": 70113259520, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.30.attn_kv_a_mqa.weight", "offset": 70113261568, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.30.attn_kv_b.weight", "offset": 70114920448, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.30.attn_output.weight", "offset": 70124357632, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.30.attn_q_a_norm.weight", "offset": 70171543552, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.30.attn_q_a.weight", "offset": 70171549696, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.30.attn_q_b.weight", "offset": 70175973376, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.30.attn_norm.weight", "offset": 70197207040, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.30.ffn_down_exps.weight", "offset": 70197227520, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.30.ffn_gate_exps.weight", "offset": 71229419520, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.30.ffn_up_exps.weight", "offset": 71937208320, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.30.ffn_norm.weight", "offset": 72644997120, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { 
"name": "blk.31.attn_norm.weight", "offset": 72645017600, "shape": [ 5120 ], "size": 20480, "type": 0 }, { "name": "blk.31.ffn_down_exps.weight", "offset": 72645038080, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.31.ffn_gate_exps.weight", "offset": 73352826880, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.31.ffn_up_exps.weight", "offset": 74060615680, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.31.ffn_gate_inp.weight", "offset": 74768404480, "shape": [ 5120, 160 ], "size": 707788800, "type": 0 }, { "name": "blk.31.ffn_down_shexp.weight", "offset": 74771681280, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.31.ffn_gate_shexp.weight", "offset": 74780528640, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.31.ffn_up_shexp.weight", "offset": 74789376000, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.31.ffn_norm.weight", "offset": 74798223360, "shape": [ 5120 ], "size": 8847360, "type": 0 }, { "name": "blk.31.attn_kv_a_norm.weight", "offset": 74798243840, "shape": [ 512 ], "size": 20480, "type": 0 }, { "name": "blk.31.attn_kv_a_mqa.weight", "offset": 74798245888, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.31.attn_kv_b.weight", "offset": 74799904768, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.31.attn_output.weight", "offset": 74809341952, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.31.attn_q_a_norm.weight", "offset": 74856527872, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.31.attn_q_a.weight", "offset": 74856534016, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.31.attn_q_b.weight", "offset": 74860957696, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.32.ffn_gate_inp.weight", "offset": 74882191360, "shape": [ 5120, 160 ], "size": 21233664, "type": 0 }, { "name": 
"blk.32.ffn_down_shexp.weight", "offset": 74885468160, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.32.ffn_gate_shexp.weight", "offset": 74894315520, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.32.ffn_up_shexp.weight", "offset": 74903162880, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.32.attn_kv_a_norm.weight", "offset": 74912010240, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.32.attn_kv_a_mqa.weight", "offset": 74912012288, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.32.attn_kv_b.weight", "offset": 74913671168, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.32.attn_output.weight", "offset": 74923108352, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.32.attn_q_a_norm.weight", "offset": 74970294272, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.32.attn_q_a.weight", "offset": 74970300416, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.32.attn_q_b.weight", "offset": 74974724096, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.32.attn_norm.weight", "offset": 74995957760, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.32.ffn_down_exps.weight", "offset": 74995978240, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.32.ffn_gate_exps.weight", "offset": 75703767040, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.32.ffn_up_exps.weight", "offset": 76411555840, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.32.ffn_norm.weight", "offset": 77119344640, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.33.ffn_gate_inp.weight", "offset": 77119365120, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.33.ffn_down_shexp.weight", "offset": 77122641920, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": 
"blk.33.ffn_gate_shexp.weight", "offset": 77135544320, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.33.ffn_up_shexp.weight", "offset": 77144391680, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.33.attn_kv_a_norm.weight", "offset": 77153239040, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.33.attn_kv_a_mqa.weight", "offset": 77153241088, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.33.attn_kv_b.weight", "offset": 77154899968, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.33.attn_output.weight", "offset": 77164337152, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.33.attn_q_a_norm.weight", "offset": 77211523072, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.33.attn_q_a.weight", "offset": 77211529216, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.33.attn_q_b.weight", "offset": 77215952896, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.33.attn_norm.weight", "offset": 77237186560, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.33.ffn_down_exps.weight", "offset": 77237207040, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.33.ffn_gate_exps.weight", "offset": 78269399040, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.33.ffn_up_exps.weight", "offset": 78977187840, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.33.ffn_norm.weight", "offset": 79684976640, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.34.ffn_gate_inp.weight", "offset": 79684997120, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.34.ffn_down_shexp.weight", "offset": 79688273920, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.34.ffn_gate_shexp.weight", "offset": 79697121280, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": 
"blk.34.ffn_up_shexp.weight", "offset": 79705968640, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.34.attn_kv_a_norm.weight", "offset": 79714816000, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.34.attn_kv_a_mqa.weight", "offset": 79714818048, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.34.attn_kv_b.weight", "offset": 79716476928, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.34.attn_output.weight", "offset": 79725914112, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.34.attn_q_a_norm.weight", "offset": 79773100032, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.34.attn_q_a.weight", "offset": 79773106176, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.34.attn_q_b.weight", "offset": 79777529856, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.34.attn_norm.weight", "offset": 79798763520, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.34.ffn_down_exps.weight", "offset": 79798784000, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.34.ffn_gate_exps.weight", "offset": 80506572800, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.34.ffn_up_exps.weight", "offset": 81214361600, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.34.ffn_norm.weight", "offset": 81922150400, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.35.ffn_gate_inp.weight", "offset": 81922170880, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.35.ffn_down_shexp.weight", "offset": 81925447680, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.35.ffn_gate_shexp.weight", "offset": 81934295040, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.35.ffn_up_shexp.weight", "offset": 81943142400, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": 
"blk.35.attn_kv_a_norm.weight", "offset": 81951989760, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.35.attn_kv_a_mqa.weight", "offset": 81951991808, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.35.attn_kv_b.weight", "offset": 81953650688, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.35.attn_output.weight", "offset": 81963087872, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.35.attn_q_a_norm.weight", "offset": 82010273792, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.35.attn_q_a.weight", "offset": 82010279936, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.35.attn_q_b.weight", "offset": 82014703616, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.35.attn_norm.weight", "offset": 82035937280, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.35.ffn_down_exps.weight", "offset": 82035957760, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.35.ffn_gate_exps.weight", "offset": 82743746560, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.35.ffn_up_exps.weight", "offset": 83451535360, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.35.ffn_norm.weight", "offset": 84159324160, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.36.ffn_gate_inp.weight", "offset": 84159344640, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.36.ffn_down_shexp.weight", "offset": 84162621440, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.36.ffn_gate_shexp.weight", "offset": 84175523840, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.36.ffn_up_shexp.weight", "offset": 84184371200, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.36.attn_kv_a_norm.weight", "offset": 84193218560, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": 
"blk.36.attn_kv_a_mqa.weight", "offset": 84193220608, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.36.attn_kv_b.weight", "offset": 84194879488, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.36.attn_output.weight", "offset": 84204316672, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.36.attn_q_a_norm.weight", "offset": 84251502592, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.36.attn_q_a.weight", "offset": 84251508736, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.36.attn_q_b.weight", "offset": 84255932416, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.36.attn_norm.weight", "offset": 84277166080, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.36.ffn_down_exps.weight", "offset": 84277186560, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.36.ffn_gate_exps.weight", "offset": 85309378560, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.36.ffn_up_exps.weight", "offset": 86017167360, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.36.ffn_norm.weight", "offset": 86724956160, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.37.ffn_gate_inp.weight", "offset": 86724976640, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.37.ffn_down_shexp.weight", "offset": 86728253440, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.37.ffn_gate_shexp.weight", "offset": 86737100800, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.37.ffn_up_shexp.weight", "offset": 86745948160, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.37.attn_kv_a_norm.weight", "offset": 86754795520, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.37.attn_kv_a_mqa.weight", "offset": 86754797568, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": 
"blk.37.attn_kv_b.weight", "offset": 86756456448, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.37.attn_output.weight", "offset": 86765893632, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.37.attn_q_a_norm.weight", "offset": 86813079552, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.37.attn_q_a.weight", "offset": 86813085696, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.37.attn_q_b.weight", "offset": 86817509376, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.37.attn_norm.weight", "offset": 86838743040, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.37.ffn_down_exps.weight", "offset": 86838763520, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.37.ffn_gate_exps.weight", "offset": 87546552320, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.37.ffn_up_exps.weight", "offset": 88254341120, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.37.ffn_norm.weight", "offset": 88962129920, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.38.ffn_gate_inp.weight", "offset": 88962150400, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.38.ffn_down_shexp.weight", "offset": 88965427200, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.38.ffn_gate_shexp.weight", "offset": 88974274560, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.38.ffn_up_shexp.weight", "offset": 88983121920, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.38.attn_kv_a_norm.weight", "offset": 88991969280, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.38.attn_kv_a_mqa.weight", "offset": 88991971328, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.38.attn_kv_b.weight", "offset": 88993630208, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": 
"blk.38.attn_output.weight", "offset": 89003067392, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.38.attn_q_a_norm.weight", "offset": 89050253312, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.38.attn_q_a.weight", "offset": 89050259456, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.38.attn_q_b.weight", "offset": 89054683136, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.38.attn_norm.weight", "offset": 89075916800, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.38.ffn_down_exps.weight", "offset": 89075937280, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.38.ffn_gate_exps.weight", "offset": 89783726080, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.38.ffn_up_exps.weight", "offset": 90491514880, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.38.ffn_norm.weight", "offset": 91199303680, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.39.ffn_gate_inp.weight", "offset": 91199324160, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.39.ffn_down_shexp.weight", "offset": 91202600960, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.39.ffn_gate_shexp.weight", "offset": 91215503360, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.39.ffn_up_shexp.weight", "offset": 91224350720, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.39.attn_kv_a_norm.weight", "offset": 91233198080, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.39.attn_kv_a_mqa.weight", "offset": 91233200128, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.39.attn_kv_b.weight", "offset": 91234859008, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.39.attn_output.weight", "offset": 91244296192, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": 
"blk.39.attn_q_a_norm.weight", "offset": 91291482112, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.39.attn_q_a.weight", "offset": 91291488256, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.39.attn_q_b.weight", "offset": 91295911936, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.39.attn_norm.weight", "offset": 91317145600, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.39.ffn_down_exps.weight", "offset": 91317166080, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.39.ffn_gate_exps.weight", "offset": 92349358080, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.39.ffn_up_exps.weight", "offset": 93057146880, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.39.ffn_norm.weight", "offset": 93764935680, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.40.ffn_gate_inp.weight", "offset": 93764956160, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.40.ffn_down_shexp.weight", "offset": 93768232960, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.40.ffn_gate_shexp.weight", "offset": 93777080320, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.40.ffn_up_shexp.weight", "offset": 93785927680, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.40.attn_kv_a_norm.weight", "offset": 93794775040, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.40.attn_kv_a_mqa.weight", "offset": 93794777088, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.40.attn_kv_b.weight", "offset": 93796435968, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.40.attn_output.weight", "offset": 93805873152, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.40.attn_q_a_norm.weight", "offset": 93853059072, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": 
"blk.40.attn_q_a.weight", "offset": 93853065216, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.40.attn_q_b.weight", "offset": 93857488896, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.40.attn_norm.weight", "offset": 93878722560, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.40.ffn_down_exps.weight", "offset": 93878743040, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.40.ffn_gate_exps.weight", "offset": 94586531840, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.40.ffn_up_exps.weight", "offset": 95294320640, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.40.ffn_norm.weight", "offset": 96002109440, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.41.ffn_gate_inp.weight", "offset": 96002129920, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.41.ffn_down_shexp.weight", "offset": 96005406720, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.41.ffn_gate_shexp.weight", "offset": 96014254080, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.41.ffn_up_shexp.weight", "offset": 96023101440, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.41.attn_kv_a_norm.weight", "offset": 96031948800, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.41.attn_kv_a_mqa.weight", "offset": 96031950848, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.41.attn_kv_b.weight", "offset": 96033609728, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.41.attn_output.weight", "offset": 96043046912, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.41.attn_q_a_norm.weight", "offset": 96090232832, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.41.attn_q_a.weight", "offset": 96090238976, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": 
"blk.41.attn_q_b.weight", "offset": 96094662656, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.41.attn_norm.weight", "offset": 96115896320, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.41.ffn_down_exps.weight", "offset": 96115916800, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.41.ffn_gate_exps.weight", "offset": 96823705600, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.41.ffn_up_exps.weight", "offset": 97531494400, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.41.ffn_norm.weight", "offset": 98239283200, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.42.ffn_gate_inp.weight", "offset": 98239303680, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.42.ffn_down_shexp.weight", "offset": 98242580480, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.42.ffn_gate_shexp.weight", "offset": 98255482880, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.42.ffn_up_shexp.weight", "offset": 98264330240, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.42.attn_kv_a_norm.weight", "offset": 98273177600, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.42.attn_kv_a_mqa.weight", "offset": 98273179648, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.42.attn_kv_b.weight", "offset": 98274838528, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.42.attn_output.weight", "offset": 98284275712, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.42.attn_q_a_norm.weight", "offset": 98331461632, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.42.attn_q_a.weight", "offset": 98331467776, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.42.attn_q_b.weight", "offset": 98335891456, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": 
"blk.42.attn_norm.weight", "offset": 98357125120, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.42.ffn_down_exps.weight", "offset": 98357145600, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.42.ffn_gate_exps.weight", "offset": 99389337600, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.42.ffn_up_exps.weight", "offset": 100097126400, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.42.ffn_norm.weight", "offset": 100804915200, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.43.ffn_gate_inp.weight", "offset": 100804935680, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.43.ffn_down_shexp.weight", "offset": 100808212480, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.43.ffn_gate_shexp.weight", "offset": 100817059840, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.43.ffn_up_shexp.weight", "offset": 100825907200, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.43.attn_kv_a_norm.weight", "offset": 100834754560, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.43.attn_kv_a_mqa.weight", "offset": 100834756608, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.43.attn_kv_b.weight", "offset": 100836415488, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.43.attn_output.weight", "offset": 100845852672, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.43.attn_q_a_norm.weight", "offset": 100893038592, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.43.attn_q_a.weight", "offset": 100893044736, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.43.attn_q_b.weight", "offset": 100897468416, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.43.attn_norm.weight", "offset": 100918702080, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { 
"name": "blk.43.ffn_down_exps.weight", "offset": 100918722560, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.43.ffn_gate_exps.weight", "offset": 101626511360, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.43.ffn_up_exps.weight", "offset": 102334300160, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.43.ffn_norm.weight", "offset": 103042088960, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.44.attn_norm.weight", "offset": 103042109440, "shape": [ 5120 ], "size": 20480, "type": 0 }, { "name": "blk.44.ffn_down_exps.weight", "offset": 103042129920, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.44.ffn_gate_exps.weight", "offset": 103749918720, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.44.ffn_up_exps.weight", "offset": 104457707520, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.44.ffn_gate_inp.weight", "offset": 105165496320, "shape": [ 5120, 160 ], "size": 707788800, "type": 0 }, { "name": "blk.44.ffn_down_shexp.weight", "offset": 105168773120, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.44.ffn_gate_shexp.weight", "offset": 105177620480, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.44.ffn_up_shexp.weight", "offset": 105186467840, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.44.ffn_norm.weight", "offset": 105195315200, "shape": [ 5120 ], "size": 8847360, "type": 0 }, { "name": "blk.44.attn_kv_a_norm.weight", "offset": 105195335680, "shape": [ 512 ], "size": 20480, "type": 0 }, { "name": "blk.44.attn_kv_a_mqa.weight", "offset": 105195337728, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.44.attn_kv_b.weight", "offset": 105196996608, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.44.attn_output.weight", "offset": 105206433792, "shape": [ 16384, 5120 
], "size": 9437184, "type": 12 }, { "name": "blk.44.attn_q_a_norm.weight", "offset": 105253619712, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.44.attn_q_a.weight", "offset": 105253625856, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.44.attn_q_b.weight", "offset": 105258049536, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.45.ffn_gate_inp.weight", "offset": 105279283200, "shape": [ 5120, 160 ], "size": 21233664, "type": 0 }, { "name": "blk.45.ffn_down_shexp.weight", "offset": 105282560000, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.45.ffn_gate_shexp.weight", "offset": 105295462400, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.45.ffn_up_shexp.weight", "offset": 105304309760, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.45.attn_kv_a_norm.weight", "offset": 105313157120, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.45.attn_kv_a_mqa.weight", "offset": 105313159168, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.45.attn_kv_b.weight", "offset": 105314818048, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.45.attn_output.weight", "offset": 105324255232, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.45.attn_q_a_norm.weight", "offset": 105371441152, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.45.attn_q_a.weight", "offset": 105371447296, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.45.attn_q_b.weight", "offset": 105375870976, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.45.attn_norm.weight", "offset": 105397104640, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.45.ffn_down_exps.weight", "offset": 105397125120, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.45.ffn_gate_exps.weight", "offset": 106429317120, "shape": [ 5120, 1536, 
160 ], "size": 1032192000, "type": 12 }, { "name": "blk.45.ffn_up_exps.weight", "offset": 107137105920, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.45.ffn_norm.weight", "offset": 107844894720, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.46.ffn_gate_inp.weight", "offset": 107844915200, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.46.ffn_down_shexp.weight", "offset": 107848192000, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.46.ffn_gate_shexp.weight", "offset": 107857039360, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.46.ffn_up_shexp.weight", "offset": 107865886720, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.46.attn_kv_a_norm.weight", "offset": 107874734080, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.46.attn_kv_a_mqa.weight", "offset": 107874736128, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.46.attn_kv_b.weight", "offset": 107876395008, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.46.attn_output.weight", "offset": 107885832192, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.46.attn_q_a_norm.weight", "offset": 107933018112, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.46.attn_q_a.weight", "offset": 107933024256, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.46.attn_q_b.weight", "offset": 107937447936, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.46.attn_norm.weight", "offset": 107958681600, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.46.ffn_down_exps.weight", "offset": 107958702080, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.46.ffn_gate_exps.weight", "offset": 108666490880, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.46.ffn_up_exps.weight", "offset": 109374279680, 
"shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.46.ffn_norm.weight", "offset": 110082068480, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.47.ffn_gate_inp.weight", "offset": 110082088960, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.47.ffn_down_shexp.weight", "offset": 110085365760, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.47.ffn_gate_shexp.weight", "offset": 110094213120, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.47.ffn_up_shexp.weight", "offset": 110103060480, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.47.attn_kv_a_norm.weight", "offset": 110111907840, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.47.attn_kv_a_mqa.weight", "offset": 110111909888, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.47.attn_kv_b.weight", "offset": 110113568768, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.47.attn_output.weight", "offset": 110123005952, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.47.attn_q_a_norm.weight", "offset": 110170191872, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.47.attn_q_a.weight", "offset": 110170198016, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.47.attn_q_b.weight", "offset": 110174621696, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.47.attn_norm.weight", "offset": 110195855360, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.47.ffn_down_exps.weight", "offset": 110195875840, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.47.ffn_gate_exps.weight", "offset": 110903664640, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.47.ffn_up_exps.weight", "offset": 111611453440, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.47.ffn_norm.weight", 
"offset": 112319242240, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.48.ffn_gate_inp.weight", "offset": 112319262720, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.48.ffn_down_shexp.weight", "offset": 112322539520, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.48.ffn_gate_shexp.weight", "offset": 112335441920, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.48.ffn_up_shexp.weight", "offset": 112344289280, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.48.attn_kv_a_norm.weight", "offset": 112353136640, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.48.attn_kv_a_mqa.weight", "offset": 112353138688, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.48.attn_kv_b.weight", "offset": 112354797568, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.48.attn_output.weight", "offset": 112364234752, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.48.attn_q_a_norm.weight", "offset": 112411420672, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.48.attn_q_a.weight", "offset": 112411426816, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.48.attn_q_b.weight", "offset": 112415850496, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.48.attn_norm.weight", "offset": 112437084160, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "\u0000\u0000blk.56.attn_q_b.weight\u0002\u0000\u0000", "offset": 112437104640, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.48.ffn_gate_exps.weight", "offset": 113469296640, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.48.ffn_up_exps.weight", "offset": 114177085440, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.48.ffn_norm.weight", "offset": 114884874240, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { 
"name": "blk.49.ffn_gate_inp.weight", "offset": 114884894720, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.49.ffn_down_shexp.weight", "offset": 114888171520, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.49.ffn_gate_shexp.weight", "offset": 114897018880, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.49.ffn_up_shexp.weight", "offset": 114905866240, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.49.attn_kv_a_norm.weight", "offset": 114914713600, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.49.attn_kv_a_mqa.weight", "offset": 114914715648, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.49.attn_kv_b.weight", "offset": 114916374528, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.49.attn_output.weight", "offset": 114925811712, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.49.attn_q_a_norm.weight", "offset": 114972997632, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.49.attn_q_a.weight", "offset": 114973003776, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.49.attn_q_b.weight", "offset": 114977427456, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.49.attn_norm.weight", "offset": 114998661120, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.49.ffn_down_exps.weight", "offset": 114998681600, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.49.ffn_gate_exps.weight", "offset": 115706470400, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.49.ffn_up_exps.weight", "offset": 116414259200, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.49.ffn_norm.weight", "offset": 117122048000, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.50.ffn_gate_inp.weight", "offset": 117122068480, "shape": [ 5120, 160 ], "size": 20480, 
"type": 0 }, { "name": "blk.50.ffn_down_shexp.weight", "offset": 117125345280, "shape": [ 3072, 5120 ], "size": 3276800, "type": 12 }, { "name": "blk.50.ffn_gate_shexp.weight", "offset": 117134192640, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.50.ffn_up_shexp.weight", "offset": 117143040000, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.50.attn_kv_a_norm.weight", "offset": 117151887360, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.50.attn_kv_a_mqa.weight", "offset": 117151889408, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.50.attn_kv_b.weight", "offset": 117153548288, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.50.attn_output.weight", "offset": 117162985472, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.50.attn_q_a_norm.weight", "offset": 117210171392, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.50.attn_q_a.weight", "offset": 117210177536, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.50.attn_q_b.weight", "offset": 117214601216, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.50.attn_norm.weight", "offset": 117235834880, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.50.ffn_down_exps.weight", "offset": 117235855360, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 12 }, { "name": "blk.50.ffn_gate_exps.weight", "offset": 117943644160, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.50.ffn_up_exps.weight", "offset": 118651432960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.50.ffn_norm.weight", "offset": 119359221760, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.51.ffn_gate_inp.weight", "offset": 119359242240, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.51.ffn_down_shexp.weight", "offset": 119362519040, "shape": [ 3072, 5120 ], 
"size": 3276800, "type": 14 }, { "name": "blk.51.ffn_gate_shexp.weight", "offset": 119375421440, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.51.ffn_up_shexp.weight", "offset": 119384268800, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.51.attn_kv_a_norm.weight", "offset": 119393116160, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.51.attn_kv_a_mqa.weight", "offset": 119393118208, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.51.attn_kv_b.weight", "offset": 119394777088, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.51.attn_output.weight", "offset": 119404214272, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.51.attn_q_a_norm.weight", "offset": 119451400192, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.51.attn_q_a.weight", "offset": 119451406336, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.51.attn_q_b.weight", "offset": 119455830016, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.51.attn_norm.weight", "offset": 119477063680, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.51.ffn_down_exps.weight", "offset": 119477084160, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.51.ffn_gate_exps.weight", "offset": 120509276160, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.51.ffn_up_exps.weight", "offset": 121217064960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.51.ffn_norm.weight", "offset": 121924853760, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.52.ffn_gate_inp.weight", "offset": 121924874240, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.52.ffn_down_shexp.weight", "offset": 121928151040, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.52.ffn_gate_shexp.weight", "offset": 121941053440, 
"shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.52.ffn_up_shexp.weight", "offset": 121949900800, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.52.attn_kv_a_norm.weight", "offset": 121958748160, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.52.attn_kv_a_mqa.weight", "offset": 121958750208, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.52.attn_kv_b.weight", "offset": 121960409088, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.52.attn_output.weight", "offset": 121969846272, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.52.attn_q_a_norm.weight", "offset": 122017032192, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.52.attn_q_a.weight", "offset": 122017038336, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.52.attn_q_b.weight", "offset": 122021462016, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.52.attn_norm.weight", "offset": 122042695680, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.52.ffn_down_exps.weight", "offset": 122042716160, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.52.ffn_gate_exps.weight", "offset": 123074908160, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.52.ffn_up_exps.weight", "offset": 123782696960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.52.ffn_norm.weight", "offset": 124490485760, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.53.ffn_gate_inp.weight", "offset": 124490506240, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.53.ffn_down_shexp.weight", "offset": 124493783040, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.53.ffn_gate_shexp.weight", "offset": 124506685440, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.53.ffn_up_shexp.weight", 
"offset": 124515532800, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.53.attn_kv_a_norm.weight", "offset": 124524380160, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.53.attn_kv_a_mqa.weight", "offset": 124524382208, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.53.attn_kv_b.weight", "offset": 124526041088, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.53.attn_output.weight", "offset": 124535478272, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.53.attn_q_a_norm.weight", "offset": 124582664192, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.53.attn_q_a.weight", "offset": 124582670336, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.53.attn_q_b.weight", "offset": 124587094016, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.53.attn_norm.weight", "offset": 124608327680, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.53.ffn_down_exps.weight", "offset": 124608348160, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.53.ffn_gate_exps.weight", "offset": 125640540160, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.53.ffn_up_exps.weight", "offset": 126348328960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.53.ffn_norm.weight", "offset": 127056117760, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.54.ffn_gate_inp.weight", "offset": 127056138240, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.54.ffn_down_shexp.weight", "offset": 127059415040, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.54.ffn_gate_shexp.weight", "offset": 127072317440, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.54.ffn_up_shexp.weight", "offset": 127081164800, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": 
"blk.54.attn_kv_a_norm.weight", "offset": 127090012160, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.54.attn_kv_a_mqa.weight", "offset": 127090014208, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.54.attn_kv_b.weight", "offset": 127091673088, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.54.attn_output.weight", "offset": 127101110272, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.54.attn_q_a_norm.weight", "offset": 127148296192, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.54.attn_q_a.weight", "offset": 127148302336, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.54.attn_q_b.weight", "offset": 127152726016, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.54.attn_norm.weight", "offset": 127173959680, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.54.ffn_down_exps.weight", "offset": 127173980160, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.54.ffn_gate_exps.weight", "offset": 128206172160, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.54.ffn_up_exps.weight", "offset": 128913960960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.54.ffn_norm.weight", "offset": 129621749760, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.55.ffn_gate_inp.weight", "offset": 129621770240, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.55.ffn_down_shexp.weight", "offset": 129625047040, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.55.ffn_gate_shexp.weight", "offset": 129637949440, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.55.ffn_up_shexp.weight", "offset": 129646796800, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.55.attn_kv_a_norm.weight", "offset": 129655644160, "shape": [ 512 ], "size": 8847360, "type": 0 }, 
{ "name": "blk.55.attn_kv_a_mqa.weight", "offset": 129655646208, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.55.attn_kv_b.weight", "offset": 129657305088, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.55.attn_output.weight", "offset": 129666742272, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.55.attn_q_a_norm.weight", "offset": 129713928192, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.55.attn_q_a.weight", "offset": 129713934336, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.55.attn_q_b.weight", "offset": 129718358016, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.55.attn_norm.weight", "offset": 129739591680, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.55.ffn_down_exps.weight", "offset": 129739612160, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.55.ffn_gate_exps.weight", "offset": 130771804160, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.55.ffn_up_exps.weight", "offset": 131479592960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.55.ffn_norm.weight", "offset": 132187381760, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.56.ffn_gate_inp.weight", "offset": 132187402240, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.56.ffn_down_shexp.weight", "offset": 132190679040, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.56.ffn_gate_shexp.weight", "offset": 132203581440, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.56.ffn_up_shexp.weight", "offset": 132212428800, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.56.attn_kv_a_norm.weight", "offset": 132221276160, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.56.attn_kv_a_mqa.weight", "offset": 132221278208, "shape": [ 5120, 576 ], "size": 2048, 
"type": 12 }, { "name": "blk.56.attn_kv_b.weight", "offset": 132222937088, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.56.attn_output.weight", "offset": 132232374272, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.56.attn_q_a_norm.weight", "offset": 132279560192, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.56.attn_q_a.weight", "offset": 132279566336, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.56.attn_q_b.weight", "offset": 132283990016, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.56.attn_norm.weight", "offset": 132305223680, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.56.ffn_down_exps.weight", "offset": 132305244160, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.56.ffn_gate_exps.weight", "offset": 133337436160, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.56.ffn_up_exps.weight", "offset": 134045224960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.56.ffn_norm.weight", "offset": 134753013760, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.57.attn_norm.weight", "offset": 134753034240, "shape": [ 5120 ], "size": 20480, "type": 0 }, { "name": "blk.57.ffn_down_exps.weight", "offset": 134753054720, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.57.ffn_gate_exps.weight", "offset": 135785246720, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.57.ffn_up_exps.weight", "offset": 136493035520, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.57.ffn_gate_inp.weight", "offset": 137200824320, "shape": [ 5120, 160 ], "size": 707788800, "type": 0 }, { "name": "blk.57.ffn_down_shexp.weight", "offset": 137204101120, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.57.ffn_gate_shexp.weight", "offset": 137217003520, 
"shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.57.ffn_up_shexp.weight", "offset": 137225850880, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.57.ffn_norm.weight", "offset": 137234698240, "shape": [ 5120 ], "size": 8847360, "type": 0 }, { "name": "blk.57.attn_kv_a_norm.weight", "offset": 137234718720, "shape": [ 512 ], "size": 20480, "type": 0 }, { "name": "blk.57.attn_kv_a_mqa.weight", "offset": 137234720768, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.57.attn_kv_b.weight", "offset": 137236379648, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.57.attn_output.weight", "offset": 137245816832, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.57.attn_q_a_norm.weight", "offset": 137293002752, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.57.attn_q_a.weight", "offset": 137293008896, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.57.attn_q_b.weight", "offset": 137297432576, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.58.ffn_gate_inp.weight", "offset": 137318666240, "shape": [ 5120, 160 ], "size": 21233664, "type": 0 }, { "name": "blk.58.ffn_down_shexp.weight", "offset": 137321943040, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.58.ffn_gate_shexp.weight", "offset": 137334845440, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.58.ffn_up_shexp.weight", "offset": 137343692800, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.58.attn_kv_a_norm.weight", "offset": 137352540160, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.58.attn_kv_a_mqa.weight", "offset": 137352542208, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.58.attn_kv_b.weight", "offset": 137354201088, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.58.attn_output.weight", "offset": 137363638272, 
"shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.58.attn_q_a_norm.weight", "offset": 137410824192, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.58.attn_q_a.weight", "offset": 137410830336, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.58.attn_q_b.weight", "offset": 137415254016, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.58.attn_norm.weight", "offset": 137436487680, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.58.ffn_down_exps.weight", "offset": 137436508160, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.58.ffn_gate_exps.weight", "offset": 138468700160, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.58.ffn_up_exps.weight", "offset": 139176488960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.58.ffn_norm.weight", "offset": 139884277760, "shape": [ 5120 ], "size": 707788800, "type": 0 }, { "name": "blk.59.ffn_gate_inp.weight", "offset": 139884298240, "shape": [ 5120, 160 ], "size": 20480, "type": 0 }, { "name": "blk.59.ffn_down_shexp.weight", "offset": 139887575040, "shape": [ 3072, 5120 ], "size": 3276800, "type": 14 }, { "name": "blk.59.ffn_gate_shexp.weight", "offset": 139900477440, "shape": [ 5120, 3072 ], "size": 12902400, "type": 12 }, { "name": "blk.59.ffn_up_shexp.weight", "offset": 139909324800, "shape": [ 5120, 3072 ], "size": 8847360, "type": 12 }, { "name": "blk.59.attn_kv_a_norm.weight", "offset": 139918172160, "shape": [ 512 ], "size": 8847360, "type": 0 }, { "name": "blk.59.attn_kv_a_mqa.weight", "offset": 139918174208, "shape": [ 5120, 576 ], "size": 2048, "type": 12 }, { "name": "blk.59.attn_kv_b.weight", "offset": 139919833088, "shape": [ 512, 32768 ], "size": 1658880, "type": 12 }, { "name": "blk.59.attn_output.weight", "offset": 139929270272, "shape": [ 16384, 5120 ], "size": 9437184, "type": 12 }, { "name": "blk.59.attn_q_a_norm.weight", 
"offset": 139976456192, "shape": [ 1536 ], "size": 47185920, "type": 0 }, { "name": "blk.59.attn_q_a.weight", "offset": 139976462336, "shape": [ 5120, 1536 ], "size": 6144, "type": 12 }, { "name": "blk.59.attn_q_b.weight", "offset": 139980886016, "shape": [ 1536, 24576 ], "size": 4423680, "type": 12 }, { "name": "blk.59.attn_norm.weight", "offset": 140002119680, "shape": [ 5120 ], "size": 21233664, "type": 0 }, { "name": "blk.59.ffn_down_exps.weight", "offset": 140002140160, "shape": [ 1536, 5120, 160 ], "size": 20480, "type": 14 }, { "name": "blk.59.ffn_gate_exps.weight", "offset": 141034332160, "shape": [ 5120, 1536, 160 ], "size": 1032192000, "type": 12 }, { "name": "blk.59.ffn_up_exps.weight", "offset": 141742120960, "shape": [ 5120, 1536, 160 ], "size": 707788800, "type": 12 }, { "name": "blk.59.ffn_norm.weight", "offset": 142449909760, "shape": [ 5120 ], "size": 707788800, "type": 0 } ], "version": 3 }