An open-source Mixture-of-Experts code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks.

16B 236B

111.5K Pulls Updated 12 days ago

{ "metadata": { "deepseek2.attention.head_count": 16, "deepseek2.attention.head_count_kv": 16, "deepseek2.attention.key_length": 192, "deepseek2.attention.kv_lora_rank": 512, "deepseek2.attention.layer_norm_rms_epsilon": 0.000001, "deepseek2.attention.value_length": 128, "deepseek2.block_count": 27, "deepseek2.context_length": 163840, "deepseek2.embedding_length": 2048, "deepseek2.expert_count": 64, "deepseek2.expert_feed_forward_length": 1408, "deepseek2.expert_shared_count": 2, "deepseek2.expert_used_count": 6, "deepseek2.expert_weights_scale": 1, "deepseek2.feed_forward_length": 10944, "deepseek2.leading_dense_block_count": 1, "deepseek2.rope.dimension_count": 64, "deepseek2.rope.freq_base": 10000, "deepseek2.rope.scaling.factor": 40, "deepseek2.rope.scaling.original_context_length": 4096, "deepseek2.rope.scaling.type": "yarn", "deepseek2.rope.scaling.yarn_log_multiplier": 0.0707, "deepseek2.vocab_size": 102400, "general.architecture": "deepseek2", "general.file_type": 15, "general.name": "DeepSeek-Coder-V2-Lite-Base", "general.quantization_version": 2, "tokenizer.ggml.add_bos_token": true, "tokenizer.ggml.add_eos_token": false, "tokenizer.ggml.bos_token_id": 100000, "tokenizer.ggml.eos_token_id": 100001, "tokenizer.ggml.merges": "... (99757 values)", "tokenizer.ggml.model": "gpt2", "tokenizer.ggml.padding_token_id": 100001, "tokenizer.ggml.pre": "deepseek-llm", "tokenizer.ggml.token_type": "... (102400 values)", "tokenizer.ggml.tokens": "... (102400 values)" }, "num_params": 15706484224, "tensors": [ { "name": "token_embd.weight", "offset": 172032000, "shape": [ 2048, 102400 ], "size": 172032000, "type": 12 }, { "name": "blk.0.attn_norm.weight", "offset": 289996800, "shape": [ 2048 ], "size": 117964800, "type": 0 }, { "name": "blk.0.ffn_down.weight", "offset": 290004992, "shape": [ 10944, 2048 ], "size": 8192, "type": 8 }, { "name": "blk.0.ffn_gate.weight", "offset": 313819136, "shape": [ 2048, 10944 ], "size": 23814144, "type": 12 }, { "name": "blk.0.ffn_up.weight", "offset": 326426624, "shape": [ 2048, 10944 ], "size": 12607488, "type": 12 }, { "name": "blk.0.ffn_norm.weight", "offset": 339034112, "shape": [ 2048 ], "size": 12607488, "type": 0 }, { "name": "blk.0.attn_kv_a_norm.weight", "offset": 339042304, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.0.attn_kv_a_mqa.weight", "offset": 339044352, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.0.attn_kv_b.weight", "offset": 339707904, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.0.attn_output.weight", "offset": 340887552, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.0.attn_q.weight", "offset": 343246848, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.1.attn_norm.weight", "offset": 346785792, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.1.ffn_down_exps.weight", "offset": 346793984, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.1.ffn_gate_exps.weight", "offset": 542877696, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.1.ffn_up_exps.weight", "offset": 646686720, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.1.ffn_gate_inp.weight", "offset": 750495744, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.1.ffn_down_shexp.weight", "offset": 751020032, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.1.ffn_gate_shexp.weight", "offset": 755750912, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.1.ffn_up_shexp.weight", "offset": 758994944, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.1.ffn_norm.weight", "offset": 762238976, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.1.attn_kv_a_norm.weight", "offset": 762247168, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.1.attn_kv_a_mqa.weight", "offset": 762249216, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.1.attn_kv_b.weight", "offset": 762912768, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.1.attn_output.weight", "offset": 764092416, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.1.attn_q.weight", "offset": 766451712, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.2.attn_norm.weight", "offset": 769990656, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.2.ffn_down_exps.weight", "offset": 769998848, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.2.ffn_gate_exps.weight", "offset": 966082560, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.2.ffn_up_exps.weight", "offset": 1069891584, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.2.ffn_gate_inp.weight", "offset": 1173700608, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.2.ffn_down_shexp.weight", "offset": 1174224896, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.2.ffn_gate_shexp.weight", "offset": 1178955776, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.2.ffn_up_shexp.weight", "offset": 1182199808, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.2.ffn_norm.weight", "offset": 1185443840, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.2.attn_kv_a_norm.weight", "offset": 1185452032, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.2.attn_kv_a_mqa.weight", "offset": 1185454080, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.2.attn_kv_b.weight", "offset": 1186117632, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.2.attn_output.weight", "offset": 1187297280, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.2.attn_q.weight", "offset": 1189656576, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.3.attn_norm.weight", "offset": 1193195520, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.3.ffn_down_exps.weight", "offset": 1193203712, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.3.ffn_gate_exps.weight", "offset": 1320081408, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.3.ffn_up_exps.weight", "offset": 1423890432, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.3.ffn_gate_inp.weight", "offset": 1527699456, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.3.ffn_down_shexp.weight", "offset": 1528223744, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.3.ffn_gate_shexp.weight", "offset": 1531467776, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.3.ffn_up_shexp.weight", "offset": 1534711808, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.3.ffn_norm.weight", "offset": 1537955840, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.3.attn_kv_a_norm.weight", "offset": 1537964032, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.3.attn_kv_a_mqa.weight", "offset": 1537966080, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.3.attn_kv_b.weight", "offset": 1538629632, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.3.attn_output.weight", "offset": 1539809280, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.3.attn_q.weight", "offset": 1542168576, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.4.attn_norm.weight", "offset": 1545707520, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.4.ffn_down_exps.weight", "offset": 1545715712, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.4.ffn_gate_exps.weight", "offset": 1672593408, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.4.ffn_up_exps.weight", "offset": 1776402432, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.4.ffn_gate_inp.weight", "offset": 1880211456, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.4.ffn_down_shexp.weight", "offset": 1880735744, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.4.ffn_gate_shexp.weight", "offset": 1883979776, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.4.ffn_up_shexp.weight", "offset": 1887223808, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.4.ffn_norm.weight", "offset": 1890467840, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.4.attn_kv_a_norm.weight", "offset": 1890476032, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.4.attn_kv_a_mqa.weight", "offset": 1890478080, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.4.attn_kv_b.weight", "offset": 1891141632, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.4.attn_output.weight", "offset": 1892321280, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.4.attn_q.weight", "offset": 1894680576, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.5.attn_norm.weight", "offset": 1898219520, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.5.ffn_down_exps.weight", "offset": 1898227712, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.5.ffn_gate_exps.weight", "offset": 2094311424, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.5.ffn_up_exps.weight", "offset": 2198120448, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.5.ffn_gate_inp.weight", "offset": 2301929472, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.5.ffn_down_shexp.weight", "offset": 2302453760, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.5.ffn_gate_shexp.weight", "offset": 2307184640, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.5.ffn_up_shexp.weight", "offset": 2310428672, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.5.ffn_norm.weight", "offset": 2313672704, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.5.attn_kv_a_norm.weight", "offset": 2313680896, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.5.attn_kv_a_mqa.weight", "offset": 2313682944, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.5.attn_kv_b.weight", "offset": 2314346496, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.5.attn_output.weight", "offset": 2315526144, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.5.attn_q.weight", "offset": 2317885440, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.6.attn_norm.weight", "offset": 2321424384, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.6.ffn_down_exps.weight", "offset": 2321432576, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.6.ffn_gate_exps.weight", "offset": 2448310272, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.6.ffn_up_exps.weight", "offset": 2552119296, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.6.ffn_gate_inp.weight", "offset": 2655928320, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.6.ffn_down_shexp.weight", "offset": 2656452608, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.6.ffn_gate_shexp.weight", "offset": 2659696640, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.6.ffn_up_shexp.weight", "offset": 2662940672, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.6.ffn_norm.weight", "offset": 2666184704, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.6.attn_kv_a_norm.weight", "offset": 2666192896, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.6.attn_kv_a_mqa.weight", "offset": 2666194944, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.6.attn_kv_b.weight", "offset": 2666858496, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.6.attn_output.weight", "offset": 2668038144, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.6.attn_q.weight", "offset": 2670397440, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.7.ffn_gate_inp.weight", "offset": 2673936384, "shape": [ 2048, 64 ], "size": 3538944, "type": 0 }, { "name": "blk.7.ffn_down_shexp.weight", "offset": 2674460672, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.7.ffn_gate_shexp.weight", "offset": 2677704704, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.7.ffn_up_shexp.weight", "offset": 2680948736, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.7.attn_kv_a_norm.weight", "offset": 2684192768, "shape": [ 512 ], "size": 3244032, "type": 0 }, { "name": "blk.7.attn_kv_a_mqa.weight", "offset": 2684194816, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.7.attn_kv_b.weight", "offset": 2684858368, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.7.attn_output.weight", "offset": 2686038016, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.7.attn_q.weight", "offset": 2688397312, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "output_norm.weight", "offset": 2691936256, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.10.attn_norm.weight", "offset": 2691944448, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.10.ffn_down_exps.weight", "offset": 2691952640, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.10.ffn_gate_exps.weight", "offset": 2818830336, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.10.ffn_up_exps.weight", "offset": 2922639360, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.10.ffn_gate_inp.weight", "offset": 3026448384, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.10.ffn_down_shexp.weight", "offset": 3026972672, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.10.ffn_gate_shexp.weight", "offset": 3030216704, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.10.ffn_up_shexp.weight", "offset": 3033460736, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.10.ffn_norm.weight", "offset": 3036704768, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.10.attn_kv_a_norm.weight", "offset": 3036712960, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.10.attn_kv_a_mqa.weight", "offset": 3036715008, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.10.attn_kv_b.weight", "offset": 3037378560, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.10.attn_output.weight", "offset": 3038558208, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.10.attn_q.weight", "offset": 3040917504, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.11.attn_norm.weight", "offset": 3044456448, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.11.ffn_down_exps.weight", "offset": 3044464640, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.11.ffn_gate_exps.weight", "offset": 3240548352, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.11.ffn_up_exps.weight", "offset": 3344357376, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.11.ffn_gate_inp.weight", "offset": 3448166400, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.11.ffn_down_shexp.weight", "offset": 3448690688, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.11.ffn_gate_shexp.weight", "offset": 3453421568, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.11.ffn_up_shexp.weight", "offset": 3456665600, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.11.ffn_norm.weight", "offset": 3459909632, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.11.attn_kv_a_norm.weight", "offset": 3459917824, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.11.attn_kv_a_mqa.weight", "offset": 3459919872, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.11.attn_kv_b.weight", "offset": 3460583424, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.11.attn_output.weight", "offset": 3461763072, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.11.attn_q.weight", "offset": 3464122368, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.12.attn_norm.weight", "offset": 3467661312, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.12.ffn_down_exps.weight", "offset": 3467669504, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.12.ffn_gate_exps.weight", "offset": 3594547200, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.12.ffn_up_exps.weight", "offset": 3698356224, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.12.ffn_gate_inp.weight", "offset": 3802165248, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.12.ffn_down_shexp.weight", "offset": 3802689536, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.12.ffn_gate_shexp.weight", "offset": 3805933568, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.12.ffn_up_shexp.weight", "offset": 3809177600, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.12.ffn_norm.weight", "offset": 3812421632, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.12.attn_kv_a_norm.weight", "offset": 3812429824, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.12.attn_kv_a_mqa.weight", "offset": 3812431872, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.12.attn_kv_b.weight", "offset": 3813095424, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.12.attn_output.weight", "offset": 3814275072, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.12.attn_q.weight", "offset": 3816634368, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.13.attn_norm.weight", "offset": 3820173312, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.13.ffn_down_exps.weight", "offset": 3820181504, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.13.ffn_gate_exps.weight", "offset": 3947059200, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.13.ffn_up_exps.weight", "offset": 4050868224, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.13.ffn_gate_inp.weight", "offset": 4154677248, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.13.ffn_down_shexp.weight", "offset": 4155201536, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.13.ffn_gate_shexp.weight", "offset": 4158445568, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.13.ffn_up_shexp.weight", "offset": 4161689600, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.13.ffn_norm.weight", "offset": 4164933632, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.13.attn_kv_a_norm.weight", "offset": 4164941824, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.13.attn_kv_a_mqa.weight", "offset": 4164943872, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.13.attn_kv_b.weight", "offset": 4165607424, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.13.attn_output.weight", "offset": 4166787072, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.13.attn_q.weight", "offset": 4169146368, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.14.ffn_gate_inp.weight", "offset": 4172685312, "shape": [ 2048, 64 ], "size": 3538944, "type": 0 }, { "name": "blk.14.ffn_down_shexp.weight", "offset": 4173209600, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.14.ffn_gate_shexp.weight", "offset": 4177940480, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.14.ffn_up_shexp.weight", "offset": 4181184512, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.14.attn_kv_a_norm.weight", "offset": 4184428544, "shape": [ 512 ], "size": 3244032, "type": 0 }, { "name": "blk.14.attn_kv_a_mqa.weight", "offset": 4184430592, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.14.attn_kv_b.weight", "offset": 4185094144, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.14.attn_output.weight", "offset": 4186273792, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.14.attn_q.weight", "offset": 4188633088, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.7.attn_norm.weight", "offset": 4192172032, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.7.ffn_down_exps.weight", "offset": 4192180224, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.7.ffn_gate_exps.weight", "offset": 4319057920, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.7.ffn_up_exps.weight", "offset": 4422866944, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.7.ffn_norm.weight", "offset": 4526675968, "shape": [ 2048 ], "size": 103809024, "type": 0 }, { "name": "blk.8.attn_norm.weight", "offset": 4526684160, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.8.ffn_down_exps.weight", "offset": 4526692352, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.8.ffn_gate_exps.weight", "offset": 4722776064, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.8.ffn_up_exps.weight", "offset": 4826585088, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.8.ffn_gate_inp.weight", "offset": 4930394112, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.8.ffn_down_shexp.weight", "offset": 4930918400, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.8.ffn_gate_shexp.weight", "offset": 4935649280, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.8.ffn_up_shexp.weight", "offset": 4938893312, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.8.ffn_norm.weight", "offset": 4942137344, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.8.attn_kv_a_norm.weight", "offset": 4942145536, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.8.attn_kv_a_mqa.weight", "offset": 4942147584, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.8.attn_kv_b.weight", "offset": 4942811136, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.8.attn_output.weight", "offset": 4943990784, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.8.attn_q.weight", "offset": 4946350080, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.9.attn_norm.weight", "offset": 4949889024, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.9.ffn_down_exps.weight", "offset": 4949897216, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.9.ffn_gate_exps.weight", "offset": 5076774912, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.9.ffn_up_exps.weight", "offset": 5180583936, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.9.ffn_gate_inp.weight", "offset": 5284392960, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.9.ffn_down_shexp.weight", "offset": 5284917248, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.9.ffn_gate_shexp.weight", "offset": 5288161280, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.9.ffn_up_shexp.weight", "offset": 5291405312, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.9.ffn_norm.weight", "offset": 5294649344, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.9.attn_kv_a_norm.weight", "offset": 5294657536, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.9.attn_kv_a_mqa.weight", "offset": 5294659584, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.9.attn_kv_b.weight", "offset": 5295323136, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.9.attn_output.weight", "offset": 5296502784, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.9.attn_q.weight", "offset": 5298862080, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.14.attn_norm.weight", "offset": 5302401024, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.14.ffn_down_exps.weight", "offset": 5302409216, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.14.ffn_gate_exps.weight", "offset": 5498492928, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.14.ffn_up_exps.weight", "offset": 5602301952, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.14.ffn_norm.weight", "offset": 5706110976, "shape": [ 2048 ], "size": 103809024, "type": 0 }, { "name": "blk.15.attn_norm.weight", "offset": 5706119168, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.15.ffn_down_exps.weight", "offset": 5706127360, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.15.ffn_gate_exps.weight", "offset": 5833005056, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.15.ffn_up_exps.weight", "offset": 5936814080, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.15.ffn_gate_inp.weight", "offset": 6040623104, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.15.ffn_down_shexp.weight", "offset": 6041147392, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.15.ffn_gate_shexp.weight", "offset": 6044391424, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.15.ffn_up_shexp.weight", "offset": 6047635456, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.15.ffn_norm.weight", "offset": 6050879488, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.15.attn_kv_a_norm.weight", "offset": 6050887680, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.15.attn_kv_a_mqa.weight", "offset": 6050889728, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.15.attn_kv_b.weight", "offset": 6051553280, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.15.attn_output.weight", "offset": 6052732928, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.15.attn_q.weight", "offset": 6055092224, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.16.attn_norm.weight", "offset": 6058631168, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.16.ffn_down_exps.weight", "offset": 6058639360, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.16.ffn_gate_exps.weight", "offset": 6185517056, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.16.ffn_up_exps.weight", "offset": 6289326080, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.16.ffn_gate_inp.weight", "offset": 6393135104, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.16.ffn_down_shexp.weight", "offset": 6393659392, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.16.ffn_gate_shexp.weight", "offset": 6396903424, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.16.ffn_up_shexp.weight", "offset": 6400147456, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.16.ffn_norm.weight", "offset": 6403391488, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.16.attn_kv_a_norm.weight", "offset": 6403399680, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.16.attn_kv_a_mqa.weight", "offset": 6403401728, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.16.attn_kv_b.weight", "offset": 6404065280, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.16.attn_output.weight", "offset": 6405244928, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.16.attn_q.weight", "offset": 6407604224, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.17.attn_norm.weight", "offset": 6411143168, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.17.ffn_down_exps.weight", "offset": 6411151360, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.17.ffn_gate_exps.weight", "offset": 6607235072, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.17.ffn_up_exps.weight", "offset": 6711044096, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.17.ffn_gate_inp.weight", "offset": 6814853120, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.17.ffn_down_shexp.weight", "offset": 6815377408, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.17.ffn_gate_shexp.weight", "offset": 6820108288, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.17.ffn_up_shexp.weight", "offset": 6823352320, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.17.ffn_norm.weight", "offset": 6826596352, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.17.attn_kv_a_norm.weight", "offset": 6826604544, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.17.attn_kv_a_mqa.weight", "offset": 6826606592, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.17.attn_kv_b.weight", "offset": 6827270144, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.17.attn_output.weight", "offset": 6828449792, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.17.attn_q.weight", "offset": 6830809088, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.18.attn_norm.weight", "offset": 6834348032, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.18.ffn_down_exps.weight", "offset": 6834356224, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.18.ffn_gate_exps.weight", "offset": 6961233920, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.18.ffn_up_exps.weight", "offset": 7065042944, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.18.ffn_gate_inp.weight", "offset": 7168851968, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.18.ffn_down_shexp.weight", "offset": 7169376256, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.18.ffn_gate_shexp.weight", "offset": 7172620288, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.18.ffn_up_shexp.weight", "offset": 7175864320, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.18.ffn_norm.weight", "offset": 7179108352, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.18.attn_kv_a_norm.weight", "offset": 7179116544, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.18.attn_kv_a_mqa.weight", "offset": 7179118592, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.18.attn_kv_b.weight", "offset": 7179782144, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.18.attn_output.weight", "offset": 7180961792, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.18.attn_q.weight", "offset": 7183321088, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.19.attn_norm.weight", "offset": 7186860032, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.19.ffn_down_exps.weight", "offset": 7186868224, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.19.ffn_gate_exps.weight", "offset": 7313745920, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.19.ffn_up_exps.weight", "offset": 7417554944, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.19.ffn_gate_inp.weight", "offset": 7521363968, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.19.ffn_down_shexp.weight", "offset": 7521888256, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.19.ffn_gate_shexp.weight", "offset": 7525132288, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.19.ffn_up_shexp.weight", "offset": 7528376320, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.19.ffn_norm.weight", "offset": 7531620352, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.19.attn_kv_a_norm.weight", "offset": 7531628544, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.19.attn_kv_a_mqa.weight", "offset": 7531630592, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.19.attn_kv_b.weight", "offset": 7532294144, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.19.attn_output.weight", "offset": 7533473792, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.19.attn_q.weight", "offset": 7535833088, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.20.attn_norm.weight", "offset": 7539372032, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.20.ffn_down_exps.weight", "offset": 7539380224, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.20.ffn_gate_exps.weight", "offset": 7735463936, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.20.ffn_up_exps.weight", "offset": 7839272960, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.20.ffn_gate_inp.weight", "offset": 7943081984, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.20.ffn_down_shexp.weight", "offset": 7943606272, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.20.ffn_gate_shexp.weight", "offset": 7948337152, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.20.ffn_up_shexp.weight", "offset": 7951581184, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.20.ffn_norm.weight", "offset": 7954825216, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.20.attn_kv_a_norm.weight", "offset": 7954833408, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.20.attn_kv_a_mqa.weight", "offset": 7954835456, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.20.attn_kv_b.weight", "offset": 7955499008, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.20.attn_output.weight", "offset": 7956678656, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.20.attn_q.weight", "offset": 7959037952, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.21.attn_norm.weight", "offset": 7962576896, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.21.ffn_down_exps.weight", "offset": 7962585088, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.21.ffn_gate_exps.weight", "offset": 8089462784, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.21.ffn_up_exps.weight", "offset": 8193271808, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.21.ffn_gate_inp.weight", "offset": 8297080832, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.21.ffn_down_shexp.weight", "offset": 8297605120, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.21.ffn_gate_shexp.weight", "offset": 8300849152, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.21.ffn_up_shexp.weight", "offset": 8304093184, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.21.ffn_norm.weight", "offset": 8307337216, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.21.attn_kv_a_norm.weight", "offset": 8307345408, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.21.attn_kv_a_mqa.weight", "offset": 8307347456, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.21.attn_kv_b.weight", "offset": 8308011008, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.21.attn_output.weight", "offset": 8309190656, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.21.attn_q.weight", "offset": 8311549952, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.22.ffn_gate_inp.weight", "offset": 8315088896, "shape": [ 2048, 64 ], "size": 3538944, "type": 0 }, { "name": "blk.22.ffn_down_shexp.weight", "offset": 8315613184, "shape": [ 2816, 2048 ], "size": 524288, "type": 12 }, { "name": "blk.22.ffn_gate_shexp.weight", "offset": 8318857216, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.22.ffn_up_shexp.weight", "offset": 8322101248, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.22.attn_kv_a_norm.weight", "offset": 8325345280, "shape": [ 512 ], "size": 3244032, "type": 0 }, { "name": "blk.22.attn_kv_a_mqa.weight", "offset": 8325347328, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.22.attn_kv_b.weight", "offset": 8326010880, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.22.attn_output.weight", "offset": 8327190528, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.22.attn_q.weight", "offset": 8329549824, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.22.attn_norm.weight", "offset": 8333088768, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.22.ffn_down_exps.weight", "offset": 8333096960, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 6 }, { "name": "blk.22.ffn_gate_exps.weight", "offset": 8459974656, "shape": [ 2048, 1408, 64 ], "size": 126877696, "type": 12 }, { "name": "blk.22.ffn_up_exps.weight", "offset": 8563783680, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.22.ffn_norm.weight", "offset": 8667592704, "shape": [ 2048 ], "size": 103809024, "type": 0 }, { "name": "blk.23.attn_norm.weight", "offset": 8667600896, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.23.ffn_down_exps.weight", "offset": 8667609088, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.23.ffn_gate_exps.weight", "offset": 8863692800, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.23.ffn_up_exps.weight", "offset": 8967501824, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.23.ffn_gate_inp.weight", "offset": 9071310848, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.23.ffn_down_shexp.weight", "offset": 9071835136, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.23.ffn_gate_shexp.weight", "offset": 9076566016, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.23.ffn_up_shexp.weight", "offset": 9079810048, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.23.ffn_norm.weight", "offset": 9083054080, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.23.attn_kv_a_norm.weight", "offset": 9083062272, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.23.attn_kv_a_mqa.weight", "offset": 9083064320, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.23.attn_kv_b.weight", "offset": 9083727872, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.23.attn_output.weight", "offset": 9084907520, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.23.attn_q.weight", "offset": 9087266816, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.24.attn_norm.weight", "offset": 9090805760, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.24.ffn_down_exps.weight", "offset": 9090813952, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.24.ffn_gate_exps.weight", "offset": 9286897664, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.24.ffn_up_exps.weight", "offset": 9390706688, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.24.ffn_gate_inp.weight", "offset": 9494515712, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.24.ffn_down_shexp.weight", "offset": 9495040000, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.24.ffn_gate_shexp.weight", "offset": 9499770880, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.24.ffn_up_shexp.weight", "offset": 9503014912, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.24.ffn_norm.weight", "offset": 9506258944, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.24.attn_kv_a_norm.weight", "offset": 9506267136, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.24.attn_kv_a_mqa.weight", "offset": 9506269184, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.24.attn_kv_b.weight", "offset": 9506932736, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.24.attn_output.weight", "offset": 9508112384, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.24.attn_q.weight", "offset": 9510471680, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.25.attn_norm.weight", "offset": 9514010624, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.25.ffn_down_exps.weight", "offset": 9514018816, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.25.ffn_gate_exps.weight", "offset": 9710102528, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.25.ffn_up_exps.weight", "offset": 9813911552, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.25.ffn_gate_inp.weight", "offset": 9917720576, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.25.ffn_down_shexp.weight", "offset": 9918244864, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.25.ffn_gate_shexp.weight", "offset": 9922975744, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.25.ffn_up_shexp.weight", "offset": 9926219776, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.25.ffn_norm.weight", "offset": 9929463808, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.25.attn_kv_a_norm.weight", "offset": 9929472000, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.25.attn_kv_a_mqa.weight", "offset": 9929474048, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.25.attn_kv_b.weight", "offset": 9930137600, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.25.attn_output.weight", "offset": 9931317248, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.25.attn_q.weight", "offset": 9933676544, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 }, { "name": "blk.26.attn_norm.weight", "offset": 9937215488, "shape": [ 2048 ], "size": 3538944, "type": 0 }, { "name": "blk.26.ffn_down_exps.weight", "offset": 9937223680, "shape": [ 1408, 2048, 64 ], "size": 8192, "type": 8 }, { "name": "blk.26.ffn_gate_exps.weight", "offset": 10133307392, "shape": [ 2048, 1408, 64 ], "size": 196083712, "type": 12 }, { "name": "blk.26.ffn_up_exps.weight", "offset": 10237116416, "shape": [ 2048, 1408, 64 ], "size": 103809024, "type": 12 }, { "name": "blk.26.ffn_gate_inp.weight", "offset": 10340925440, "shape": [ 2048, 64 ], "size": 103809024, "type": 0 }, { "name": "blk.26.ffn_down_shexp.weight", "offset": 10341449728, "shape": [ 2816, 2048 ], "size": 524288, "type": 14 }, { "name": "blk.26.ffn_gate_shexp.weight", "offset": 10346180608, "shape": [ 2048, 2816 ], "size": 4730880, "type": 12 }, { "name": "blk.26.ffn_up_shexp.weight", "offset": 10349424640, "shape": [ 2048, 2816 ], "size": 3244032, "type": 12 }, { "name": "blk.26.ffn_norm.weight", "offset": 10352668672, "shape": [ 2048 ], "size": 3244032, "type": 0 }, { "name": "blk.26.attn_kv_a_norm.weight", "offset": 10352676864, "shape": [ 512 ], "size": 8192, "type": 0 }, { "name": "blk.26.attn_kv_a_mqa.weight", "offset": 10352678912, "shape": [ 2048, 576 ], "size": 2048, "type": 12 }, { "name": "blk.26.attn_kv_b.weight", "offset": 10353342464, "shape": [ 512, 4096 ], "size": 663552, "type": 12 }, { "name": "blk.26.attn_output.weight", "offset": 10354522112, "shape": [ 2048, 2048 ], "size": 1179648, "type": 12 }, { "name": "blk.26.attn_q.weight", "offset": 10356881408, "shape": [ 2048, 3072 ], "size": 2359296, "type": 12 } ], "version": 3 }