DeepSeek Coder is a capable coding model trained on two trillion code and natural language tokens.

1B 7B 33B

196.5K Pulls Updated 6 months ago

102 Tags

d040cc185215 · 776MB
{ "metadata": { "general.architecture": "llama", "general.file_type": 2, "general.name": "deepseek-ai", "general.quantization_version": 2, "llama.attention.head_count": 16, "llama.attention.head_count_kv": 16, "llama.attention.layer_norm_rms_epsilon": 0.000001, "llama.block_count": 24, "llama.context_length": 16384, "llama.embedding_length": 2048, "llama.feed_forward_length": 5504, "llama.rope.dimension_count": 128, "llama.rope.freq_base": 100000, "llama.rope.scaling.factor": 4, "llama.rope.scaling.type": "linear", "tokenizer.ggml.add_bos_token": true, "tokenizer.ggml.add_eos_token": false, "tokenizer.ggml.bos_token_id": 32013, "tokenizer.ggml.eos_token_id": 32021, "tokenizer.ggml.merges": "... (31757 values)", "tokenizer.ggml.model": "gpt2", "tokenizer.ggml.padding_token_id": 32014, "tokenizer.ggml.scores": "... (32256 values)", "tokenizer.ggml.token_type": "... (32256 values)", "tokenizer.ggml.tokens": "... (32256 values)" }, "num_params": 1346471936, "tensors": [ { "name": "blk.0.attn_q.weight", "offset": 37158912, "shape": [ 2048, 2048 ], "size": 37158912, "type": 2 }, { "name": "blk.0.attn_k.weight", "offset": 39518208, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.0.attn_v.weight", "offset": 41877504, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.0.attn_output.weight", "offset": 44236800, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.0.ffn_gate.weight", "offset": 46596096, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.0.ffn_up.weight", "offset": 52936704, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.0.ffn_down.weight", "offset": 59277312, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.0.attn_norm.weight", "offset": 65617920, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.0.ffn_norm.weight", "offset": 65626112, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.1.attn_q.weight", "offset": 65634304, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.1.attn_k.weight", "offset": 67993600, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.1.attn_v.weight", "offset": 70352896, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.1.attn_output.weight", "offset": 72712192, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.1.ffn_gate.weight", "offset": 75071488, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.1.ffn_up.weight", "offset": 81412096, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.1.ffn_down.weight", "offset": 87752704, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.1.attn_norm.weight", "offset": 94093312, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.1.ffn_norm.weight", "offset": 94101504, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.2.attn_q.weight", "offset": 94109696, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.2.attn_k.weight", "offset": 96468992, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.2.attn_v.weight", "offset": 98828288, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.2.attn_output.weight", "offset": 101187584, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.2.ffn_gate.weight", "offset": 103546880, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.2.ffn_up.weight", "offset": 109887488, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.2.ffn_down.weight", "offset": 116228096, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.2.attn_norm.weight", "offset": 122568704, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.2.ffn_norm.weight", "offset": 122576896, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.3.attn_q.weight", "offset": 122585088, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.3.attn_k.weight", "offset": 124944384, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.3.attn_v.weight", "offset": 127303680, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.3.attn_output.weight", "offset": 129662976, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.3.ffn_gate.weight", "offset": 132022272, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.3.ffn_up.weight", "offset": 138362880, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.3.ffn_down.weight", "offset": 144703488, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.3.attn_norm.weight", "offset": 151044096, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.3.ffn_norm.weight", "offset": 151052288, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.4.attn_q.weight", "offset": 151060480, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.4.attn_k.weight", "offset": 153419776, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.4.attn_v.weight", "offset": 155779072, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.4.attn_output.weight", "offset": 158138368, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.4.ffn_gate.weight", "offset": 160497664, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.4.ffn_up.weight", "offset": 166838272, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.4.ffn_down.weight", "offset": 173178880, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.4.attn_norm.weight", "offset": 179519488, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.4.ffn_norm.weight", "offset": 179527680, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.5.attn_q.weight", "offset": 179535872, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.5.attn_k.weight", "offset": 181895168, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.5.attn_v.weight", "offset": 184254464, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.5.attn_output.weight", "offset": 186613760, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.5.ffn_gate.weight", "offset": 188973056, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.5.ffn_up.weight", "offset": 195313664, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.5.ffn_down.weight", "offset": 201654272, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.5.attn_norm.weight", "offset": 207994880, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.5.ffn_norm.weight", "offset": 208003072, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.6.attn_q.weight", "offset": 208011264, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.6.attn_k.weight", "offset": 210370560, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.6.attn_v.weight", "offset": 212729856, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.6.attn_output.weight", "offset": 215089152, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.6.ffn_gate.weight", "offset": 217448448, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.6.ffn_up.weight", "offset": 223789056, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.6.ffn_down.weight", "offset": 230129664, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.6.attn_norm.weight", "offset": 236470272, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.6.ffn_norm.weight", "offset": 236478464, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.7.attn_q.weight", "offset": 236486656, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.7.attn_k.weight", "offset": 238845952, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.7.attn_v.weight", "offset": 241205248, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.7.attn_output.weight", "offset": 243564544, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.7.ffn_gate.weight", "offset": 245923840, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.7.ffn_up.weight", "offset": 252264448, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.7.ffn_down.weight", "offset": 258605056, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.7.attn_norm.weight", "offset": 264945664, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.7.ffn_norm.weight", "offset": 264953856, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.8.attn_q.weight", "offset": 264962048, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.8.attn_k.weight", "offset": 267321344, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.8.attn_v.weight", "offset": 269680640, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.8.attn_output.weight", "offset": 272039936, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.8.ffn_gate.weight", "offset": 274399232, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.8.ffn_up.weight", "offset": 280739840, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.8.ffn_down.weight", "offset": 287080448, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.8.attn_norm.weight", "offset": 293421056, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.8.ffn_norm.weight", "offset": 293429248, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.9.attn_q.weight", "offset": 293437440, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.9.attn_k.weight", "offset": 295796736, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.9.attn_v.weight", "offset": 298156032, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.9.attn_output.weight", "offset": 300515328, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.9.ffn_gate.weight", "offset": 302874624, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.9.ffn_up.weight", "offset": 309215232, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.9.ffn_down.weight", "offset": 315555840, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.9.attn_norm.weight", "offset": 321896448, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.9.ffn_norm.weight", "offset": 321904640, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.10.attn_q.weight", "offset": 321912832, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.10.attn_k.weight", "offset": 324272128, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.10.attn_v.weight", "offset": 326631424, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.10.attn_output.weight", "offset": 328990720, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.10.ffn_gate.weight", "offset": 331350016, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.10.ffn_up.weight", "offset": 337690624, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.10.ffn_down.weight", "offset": 344031232, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.10.attn_norm.weight", "offset": 350371840, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.10.ffn_norm.weight", "offset": 350380032, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.11.attn_q.weight", "offset": 350388224, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.11.attn_k.weight", "offset": 352747520, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.11.attn_v.weight", "offset": 355106816, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.11.attn_output.weight", "offset": 357466112, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.11.ffn_gate.weight", "offset": 359825408, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.11.ffn_up.weight", "offset": 366166016, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.11.ffn_down.weight", "offset": 372506624, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.11.attn_norm.weight", "offset": 378847232, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.11.ffn_norm.weight", "offset": 378855424, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.12.attn_q.weight", "offset": 378863616, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.12.attn_k.weight", "offset": 381222912, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.12.attn_v.weight", "offset": 383582208, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.12.attn_output.weight", "offset": 385941504, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.12.ffn_gate.weight", "offset": 388300800, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.12.ffn_up.weight", "offset": 394641408, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.12.ffn_down.weight", "offset": 400982016, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.12.attn_norm.weight", "offset": 407322624, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.12.ffn_norm.weight", "offset": 407330816, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.13.attn_q.weight", "offset": 407339008, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.13.attn_k.weight", "offset": 409698304, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.13.attn_v.weight", "offset": 412057600, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.13.attn_output.weight", "offset": 414416896, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.13.ffn_gate.weight", "offset": 416776192, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.13.ffn_up.weight", "offset": 423116800, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.13.ffn_down.weight", "offset": 429457408, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.13.attn_norm.weight", "offset": 435798016, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.13.ffn_norm.weight", "offset": 435806208, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.14.attn_q.weight", "offset": 435814400, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.14.attn_k.weight", "offset": 438173696, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.14.attn_v.weight", "offset": 440532992, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.14.attn_output.weight", "offset": 442892288, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.14.ffn_gate.weight", "offset": 445251584, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.14.ffn_up.weight", "offset": 451592192, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.14.ffn_down.weight", "offset": 457932800, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.14.attn_norm.weight", "offset": 464273408, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.14.ffn_norm.weight", "offset": 464281600, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.15.attn_q.weight", "offset": 464289792, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.15.attn_k.weight", "offset": 466649088, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.15.attn_v.weight", "offset": 469008384, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.15.attn_output.weight", "offset": 471367680, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.15.ffn_gate.weight", "offset": 473726976, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.15.ffn_up.weight", "offset": 480067584, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.15.ffn_down.weight", "offset": 486408192, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.15.attn_norm.weight", "offset": 492748800, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.15.ffn_norm.weight", "offset": 492756992, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.16.attn_q.weight", "offset": 492765184, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.16.attn_k.weight", "offset": 495124480, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.16.attn_v.weight", "offset": 497483776, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.16.attn_output.weight", "offset": 499843072, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.16.ffn_gate.weight", "offset": 502202368, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.16.ffn_up.weight", "offset": 508542976, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.16.ffn_down.weight", "offset": 514883584, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.16.attn_norm.weight", "offset": 521224192, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.16.ffn_norm.weight", "offset": 521232384, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.17.attn_q.weight", "offset": 521240576, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.17.attn_k.weight", "offset": 523599872, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.17.attn_v.weight", "offset": 525959168, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.17.attn_output.weight", "offset": 528318464, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.17.ffn_gate.weight", "offset": 530677760, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.17.ffn_up.weight", "offset": 537018368, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.17.ffn_down.weight", "offset": 543358976, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.17.attn_norm.weight", "offset": 549699584, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.17.ffn_norm.weight", "offset": 549707776, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.18.attn_q.weight", "offset": 549715968, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.18.attn_k.weight", "offset": 552075264, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.18.attn_v.weight", "offset": 554434560, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.18.attn_output.weight", "offset": 556793856, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.18.ffn_gate.weight", "offset": 559153152, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.18.ffn_up.weight", "offset": 565493760, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.18.ffn_down.weight", "offset": 571834368, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.18.attn_norm.weight", "offset": 578174976, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.18.ffn_norm.weight", "offset": 578183168, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.19.attn_q.weight", "offset": 578191360, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.19.attn_k.weight", "offset": 580550656, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.19.attn_v.weight", "offset": 582909952, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.19.attn_output.weight", "offset": 585269248, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.19.ffn_gate.weight", "offset": 587628544, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.19.ffn_up.weight", "offset": 593969152, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.19.ffn_down.weight", "offset": 600309760, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.19.attn_norm.weight", "offset": 606650368, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.19.ffn_norm.weight", "offset": 606658560, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.20.attn_q.weight", "offset": 606666752, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.20.attn_k.weight", "offset": 609026048, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.20.attn_v.weight", "offset": 611385344, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.20.attn_output.weight", "offset": 613744640, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.20.ffn_gate.weight", "offset": 616103936, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.20.ffn_up.weight", "offset": 622444544, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.20.ffn_down.weight", "offset": 628785152, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.20.attn_norm.weight", "offset": 635125760, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.20.ffn_norm.weight", "offset": 635133952, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.21.attn_q.weight", "offset": 635142144, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.21.attn_k.weight", "offset": 637501440, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.21.attn_v.weight", "offset": 639860736, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.21.attn_output.weight", "offset": 642220032, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.21.ffn_gate.weight", "offset": 644579328, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.21.ffn_up.weight", "offset": 650919936, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.21.ffn_down.weight", "offset": 657260544, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.21.attn_norm.weight", "offset": 663601152, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.21.ffn_norm.weight", "offset": 663609344, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.22.attn_q.weight", "offset": 663617536, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.22.attn_k.weight", "offset": 665976832, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.22.attn_v.weight", "offset": 668336128, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.22.attn_output.weight", "offset": 670695424, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.22.ffn_gate.weight", "offset": 673054720, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.22.ffn_up.weight", "offset": 679395328, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.22.ffn_down.weight", "offset": 685735936, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.22.attn_norm.weight", "offset": 692076544, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.22.ffn_norm.weight", "offset": 692084736, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.23.attn_q.weight", "offset": 692092928, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.23.attn_k.weight", "offset": 694452224, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.23.attn_v.weight", "offset": 696811520, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.23.attn_output.weight", "offset": 699170816, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.23.ffn_gate.weight", "offset": 701530112, "shape": [ 2048, 5504 ], "size": 2359296, "type": 2 }, { "name": "blk.23.ffn_up.weight", "offset": 707870720, "shape": [ 2048, 5504 ], "size": 6340608, "type": 2 }, { "name": "blk.23.ffn_down.weight", "offset": 714211328, "shape": [ 5504, 2048 ], "size": 6340608, "type": 2 }, { "name": "blk.23.attn_norm.weight", "offset": 720551936, "shape": [ 2048 ], "size": 6340608, "type": 0 }, { "name": "blk.23.ffn_norm.weight", "offset": 720560128, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "output_norm.weight", "offset": 720568320, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "output.weight", "offset": 720576512, "shape": [ 2048, 32256 ], "size": 8192, "type": 14 } ], "version": 3 }