Stable LM 2 is a state-of-the-art 1.6B and 12B parameter language model trained on multilingual data in English, Spanish, German, Italian, French, Portuguese, and Dutch.

1.6B 12B

36.1K Pulls Updated 6 weeks ago

84 Tags

47082a26972c · 983MB
{ "metadata": { "general.architecture": "stablelm", "general.file_type": 2, "general.name": "stablelm-2-zephyr-1_6b", "general.quantization_version": 2, "stablelm.attention.head_count": 32, "stablelm.attention.layer_norm_epsilon": 0.00001, "stablelm.block_count": 24, "stablelm.context_length": 4096, "stablelm.embedding_length": 2048, "stablelm.feed_forward_length": 5632, "stablelm.rope.dimension_count": 16, "stablelm.use_parallel_residual": true, "tokenizer.ggml.bos_token_id": 100257, "tokenizer.ggml.eos_token_id": 100257, "tokenizer.ggml.merges": "... (100000 values)", "tokenizer.ggml.model": "gpt2", "tokenizer.ggml.token_type": "... (100352 values)", "tokenizer.ggml.tokens": "... (100352 values)", "tokenizer.ggml.unknown_token_id": 100257 }, "num_params": 1644515328, "tensors": [ { "name": "token_embd.weight", "offset": 168591360, "shape": [ 2048, 100352 ], "size": 168591360, "type": 2 }, { "name": "blk.0.attn_norm.bias", "offset": 284196864, "shape": [ 2048 ], "size": 115605504, "type": 0 }, { "name": "blk.0.attn_norm.weight", "offset": 284205056, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.0.ffn_down.weight", "offset": 284213248, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.0.ffn_gate.weight", "offset": 290701312, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.0.ffn_up.weight", "offset": 297189376, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.0.ffn_norm.bias", "offset": 303677440, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.0.ffn_norm.weight", "offset": 303685632, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.0.attn_k.bias", "offset": 303693824, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.0.attn_k.weight", "offset": 303702016, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.0.attn_output.weight", "offset": 306061312, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.0.attn_q.bias", "offset": 308420608, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.0.attn_q.weight", "offset": 308428800, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.0.attn_v.bias", "offset": 310788096, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.0.attn_v.weight", "offset": 310796288, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.1.attn_norm.bias", "offset": 313155584, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.1.attn_norm.weight", "offset": 313163776, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.1.ffn_down.weight", "offset": 313171968, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.1.ffn_gate.weight", "offset": 319660032, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.1.ffn_up.weight", "offset": 326148096, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.1.ffn_norm.bias", "offset": 332636160, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.1.ffn_norm.weight", "offset": 332644352, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.1.attn_k.bias", "offset": 332652544, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.1.attn_k.weight", "offset": 332660736, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.1.attn_output.weight", "offset": 335020032, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.1.attn_q.bias", "offset": 337379328, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.1.attn_q.weight", "offset": 337387520, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.1.attn_v.bias", "offset": 339746816, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.1.attn_v.weight", "offset": 339755008, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.10.attn_norm.bias", "offset": 342114304, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.10.attn_norm.weight", "offset": 342122496, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.10.ffn_down.weight", "offset": 342130688, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.10.ffn_gate.weight", "offset": 348618752, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.10.ffn_up.weight", "offset": 355106816, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.10.ffn_norm.bias", "offset": 361594880, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.10.ffn_norm.weight", "offset": 361603072, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.10.attn_k.bias", "offset": 361611264, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.10.attn_k.weight", "offset": 361619456, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.10.attn_output.weight", "offset": 363978752, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.10.attn_q.bias", "offset": 366338048, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.10.attn_q.weight", "offset": 366346240, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.10.attn_v.bias", "offset": 368705536, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.10.attn_v.weight", "offset": 368713728, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.11.attn_norm.bias", "offset": 371073024, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.11.attn_norm.weight", "offset": 371081216, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.11.ffn_down.weight", "offset": 371089408, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.11.ffn_gate.weight", "offset": 377577472, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.11.ffn_up.weight", "offset": 384065536, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.11.ffn_norm.bias", "offset": 390553600, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.11.ffn_norm.weight", "offset": 390561792, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.11.attn_k.bias", "offset": 390569984, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.11.attn_k.weight", "offset": 390578176, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.11.attn_output.weight", "offset": 392937472, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.11.attn_q.bias", "offset": 395296768, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.11.attn_q.weight", "offset": 395304960, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.11.attn_v.bias", "offset": 397664256, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.11.attn_v.weight", "offset": 397672448, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.12.attn_norm.bias", "offset": 400031744, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.12.attn_norm.weight", "offset": 400039936, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.12.ffn_down.weight", "offset": 400048128, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.12.ffn_gate.weight", "offset": 406536192, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.12.ffn_up.weight", "offset": 413024256, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.12.ffn_norm.bias", "offset": 419512320, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.12.ffn_norm.weight", "offset": 419520512, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.12.attn_k.bias", "offset": 419528704, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.12.attn_k.weight", "offset": 419536896, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.12.attn_output.weight", "offset": 421896192, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.12.attn_q.bias", "offset": 424255488, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.12.attn_q.weight", "offset": 424263680, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.12.attn_v.bias", "offset": 426622976, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.12.attn_v.weight", "offset": 426631168, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.13.attn_norm.bias", "offset": 428990464, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.13.attn_norm.weight", "offset": 428998656, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.13.ffn_down.weight", "offset": 429006848, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.13.ffn_gate.weight", "offset": 435494912, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.13.ffn_up.weight", "offset": 441982976, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.13.ffn_norm.bias", "offset": 448471040, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.13.ffn_norm.weight", "offset": 448479232, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.13.attn_k.bias", "offset": 448487424, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.13.attn_k.weight", "offset": 448495616, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.13.attn_output.weight", "offset": 450854912, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.13.attn_q.bias", "offset": 453214208, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.13.attn_q.weight", "offset": 453222400, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.13.attn_v.bias", "offset": 455581696, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.13.attn_v.weight", "offset": 455589888, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.14.attn_norm.bias", "offset": 457949184, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.14.attn_norm.weight", "offset": 457957376, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.14.ffn_down.weight", "offset": 457965568, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.14.ffn_gate.weight", "offset": 464453632, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.14.ffn_up.weight", "offset": 470941696, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.14.ffn_norm.bias", "offset": 477429760, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.14.ffn_norm.weight", "offset": 477437952, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.14.attn_k.bias", "offset": 477446144, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.14.attn_k.weight", "offset": 477454336, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.14.attn_output.weight", "offset": 479813632, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.14.attn_q.bias", "offset": 482172928, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.14.attn_q.weight", "offset": 482181120, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.14.attn_v.bias", "offset": 484540416, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.14.attn_v.weight", "offset": 484548608, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.15.attn_norm.bias", "offset": 486907904, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.15.attn_norm.weight", "offset": 486916096, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.15.ffn_down.weight", "offset": 486924288, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.15.ffn_gate.weight", "offset": 493412352, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.15.ffn_up.weight", "offset": 499900416, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.15.ffn_norm.bias", "offset": 506388480, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.15.ffn_norm.weight", "offset": 506396672, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.15.attn_k.bias", "offset": 506404864, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.15.attn_k.weight", "offset": 506413056, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.15.attn_output.weight", "offset": 508772352, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.15.attn_q.bias", "offset": 511131648, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.15.attn_q.weight", "offset": 511139840, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.15.attn_v.bias", "offset": 513499136, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.15.attn_v.weight", "offset": 513507328, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.16.attn_norm.bias", "offset": 515866624, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.16.attn_norm.weight", "offset": 515874816, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.16.ffn_down.weight", "offset": 515883008, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.16.ffn_gate.weight", "offset": 522371072, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.16.ffn_up.weight", "offset": 528859136, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.16.ffn_norm.bias", "offset": 535347200, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.16.ffn_norm.weight", "offset": 535355392, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.16.attn_k.bias", "offset": 535363584, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.16.attn_k.weight", "offset": 535371776, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.16.attn_output.weight", "offset": 537731072, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.16.attn_q.bias", "offset": 540090368, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.16.attn_q.weight", "offset": 540098560, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.16.attn_v.bias", "offset": 542457856, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.16.attn_v.weight", "offset": 542466048, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.17.attn_norm.bias", "offset": 544825344, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.17.attn_norm.weight", "offset": 544833536, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.17.ffn_down.weight", "offset": 544841728, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.17.ffn_gate.weight", "offset": 551329792, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.17.ffn_up.weight", "offset": 557817856, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.17.ffn_norm.bias", "offset": 564305920, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.17.ffn_norm.weight", "offset": 564314112, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.17.attn_k.bias", "offset": 564322304, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.17.attn_k.weight", "offset": 564330496, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.17.attn_output.weight", "offset": 566689792, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.17.attn_q.bias", "offset": 569049088, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.17.attn_q.weight", "offset": 569057280, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.17.attn_v.bias", "offset": 571416576, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.17.attn_v.weight", "offset": 571424768, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.18.attn_norm.bias", "offset": 573784064, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.18.attn_norm.weight", "offset": 573792256, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.18.ffn_down.weight", "offset": 573800448, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.18.ffn_gate.weight", "offset": 580288512, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.18.ffn_up.weight", "offset": 586776576, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.18.ffn_norm.bias", "offset": 593264640, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.18.ffn_norm.weight", "offset": 593272832, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.18.attn_k.bias", "offset": 593281024, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.18.attn_k.weight", "offset": 593289216, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.18.attn_output.weight", "offset": 595648512, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.18.attn_q.bias", "offset": 598007808, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.18.attn_q.weight", "offset": 598016000, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.18.attn_v.bias", "offset": 600375296, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.18.attn_v.weight", "offset": 600383488, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.19.attn_norm.bias", "offset": 602742784, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.19.attn_norm.weight", "offset": 602750976, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.19.ffn_down.weight", "offset": 602759168, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.19.ffn_gate.weight", "offset": 609247232, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.19.ffn_up.weight", "offset": 615735296, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.19.ffn_norm.bias", "offset": 622223360, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.19.ffn_norm.weight", "offset": 622231552, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.19.attn_k.bias", "offset": 622239744, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.19.attn_k.weight", "offset": 622247936, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.19.attn_output.weight", "offset": 624607232, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.19.attn_q.bias", "offset": 626966528, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.19.attn_q.weight", "offset": 626974720, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.19.attn_v.bias", "offset": 629334016, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.19.attn_v.weight", "offset": 629342208, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.2.attn_norm.bias", "offset": 631701504, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.2.attn_norm.weight", "offset": 631709696, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.2.ffn_down.weight", "offset": 631717888, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.2.ffn_gate.weight", "offset": 638205952, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.2.ffn_up.weight", "offset": 644694016, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.2.ffn_norm.bias", "offset": 651182080, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.2.ffn_norm.weight", "offset": 651190272, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.2.attn_k.bias", "offset": 651198464, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.2.attn_k.weight", "offset": 651206656, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.2.attn_output.weight", "offset": 653565952, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.2.attn_q.bias", "offset": 655925248, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.2.attn_q.weight", "offset": 655933440, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.2.attn_v.bias", "offset": 658292736, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.2.attn_v.weight", "offset": 658300928, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.20.attn_norm.bias", "offset": 660660224, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.20.attn_norm.weight", "offset": 660668416, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.20.ffn_down.weight", "offset": 660676608, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.20.ffn_gate.weight", "offset": 667164672, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.20.ffn_up.weight", "offset": 673652736, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.20.ffn_norm.bias", "offset": 680140800, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.20.ffn_norm.weight", "offset": 680148992, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.20.attn_k.bias", "offset": 680157184, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.20.attn_k.weight", "offset": 680165376, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.20.attn_output.weight", "offset": 682524672, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.20.attn_q.bias", "offset": 684883968, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.20.attn_q.weight", "offset": 684892160, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.20.attn_v.bias", "offset": 687251456, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.20.attn_v.weight", "offset": 687259648, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.21.attn_norm.bias", "offset": 689618944, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.21.attn_norm.weight", "offset": 689627136, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.21.ffn_down.weight", "offset": 689635328, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.21.ffn_gate.weight", "offset": 696123392, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.21.ffn_up.weight", "offset": 702611456, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.21.ffn_norm.bias", "offset": 709099520, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.21.ffn_norm.weight", "offset": 709107712, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.21.attn_k.bias", "offset": 709115904, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.21.attn_k.weight", "offset": 709124096, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.21.attn_output.weight", "offset": 711483392, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.21.attn_q.bias", "offset": 713842688, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.21.attn_q.weight", "offset": 713850880, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.21.attn_v.bias", "offset": 716210176, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.21.attn_v.weight", "offset": 716218368, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.22.attn_norm.bias", "offset": 718577664, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.22.attn_norm.weight", "offset": 718585856, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.22.ffn_down.weight", "offset": 718594048, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.22.ffn_gate.weight", "offset": 725082112, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.22.ffn_up.weight", "offset": 731570176, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.22.ffn_norm.bias", "offset": 738058240, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.22.ffn_norm.weight", "offset": 738066432, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.22.attn_k.bias", "offset": 738074624, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.22.attn_k.weight", "offset": 738082816, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.22.attn_output.weight", "offset": 740442112, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.22.attn_q.bias", "offset": 742801408, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.22.attn_q.weight", "offset": 742809600, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.22.attn_v.bias", "offset": 745168896, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.22.attn_v.weight", "offset": 745177088, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.23.attn_norm.bias", "offset": 747536384, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.23.attn_norm.weight", "offset": 747544576, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.23.ffn_down.weight", "offset": 747552768, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.23.ffn_gate.weight", "offset": 754040832, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.23.ffn_up.weight", "offset": 760528896, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.23.ffn_norm.bias", "offset": 767016960, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.23.ffn_norm.weight", "offset": 767025152, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.23.attn_k.bias", "offset": 767033344, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.23.attn_k.weight", "offset": 767041536, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.23.attn_output.weight", "offset": 769400832, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.23.attn_q.bias", "offset": 771760128, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.23.attn_q.weight", "offset": 771768320, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.23.attn_v.bias", "offset": 774127616, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.23.attn_v.weight", "offset": 774135808, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.3.attn_norm.bias", "offset": 776495104, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.3.attn_norm.weight", "offset": 776503296, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.3.ffn_down.weight", "offset": 776511488, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.3.ffn_gate.weight", "offset": 782999552, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.3.ffn_up.weight", "offset": 789487616, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.3.ffn_norm.bias", "offset": 795975680, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.3.ffn_norm.weight", "offset": 795983872, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.3.attn_k.bias", "offset": 795992064, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.3.attn_k.weight", "offset": 796000256, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.3.attn_output.weight", "offset": 798359552, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.3.attn_q.bias", "offset": 800718848, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.3.attn_q.weight", "offset": 800727040, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.3.attn_v.bias", "offset": 803086336, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.3.attn_v.weight", "offset": 803094528, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.4.attn_norm.bias", "offset": 805453824, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.4.attn_norm.weight", "offset": 805462016, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.4.ffn_down.weight", "offset": 805470208, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.4.ffn_gate.weight", "offset": 811958272, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.4.ffn_up.weight", "offset": 818446336, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.4.ffn_norm.bias", "offset": 824934400, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.4.ffn_norm.weight", "offset": 824942592, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.4.attn_k.bias", "offset": 824950784, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.4.attn_k.weight", "offset": 824958976, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.4.attn_output.weight", "offset": 827318272, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.4.attn_q.bias", "offset": 829677568, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.4.attn_q.weight", "offset": 829685760, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.4.attn_v.bias", "offset": 832045056, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.4.attn_v.weight", "offset": 832053248, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.5.attn_norm.bias", "offset": 834412544, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.5.attn_norm.weight", "offset": 834420736, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.5.ffn_down.weight", "offset": 834428928, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.5.ffn_gate.weight", "offset": 840916992, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.5.ffn_up.weight", "offset": 847405056, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.5.ffn_norm.bias", "offset": 853893120, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.5.ffn_norm.weight", "offset": 853901312, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.5.attn_k.bias", "offset": 853909504, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.5.attn_k.weight", "offset": 853917696, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.5.attn_output.weight", "offset": 856276992, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.5.attn_q.bias", "offset": 858636288, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.5.attn_q.weight", "offset": 858644480, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.5.attn_v.bias", "offset": 861003776, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.5.attn_v.weight", "offset": 861011968, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.6.attn_norm.bias", "offset": 863371264, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.6.attn_norm.weight", "offset": 863379456, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.6.ffn_down.weight", "offset": 863387648, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.6.ffn_gate.weight", "offset": 869875712, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.6.ffn_up.weight", "offset": 876363776, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.6.ffn_norm.bias", "offset": 882851840, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.6.ffn_norm.weight", "offset": 882860032, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.6.attn_k.bias", "offset": 882868224, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.6.attn_k.weight", "offset": 882876416, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.6.attn_output.weight", "offset": 885235712, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.6.attn_q.bias", "offset": 887595008, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.6.attn_q.weight", "offset": 887603200, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.6.attn_v.bias", "offset": 889962496, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.6.attn_v.weight", "offset": 889970688, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.7.attn_norm.bias", "offset": 892329984, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.7.attn_norm.weight", "offset": 892338176, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.7.ffn_down.weight", "offset": 892346368, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.7.ffn_gate.weight", "offset": 898834432, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.7.ffn_up.weight", "offset": 905322496, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.7.ffn_norm.bias", "offset": 911810560, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.7.ffn_norm.weight", "offset": 911818752, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.7.attn_k.bias", "offset": 911826944, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.7.attn_k.weight", "offset": 911835136, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.7.attn_output.weight", "offset": 914194432, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.7.attn_q.bias", "offset": 916553728, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.7.attn_q.weight", "offset": 916561920, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.7.attn_v.bias", "offset": 918921216, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.7.attn_v.weight", "offset": 918929408, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.8.attn_norm.bias", "offset": 921288704, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.8.attn_norm.weight", "offset": 921296896, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.8.ffn_down.weight", "offset": 921305088, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.8.ffn_gate.weight", "offset": 927793152, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.8.ffn_up.weight", "offset": 934281216, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.8.ffn_norm.bias", "offset": 940769280, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.8.ffn_norm.weight", "offset": 940777472, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.8.attn_k.bias", "offset": 940785664, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.8.attn_k.weight", "offset": 940793856, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.8.attn_output.weight", "offset": 943153152, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.8.attn_q.bias", "offset": 945512448, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.8.attn_q.weight", "offset": 945520640, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.8.attn_v.bias", "offset": 947879936, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.8.attn_v.weight", "offset": 947888128, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.9.attn_norm.bias", "offset": 950247424, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.9.attn_norm.weight", "offset": 950255616, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.9.ffn_down.weight", "offset": 950263808, "shape": [ 5632, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.9.ffn_gate.weight", "offset": 956751872, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.9.ffn_up.weight", "offset": 963239936, "shape": [ 2048, 5632 ], "size": 6488064, "type": 2 }, { "name": "blk.9.ffn_norm.bias", "offset": 969728000, "shape": [ 2048 ], "size": 6488064, "type": 0 }, { "name": "blk.9.ffn_norm.weight", "offset": 969736192, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.9.attn_k.bias", "offset": 969744384, "shape": [ 2048 ], "size": 8192, "type": 0 }, { "name": "blk.9.attn_k.weight", "offset": 969752576, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.9.attn_output.weight", "offset": 972111872, "shape": [ 2048, 2048 ], "size": 2359296, "type": 2 }, { "name": "blk.9.attn_q.bias", "offset": 974471168, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.9.attn_q.weight", "offset": 974479360, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "blk.9.attn_v.bias", "offset": 976838656, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "blk.9.attn_v.weight", "offset": 976846848, "shape": [ 2048, 2048 ], "size": 8192, "type": 2 }, { "name": "output_norm.bias", "offset": 979206144, "shape": [ 2048 ], "size": 2359296, "type": 0 }, { "name": "output_norm.weight", "offset": 979214336, "shape": [ 2048 ], "size": 8192, "type": 0 } ], "version": 3 }