3 hours ago

A Mistral Nemo 12B fine-tune that speaks in the voice of Marcus Aurelius, the Roman emperor and Stoic philosopher whose Meditations were composed in Greek on the Danubian frontier and never published in his lifetime.

tools
710ef564cf87 · 456MB
    Metadata
  • general.architecture
    llama
  • adapter.lora.alpha
    128
  • adapter.type
    lora
  • Tensor
    blk.0
  • blk.0.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.0.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.0.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.0.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.0.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.0.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.0.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.0.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.0.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.0.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.0.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.0.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.0.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.0.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.1
  • blk.1.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.1.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.1.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.1.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.1.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.1.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.1.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.1.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.1.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.1.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.1.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.1.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.1.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.1.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.2
  • blk.2.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.2.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.2.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.2.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.2.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.2.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.2.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.2.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.2.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.2.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.2.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.2.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.2.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.2.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.3
  • blk.3.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.3.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.3.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.3.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.3.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.3.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.3.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.3.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.3.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.3.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.3.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.3.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.3.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.3.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.4
  • blk.4.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.4.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.4.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.4.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.4.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.4.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.4.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.4.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.4.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.4.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.4.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.4.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.4.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.4.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.5
  • blk.5.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.5.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.5.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.5.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.5.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.5.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.5.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.5.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.5.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.5.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.5.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.5.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.5.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.5.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.6
  • blk.6.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.6.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.6.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.6.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.6.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.6.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.6.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.6.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.6.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.6.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.6.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.6.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.6.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.6.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.7
  • blk.7.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.7.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.7.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.7.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.7.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.7.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.7.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.7.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.7.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.7.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.7.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.7.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.7.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.7.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.8
  • blk.8.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.8.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.8.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.8.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.8.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.8.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.8.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.8.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.8.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.8.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.8.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.8.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.8.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.8.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.9
  • blk.9.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.9.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.9.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.9.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.9.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.9.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.9.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.9.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.9.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.9.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.9.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.9.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.9.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.9.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.10
  • blk.10.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.10.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.10.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.10.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.10.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.10.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.10.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.10.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.10.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.10.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.10.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.10.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.10.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.10.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.11
  • blk.11.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.11.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.11.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.11.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.11.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.11.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.11.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.11.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.11.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.11.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.11.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.11.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.11.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.11.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.12
  • blk.12.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.12.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.12.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.12.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.12.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.12.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.12.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.12.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.12.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.12.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.12.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.12.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.12.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.12.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.13
  • blk.13.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.13.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.13.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.13.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.13.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.13.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.13.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.13.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.13.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.13.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.13.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.13.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.13.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.13.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.14
  • blk.14.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.14.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.14.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.14.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.14.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.14.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.14.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.14.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.14.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.14.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.14.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.14.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.14.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.14.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.15
  • blk.15.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.15.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.15.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.15.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.15.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.15.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.15.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.15.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.15.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.15.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.15.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.15.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.15.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.15.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.16
  • blk.16.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.16.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.16.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.16.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.16.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.16.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.16.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.16.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.16.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.16.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.16.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.16.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.16.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.16.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.17
  • blk.17.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.17.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.17.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.17.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.17.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.17.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.17.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.17.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.17.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.17.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.17.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.17.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.17.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.17.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.18
  • blk.18.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.18.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.18.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.18.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.18.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.18.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.18.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.18.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.18.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.18.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.18.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.18.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.18.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.18.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.19
  • blk.19.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.19.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.19.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.19.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.19.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.19.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.19.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.19.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.19.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.19.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.19.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.19.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.19.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.19.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.20
  • blk.20.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.20.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.20.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.20.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.20.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.20.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.20.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.20.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.20.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.20.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.20.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.20.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.20.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.20.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.21
  • blk.21.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.21.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.21.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.21.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.21.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.21.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.21.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.21.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.21.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.21.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.21.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.21.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.21.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.21.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.22
  • blk.22.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.22.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.22.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.22.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.22.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.22.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.22.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.22.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.22.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.22.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.22.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.22.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.22.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.22.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.23
  • blk.23.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.23.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.23.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.23.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.23.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.23.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.23.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.23.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.23.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.23.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.23.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.23.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.23.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.23.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.24
  • blk.24.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.24.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.24.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.24.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.24.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.24.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.24.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.24.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.24.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.24.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.24.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.24.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.24.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.24.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.25
  • blk.25.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.25.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.25.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.25.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.25.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.25.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.25.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.25.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.25.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.25.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.25.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.25.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.25.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.25.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.26
  • blk.26.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.26.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.26.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.26.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.26.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.26.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.26.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.26.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.26.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.26.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.26.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.26.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.26.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.26.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.27
  • blk.27.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.27.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.27.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.27.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.27.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.27.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.27.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.27.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.27.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.27.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.27.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.27.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.27.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.27.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.28
  • blk.28.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.28.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.28.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.28.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.28.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.28.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.28.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.28.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.28.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.28.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.28.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.28.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.28.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.28.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.29
  • blk.29.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.29.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.29.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.29.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.29.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.29.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.29.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.29.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.29.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.29.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.29.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.29.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.29.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.29.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.30
  • blk.30.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.30.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.30.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.30.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.30.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.30.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.30.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.30.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.30.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.30.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.30.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.30.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.30.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.30.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.31
  • blk.31.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.31.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.31.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.31.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.31.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.31.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.31.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.31.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.31.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.31.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.31.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.31.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.31.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.31.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.32
  • blk.32.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.32.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.32.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.32.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.32.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.32.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.32.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.32.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.32.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.32.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.32.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.32.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.32.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.32.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.33
  • blk.33.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.33.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.33.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.33.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.33.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.33.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.33.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.33.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.33.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.33.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.33.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.33.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.33.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.33.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.34
  • blk.34.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.34.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.34.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.34.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.34.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.34.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.34.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.34.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.34.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.34.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.34.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.34.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.34.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.34.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.35
  • blk.35.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.35.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.35.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.35.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.35.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.35.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.35.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.35.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.35.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.35.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.35.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.35.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.35.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.35.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.36
  • blk.36.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.36.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.36.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.36.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.36.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.36.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.36.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.36.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.36.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.36.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.36.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.36.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.36.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.36.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.37
  • blk.37.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.37.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.37.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.37.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.37.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.37.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.37.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.37.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.37.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.37.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.37.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.37.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.37.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.37.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.38
  • blk.38.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.38.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.38.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.38.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.38.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.38.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.38.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.38.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.38.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.38.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.38.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.38.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.38.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.38.ffn_up.weight.lora_b
    F16
    [64, 14336]
  • blk.39
  • blk.39.attn_k.weight.lora_a
    F16
    [5120, 64]
  • blk.39.attn_k.weight.lora_b
    F16
    [64, 1024]
  • blk.39.attn_output.weight.lora_a
    F16
    [4096, 64]
  • blk.39.attn_output.weight.lora_b
    F16
    [64, 5120]
  • blk.39.attn_q.weight.lora_a
    F16
    [5120, 64]
  • blk.39.attn_q.weight.lora_b
    F16
    [64, 4096]
  • blk.39.attn_v.weight.lora_a
    F16
    [5120, 64]
  • blk.39.attn_v.weight.lora_b
    F16
    [64, 1024]
  • blk.39.ffn_down.weight.lora_a
    F16
    [14336, 64]
  • blk.39.ffn_down.weight.lora_b
    F16
    [64, 5120]
  • blk.39.ffn_gate.weight.lora_a
    F16
    [5120, 64]
  • blk.39.ffn_gate.weight.lora_b
    F16
    [64, 14336]
  • blk.39.ffn_up.weight.lora_a
    F16
    [5120, 64]
  • blk.39.ffn_up.weight.lora_b
    F16
    [64, 14336]