latest
6.1GB
Finetuned on continue's autocomplete data (600 steps)
3B
169 Pulls Updated 5 months ago
7655c15bd581 · 6.1GB
-
general.architecturestarcoder2
-
general.file_typeF16
-
starcoder2.attention.head_count24
-
starcoder2.attention.head_count_kv2
-
starcoder2.attention.layer_norm_epsilon1e-05
-
starcoder2.block_count30
-
starcoder2.context_length16384
-
starcoder2.embedding_length3072
-
starcoder2.feed_forward_length12288
-
starcoder2.rope.freq_base999999.44
-
tokenizer.ggml.bos_token_id0
-
tokenizer.ggml.eos_token_id0
-
tokenizer.ggml.merges[Ġ Ġ ĠĠ ĠĠ ĠĠĠĠ ĠĠĠĠ ĠĠ Ġ e r ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.token_type[3 3 3 3 3 ...]
-
tokenizer.ggml.tokens[<|endoftext|> <fim_prefix> <fim_middle> <fim_suffix> <fim_pad> ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightF16[3072 49152]
-
blk.0.attn_norm.biasF32[3072]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.ffn_up.biasF32[12288]
-
blk.0.ffn_up.weightF16[3072 12288]
-
blk.0.ffn_down.biasF32[3072]
-
blk.0.ffn_down.weightF16[12288 3072]
-
blk.0.ffn_norm.biasF32[3072]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.0.attn_k.biasF32[256]
-
blk.0.attn_k.weightF16[3072 256]
-
blk.0.attn_output.biasF32[3072]
-
blk.0.attn_output.weightF16[3072 3072]
-
blk.0.attn_q.biasF32[3072]
-
blk.0.attn_q.weightF16[3072 3072]
-
blk.0.attn_v.biasF32[256]
-
blk.0.attn_v.weightF16[3072 256]
-
blk.1.attn_norm.biasF32[3072]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.ffn_up.biasF32[12288]
-
blk.1.ffn_up.weightF16[3072 12288]
-
blk.1.ffn_down.biasF32[3072]
-
blk.1.ffn_down.weightF16[12288 3072]
-
blk.1.ffn_norm.biasF32[3072]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.1.attn_k.biasF32[256]
-
blk.1.attn_k.weightF16[3072 256]
-
blk.1.attn_output.biasF32[3072]
-
blk.1.attn_output.weightF16[3072 3072]
-
blk.1.attn_q.biasF32[3072]
-
blk.1.attn_q.weightF16[3072 3072]
-
blk.1.attn_v.biasF32[256]
-
blk.1.attn_v.weightF16[3072 256]
-
blk.2.attn_norm.biasF32[3072]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.ffn_up.biasF32[12288]
-
blk.2.ffn_up.weightF16[3072 12288]
-
blk.2.ffn_down.biasF32[3072]
-
blk.2.ffn_down.weightF16[12288 3072]
-
blk.2.ffn_norm.biasF32[3072]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.2.attn_k.biasF32[256]
-
blk.2.attn_k.weightF16[3072 256]
-
blk.2.attn_output.biasF32[3072]
-
blk.2.attn_output.weightF16[3072 3072]
-
blk.2.attn_q.biasF32[3072]
-
blk.2.attn_q.weightF16[3072 3072]
-
blk.2.attn_v.biasF32[256]
-
blk.2.attn_v.weightF16[3072 256]
-
blk.3.attn_norm.biasF32[3072]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.ffn_up.biasF32[12288]
-
blk.3.ffn_up.weightF16[3072 12288]
-
blk.3.ffn_down.biasF32[3072]
-
blk.3.ffn_down.weightF16[12288 3072]
-
blk.3.ffn_norm.biasF32[3072]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.3.attn_k.biasF32[256]
-
blk.3.attn_k.weightF16[3072 256]
-
blk.3.attn_output.biasF32[3072]
-
blk.3.attn_output.weightF16[3072 3072]
-
blk.3.attn_q.biasF32[3072]
-
blk.3.attn_q.weightF16[3072 3072]
-
blk.3.attn_v.biasF32[256]
-
blk.3.attn_v.weightF16[3072 256]
-
blk.4.attn_norm.biasF32[3072]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.ffn_up.biasF32[12288]
-
blk.4.ffn_up.weightF16[3072 12288]
-
blk.4.ffn_down.biasF32[3072]
-
blk.4.ffn_down.weightF16[12288 3072]
-
blk.4.ffn_norm.biasF32[3072]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.4.attn_k.biasF32[256]
-
blk.4.attn_k.weightF16[3072 256]
-
blk.4.attn_output.biasF32[3072]
-
blk.4.attn_output.weightF16[3072 3072]
-
blk.4.attn_q.biasF32[3072]
-
blk.4.attn_q.weightF16[3072 3072]
-
blk.4.attn_v.biasF32[256]
-
blk.4.attn_v.weightF16[3072 256]
-
blk.5.attn_norm.biasF32[3072]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.ffn_up.biasF32[12288]
-
blk.5.ffn_up.weightF16[3072 12288]
-
blk.5.ffn_down.biasF32[3072]
-
blk.5.ffn_down.weightF16[12288 3072]
-
blk.5.ffn_norm.biasF32[3072]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.5.attn_k.biasF32[256]
-
blk.5.attn_k.weightF16[3072 256]
-
blk.5.attn_output.biasF32[3072]
-
blk.5.attn_output.weightF16[3072 3072]
-
blk.5.attn_q.biasF32[3072]
-
blk.5.attn_q.weightF16[3072 3072]
-
blk.5.attn_v.biasF32[256]
-
blk.5.attn_v.weightF16[3072 256]
-
blk.6.attn_norm.biasF32[3072]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.ffn_up.biasF32[12288]
-
blk.6.ffn_up.weightF16[3072 12288]
-
blk.6.ffn_down.biasF32[3072]
-
blk.6.ffn_down.weightF16[12288 3072]
-
blk.6.ffn_norm.biasF32[3072]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.6.attn_k.biasF32[256]
-
blk.6.attn_k.weightF16[3072 256]
-
blk.6.attn_output.biasF32[3072]
-
blk.6.attn_output.weightF16[3072 3072]
-
blk.6.attn_q.biasF32[3072]
-
blk.6.attn_q.weightF16[3072 3072]
-
blk.6.attn_v.biasF32[256]
-
blk.6.attn_v.weightF16[3072 256]
-
blk.7.attn_norm.biasF32[3072]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.ffn_up.biasF32[12288]
-
blk.7.ffn_up.weightF16[3072 12288]
-
blk.7.ffn_down.biasF32[3072]
-
blk.7.ffn_down.weightF16[12288 3072]
-
blk.7.ffn_norm.biasF32[3072]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.7.attn_k.biasF32[256]
-
blk.7.attn_k.weightF16[3072 256]
-
blk.7.attn_output.biasF32[3072]
-
blk.7.attn_output.weightF16[3072 3072]
-
blk.7.attn_q.biasF32[3072]
-
blk.7.attn_q.weightF16[3072 3072]
-
blk.7.attn_v.biasF32[256]
-
blk.7.attn_v.weightF16[3072 256]
-
blk.8.attn_norm.biasF32[3072]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.ffn_up.biasF32[12288]
-
blk.8.ffn_up.weightF16[3072 12288]
-
blk.8.ffn_down.biasF32[3072]
-
blk.8.ffn_down.weightF16[12288 3072]
-
blk.8.ffn_norm.biasF32[3072]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.8.attn_k.biasF32[256]
-
blk.8.attn_k.weightF16[3072 256]
-
blk.8.attn_output.biasF32[3072]
-
blk.8.attn_output.weightF16[3072 3072]
-
blk.8.attn_q.biasF32[3072]
-
blk.8.attn_q.weightF16[3072 3072]
-
blk.8.attn_v.biasF32[256]
-
blk.8.attn_v.weightF16[3072 256]
-
blk.9.attn_norm.biasF32[3072]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.ffn_up.biasF32[12288]
-
blk.9.ffn_up.weightF16[3072 12288]
-
blk.9.ffn_down.biasF32[3072]
-
blk.9.ffn_down.weightF16[12288 3072]
-
blk.9.ffn_norm.biasF32[3072]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.9.attn_k.biasF32[256]
-
blk.9.attn_k.weightF16[3072 256]
-
blk.9.attn_output.biasF32[3072]
-
blk.9.attn_output.weightF16[3072 3072]
-
blk.9.attn_q.biasF32[3072]
-
blk.9.attn_q.weightF16[3072 3072]
-
blk.9.attn_v.biasF32[256]
-
blk.9.attn_v.weightF16[3072 256]
-
blk.10.attn_norm.biasF32[3072]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.ffn_up.biasF32[12288]
-
blk.10.ffn_up.weightF16[3072 12288]
-
blk.10.ffn_down.biasF32[3072]
-
blk.10.ffn_down.weightF16[12288 3072]
-
blk.10.ffn_norm.biasF32[3072]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.10.attn_k.biasF32[256]
-
blk.10.attn_k.weightF16[3072 256]
-
blk.10.attn_output.biasF32[3072]
-
blk.10.attn_output.weightF16[3072 3072]
-
blk.10.attn_q.biasF32[3072]
-
blk.10.attn_q.weightF16[3072 3072]
-
blk.10.attn_v.biasF32[256]
-
blk.10.attn_v.weightF16[3072 256]
-
blk.11.attn_k.biasF32[256]
-
blk.11.attn_k.weightF16[3072 256]
-
blk.11.attn_output.biasF32[3072]
-
blk.11.attn_output.weightF16[3072 3072]
-
blk.11.attn_q.biasF32[3072]
-
blk.11.attn_q.weightF16[3072 3072]
-
blk.11.attn_v.biasF32[256]
-
blk.11.attn_v.weightF16[3072 256]
-
blk.11.attn_norm.biasF32[3072]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.ffn_up.biasF32[12288]
-
blk.11.ffn_up.weightF16[3072 12288]
-
blk.11.ffn_down.biasF32[3072]
-
blk.11.ffn_down.weightF16[12288 3072]
-
blk.11.ffn_norm.biasF32[3072]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.12.attn_norm.biasF32[3072]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.ffn_up.biasF32[12288]
-
blk.12.ffn_up.weightF16[3072 12288]
-
blk.12.ffn_down.biasF32[3072]
-
blk.12.ffn_down.weightF16[12288 3072]
-
blk.12.ffn_norm.biasF32[3072]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.12.attn_k.biasF32[256]
-
blk.12.attn_k.weightF16[3072 256]
-
blk.12.attn_output.biasF32[3072]
-
blk.12.attn_output.weightF16[3072 3072]
-
blk.12.attn_q.biasF32[3072]
-
blk.12.attn_q.weightF16[3072 3072]
-
blk.12.attn_v.biasF32[256]
-
blk.12.attn_v.weightF16[3072 256]
-
blk.13.attn_norm.biasF32[3072]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.ffn_up.biasF32[12288]
-
blk.13.ffn_up.weightF16[3072 12288]
-
blk.13.ffn_down.biasF32[3072]
-
blk.13.ffn_down.weightF16[12288 3072]
-
blk.13.ffn_norm.biasF32[3072]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.13.attn_k.biasF32[256]
-
blk.13.attn_k.weightF16[3072 256]
-
blk.13.attn_output.biasF32[3072]
-
blk.13.attn_output.weightF16[3072 3072]
-
blk.13.attn_q.biasF32[3072]
-
blk.13.attn_q.weightF16[3072 3072]
-
blk.13.attn_v.biasF32[256]
-
blk.13.attn_v.weightF16[3072 256]
-
blk.14.attn_norm.biasF32[3072]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.ffn_up.biasF32[12288]
-
blk.14.ffn_up.weightF16[3072 12288]
-
blk.14.ffn_down.biasF32[3072]
-
blk.14.ffn_down.weightF16[12288 3072]
-
blk.14.ffn_norm.biasF32[3072]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.14.attn_k.biasF32[256]
-
blk.14.attn_k.weightF16[3072 256]
-
blk.14.attn_output.biasF32[3072]
-
blk.14.attn_output.weightF16[3072 3072]
-
blk.14.attn_q.biasF32[3072]
-
blk.14.attn_q.weightF16[3072 3072]
-
blk.14.attn_v.biasF32[256]
-
blk.14.attn_v.weightF16[3072 256]
-
blk.15.attn_norm.biasF32[3072]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.ffn_up.biasF32[12288]
-
blk.15.ffn_up.weightF16[3072 12288]
-
blk.15.ffn_down.biasF32[3072]
-
blk.15.ffn_down.weightF16[12288 3072]
-
blk.15.ffn_norm.biasF32[3072]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.15.attn_k.biasF32[256]
-
blk.15.attn_k.weightF16[3072 256]
-
blk.15.attn_output.biasF32[3072]
-
blk.15.attn_output.weightF16[3072 3072]
-
blk.15.attn_q.biasF32[3072]
-
blk.15.attn_q.weightF16[3072 3072]
-
blk.15.attn_v.biasF32[256]
-
blk.15.attn_v.weightF16[3072 256]
-
blk.16.attn_norm.biasF32[3072]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.ffn_up.biasF32[12288]
-
blk.16.ffn_up.weightF16[3072 12288]
-
blk.16.ffn_down.biasF32[3072]
-
blk.16.ffn_down.weightF16[12288 3072]
-
blk.16.ffn_norm.biasF32[3072]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.16.attn_k.biasF32[256]
-
blk.16.attn_k.weightF16[3072 256]
-
blk.16.attn_output.biasF32[3072]
-
blk.16.attn_output.weightF16[3072 3072]
-
blk.16.attn_q.biasF32[3072]
-
blk.16.attn_q.weightF16[3072 3072]
-
blk.16.attn_v.biasF32[256]
-
blk.16.attn_v.weightF16[3072 256]
-
blk.17.attn_norm.biasF32[3072]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.ffn_up.biasF32[12288]
-
blk.17.ffn_up.weightF16[3072 12288]
-
blk.17.ffn_down.biasF32[3072]
-
blk.17.ffn_down.weightF16[12288 3072]
-
blk.17.ffn_norm.biasF32[3072]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.17.attn_k.biasF32[256]
-
blk.17.attn_k.weightF16[3072 256]
-
blk.17.attn_output.biasF32[3072]
-
blk.17.attn_output.weightF16[3072 3072]
-
blk.17.attn_q.biasF32[3072]
-
blk.17.attn_q.weightF16[3072 3072]
-
blk.17.attn_v.biasF32[256]
-
blk.17.attn_v.weightF16[3072 256]
-
blk.18.attn_norm.biasF32[3072]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.ffn_up.biasF32[12288]
-
blk.18.ffn_up.weightF16[3072 12288]
-
blk.18.ffn_down.biasF32[3072]
-
blk.18.ffn_down.weightF16[12288 3072]
-
blk.18.ffn_norm.biasF32[3072]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.18.attn_k.biasF32[256]
-
blk.18.attn_k.weightF16[3072 256]
-
blk.18.attn_output.biasF32[3072]
-
blk.18.attn_output.weightF16[3072 3072]
-
blk.18.attn_q.biasF32[3072]
-
blk.18.attn_q.weightF16[3072 3072]
-
blk.18.attn_v.biasF32[256]
-
blk.18.attn_v.weightF16[3072 256]
-
blk.19.attn_norm.biasF32[3072]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.ffn_up.biasF32[12288]
-
blk.19.ffn_up.weightF16[3072 12288]
-
blk.19.ffn_down.biasF32[3072]
-
blk.19.ffn_down.weightF16[12288 3072]
-
blk.19.ffn_norm.biasF32[3072]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.19.attn_k.biasF32[256]
-
blk.19.attn_k.weightF16[3072 256]
-
blk.19.attn_output.biasF32[3072]
-
blk.19.attn_output.weightF16[3072 3072]
-
blk.19.attn_q.biasF32[3072]
-
blk.19.attn_q.weightF16[3072 3072]
-
blk.19.attn_v.biasF32[256]
-
blk.19.attn_v.weightF16[3072 256]
-
blk.20.attn_norm.biasF32[3072]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.ffn_up.biasF32[12288]
-
blk.20.ffn_up.weightF16[3072 12288]
-
blk.20.ffn_down.biasF32[3072]
-
blk.20.ffn_down.weightF16[12288 3072]
-
blk.20.ffn_norm.biasF32[3072]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.20.attn_k.biasF32[256]
-
blk.20.attn_k.weightF16[3072 256]
-
blk.20.attn_output.biasF32[3072]
-
blk.20.attn_output.weightF16[3072 3072]
-
blk.20.attn_q.biasF32[3072]
-
blk.20.attn_q.weightF16[3072 3072]
-
blk.20.attn_v.biasF32[256]
-
blk.20.attn_v.weightF16[3072 256]
-
blk.21.attn_norm.biasF32[3072]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.ffn_up.biasF32[12288]
-
blk.21.ffn_up.weightF16[3072 12288]
-
blk.21.ffn_down.biasF32[3072]
-
blk.21.ffn_down.weightF16[12288 3072]
-
blk.21.ffn_norm.biasF32[3072]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.21.attn_k.biasF32[256]
-
blk.21.attn_k.weightF16[3072 256]
-
blk.21.attn_output.biasF32[3072]
-
blk.21.attn_output.weightF16[3072 3072]
-
blk.21.attn_q.biasF32[3072]
-
blk.21.attn_q.weightF16[3072 3072]
-
blk.21.attn_v.biasF32[256]
-
blk.21.attn_v.weightF16[3072 256]
-
blk.22.attn_norm.biasF32[3072]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.ffn_up.biasF32[12288]
-
blk.22.ffn_up.weightF16[3072 12288]
-
blk.22.ffn_down.biasF32[3072]
-
blk.22.ffn_down.weightF16[12288 3072]
-
blk.22.ffn_norm.biasF32[3072]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.22.attn_k.biasF32[256]
-
blk.22.attn_k.weightF16[3072 256]
-
blk.22.attn_output.biasF32[3072]
-
blk.22.attn_output.weightF16[3072 3072]
-
blk.22.attn_q.biasF32[3072]
-
blk.22.attn_q.weightF16[3072 3072]
-
blk.22.attn_v.biasF32[256]
-
blk.22.attn_v.weightF16[3072 256]
-
blk.23.attn_norm.biasF32[3072]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.ffn_up.biasF32[12288]
-
blk.23.ffn_up.weightF16[3072 12288]
-
blk.23.ffn_down.biasF32[3072]
-
blk.23.ffn_down.weightF16[12288 3072]
-
blk.23.ffn_norm.biasF32[3072]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.23.attn_k.biasF32[256]
-
blk.23.attn_k.weightF16[3072 256]
-
blk.23.attn_output.biasF32[3072]
-
blk.23.attn_output.weightF16[3072 3072]
-
blk.23.attn_q.biasF32[3072]
-
blk.23.attn_q.weightF16[3072 3072]
-
blk.23.attn_v.biasF32[256]
-
blk.23.attn_v.weightF16[3072 256]
-
blk.24.attn_k.biasF32[256]
-
blk.24.attn_k.weightF16[3072 256]
-
blk.24.attn_output.biasF32[3072]
-
blk.24.attn_output.weightF16[3072 3072]
-
blk.24.attn_q.biasF32[3072]
-
blk.24.attn_q.weightF16[3072 3072]
-
blk.24.attn_v.biasF32[256]
-
blk.24.attn_v.weightF16[3072 256]
-
blk.24.attn_norm.biasF32[3072]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.ffn_up.biasF32[12288]
-
blk.24.ffn_up.weightF16[3072 12288]
-
blk.24.ffn_down.biasF32[3072]
-
blk.24.ffn_down.weightF16[12288 3072]
-
blk.24.ffn_norm.biasF32[3072]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.25.attn_norm.biasF32[3072]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.ffn_up.biasF32[12288]
-
blk.25.ffn_up.weightF16[3072 12288]
-
blk.25.ffn_down.biasF32[3072]
-
blk.25.ffn_down.weightF16[12288 3072]
-
blk.25.ffn_norm.biasF32[3072]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.25.attn_k.biasF32[256]
-
blk.25.attn_k.weightF16[3072 256]
-
blk.25.attn_output.biasF32[3072]
-
blk.25.attn_output.weightF16[3072 3072]
-
blk.25.attn_q.biasF32[3072]
-
blk.25.attn_q.weightF16[3072 3072]
-
blk.25.attn_v.biasF32[256]
-
blk.25.attn_v.weightF16[3072 256]
-
blk.26.attn_norm.biasF32[3072]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.ffn_up.biasF32[12288]
-
blk.26.ffn_up.weightF16[3072 12288]
-
blk.26.ffn_down.biasF32[3072]
-
blk.26.ffn_down.weightF16[12288 3072]
-
blk.26.ffn_norm.biasF32[3072]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.26.attn_k.biasF32[256]
-
blk.26.attn_k.weightF16[3072 256]
-
blk.26.attn_output.biasF32[3072]
-
blk.26.attn_output.weightF16[3072 3072]
-
blk.26.attn_q.biasF32[3072]
-
blk.26.attn_q.weightF16[3072 3072]
-
blk.26.attn_v.biasF32[256]
-
blk.26.attn_v.weightF16[3072 256]
-
blk.27.attn_norm.biasF32[3072]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.ffn_up.biasF32[12288]
-
blk.27.ffn_up.weightF16[3072 12288]
-
blk.27.ffn_down.biasF32[3072]
-
blk.27.ffn_down.weightF16[12288 3072]
-
blk.27.ffn_norm.biasF32[3072]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.27.attn_k.biasF32[256]
-
blk.27.attn_k.weightF16[3072 256]
-
blk.27.attn_output.biasF32[3072]
-
blk.27.attn_output.weightF16[3072 3072]
-
blk.27.attn_q.biasF32[3072]
-
blk.27.attn_q.weightF16[3072 3072]
-
blk.27.attn_v.biasF32[256]
-
blk.27.attn_v.weightF16[3072 256]
-
blk.28.attn_norm.biasF32[3072]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.ffn_up.biasF32[12288]
-
blk.28.ffn_up.weightF16[3072 12288]
-
blk.28.ffn_down.biasF32[3072]
-
blk.28.ffn_down.weightF16[12288 3072]
-
blk.28.ffn_norm.biasF32[3072]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.28.attn_k.biasF32[256]
-
blk.28.attn_k.weightF16[3072 256]
-
blk.28.attn_output.biasF32[3072]
-
blk.28.attn_output.weightF16[3072 3072]
-
blk.28.attn_q.biasF32[3072]
-
blk.28.attn_q.weightF16[3072 3072]
-
blk.28.attn_v.biasF32[256]
-
blk.28.attn_v.weightF16[3072 256]
-
blk.29.attn_norm.biasF32[3072]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.ffn_up.biasF32[12288]
-
blk.29.ffn_up.weightF16[3072 12288]
-
blk.29.ffn_down.biasF32[3072]
-
blk.29.ffn_down.weightF16[12288 3072]
-
blk.29.ffn_norm.biasF32[3072]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.29.attn_k.biasF32[256]
-
blk.29.attn_k.weightF16[3072 256]
-
blk.29.attn_output.biasF32[3072]
-
blk.29.attn_output.weightF16[3072 3072]
-
blk.29.attn_q.biasF32[3072]
-
blk.29.attn_q.weightF16[3072 3072]
-
blk.29.attn_v.biasF32[256]
-
blk.29.attn_v.weightF16[3072 256]
-
output_norm.biasF32[3072]
-
output_norm.weightF32[3072]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29