latest
6.1GB
Finetuned on the https://huggingface.co/datasets/oakela/dlt-github-2 dataset
3B
20 Pulls Updated 6 months ago
9e68b7633174 · 6.1GB
-
general.architecturestarcoder2
-
general.file_typeF16
-
starcoder2.attention.head_count24
-
starcoder2.attention.head_count_kv2
-
starcoder2.attention.layer_norm_epsilon1e-05
-
starcoder2.block_count30
-
starcoder2.context_length16384
-
starcoder2.embedding_length3072
-
starcoder2.feed_forward_length12288
-
starcoder2.rope.freq_base999999.44
-
tokenizer.ggml.bos_token_id0
-
tokenizer.ggml.eos_token_id0
-
tokenizer.ggml.merges[Ġ Ġ ĠĠ ĠĠ ĠĠĠĠ ĠĠĠĠ ĠĠ Ġ e r ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.token_type[3 3 3 3 3 ...]
-
tokenizer.ggml.tokens[<|endoftext|> <fim_prefix> <fim_middle> <fim_suffix> <fim_pad> ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightF16[3072 49152]
-
blk.0.attn_norm.biasF32[3072]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.ffn_up.biasF32[12288]
-
blk.0.ffn_up.weightF16[3072 12288]
-
blk.0.ffn_down.biasF32[3072]
-
blk.0.ffn_down.weightF16[12288 3072]
-
blk.0.ffn_norm.biasF32[3072]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.0.attn_k.biasF32[256]
-
blk.0.attn_k.weightF16[3072 256]
-
blk.0.attn_output.biasF32[3072]
-
blk.0.attn_output.weightF16[3072 3072]
-
blk.0.attn_q.biasF32[3072]
-
blk.0.attn_q.weightF16[3072 3072]
-
blk.0.attn_v.biasF32[256]
-
blk.0.attn_v.weightF16[3072 256]
-
blk.1.attn_norm.biasF32[3072]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.ffn_up.biasF32[12288]
-
blk.1.ffn_up.weightF16[3072 12288]
-
blk.1.ffn_down.biasF32[3072]
-
blk.1.ffn_down.weightF16[12288 3072]
-
blk.1.ffn_norm.biasF32[3072]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.1.attn_k.biasF32[256]
-
blk.1.attn_k.weightF16[3072 256]
-
blk.1.attn_output.biasF32[3072]
-
blk.1.attn_output.weightF16[3072 3072]
-
blk.1.attn_q.biasF32[3072]
-
blk.1.attn_q.weightF16[3072 3072]
-
blk.1.attn_v.biasF32[256]
-
blk.1.attn_v.weightF16[3072 256]
-
blk.2.attn_norm.biasF32[3072]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.ffn_up.biasF32[12288]
-
blk.2.ffn_up.weightF16[3072 12288]
-
blk.2.ffn_down.biasF32[3072]
-
blk.2.ffn_down.weightF16[12288 3072]
-
blk.2.ffn_norm.biasF32[3072]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.2.attn_k.biasF32[256]
-
blk.2.attn_k.weightF16[3072 256]
-
blk.2.attn_output.biasF32[3072]
-
blk.2.attn_output.weightF16[3072 3072]
-
blk.2.attn_q.biasF32[3072]
-
blk.2.attn_q.weightF16[3072 3072]
-
blk.2.attn_v.biasF32[256]
-
blk.2.attn_v.weightF16[3072 256]
-
blk.3.attn_norm.biasF32[3072]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.ffn_up.biasF32[12288]
-
blk.3.ffn_up.weightF16[3072 12288]
-
blk.3.ffn_down.biasF32[3072]
-
blk.3.ffn_down.weightF16[12288 3072]
-
blk.3.ffn_norm.biasF32[3072]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.3.attn_k.biasF32[256]
-
blk.3.attn_k.weightF16[3072 256]
-
blk.3.attn_output.biasF32[3072]
-
blk.3.attn_output.weightF16[3072 3072]
-
blk.3.attn_q.biasF32[3072]
-
blk.3.attn_q.weightF16[3072 3072]
-
blk.3.attn_v.biasF32[256]
-
blk.3.attn_v.weightF16[3072 256]
-
blk.4.attn_norm.biasF32[3072]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.ffn_up.biasF32[12288]
-
blk.4.ffn_up.weightF16[3072 12288]
-
blk.4.ffn_down.biasF32[3072]
-
blk.4.ffn_down.weightF16[12288 3072]
-
blk.4.ffn_norm.biasF32[3072]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.4.attn_k.biasF32[256]
-
blk.4.attn_k.weightF16[3072 256]
-
blk.4.attn_output.biasF32[3072]
-
blk.4.attn_output.weightF16[3072 3072]
-
blk.4.attn_q.biasF32[3072]
-
blk.4.attn_q.weightF16[3072 3072]
-
blk.4.attn_v.biasF32[256]
-
blk.4.attn_v.weightF16[3072 256]
-
blk.5.attn_norm.biasF32[3072]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.ffn_up.biasF32[12288]
-
blk.5.ffn_up.weightF16[3072 12288]
-
blk.5.ffn_down.biasF32[3072]
-
blk.5.ffn_down.weightF16[12288 3072]
-
blk.5.ffn_norm.biasF32[3072]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.5.attn_k.biasF32[256]
-
blk.5.attn_k.weightF16[3072 256]
-
blk.5.attn_output.biasF32[3072]
-
blk.5.attn_output.weightF16[3072 3072]
-
blk.5.attn_q.biasF32[3072]
-
blk.5.attn_q.weightF16[3072 3072]
-
blk.5.attn_v.biasF32[256]
-
blk.5.attn_v.weightF16[3072 256]
-
blk.6.attn_norm.biasF32[3072]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.ffn_up.biasF32[12288]
-
blk.6.ffn_up.weightF16[3072 12288]
-
blk.6.ffn_down.biasF32[3072]
-
blk.6.ffn_down.weightF16[12288 3072]
-
blk.6.ffn_norm.biasF32[3072]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.6.attn_k.biasF32[256]
-
blk.6.attn_k.weightF16[3072 256]
-
blk.6.attn_output.biasF32[3072]
-
blk.6.attn_output.weightF16[3072 3072]
-
blk.6.attn_q.biasF32[3072]
-
blk.6.attn_q.weightF16[3072 3072]
-
blk.6.attn_v.biasF32[256]
-
blk.6.attn_v.weightF16[3072 256]
-
blk.7.attn_norm.biasF32[3072]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.ffn_up.biasF32[12288]
-
blk.7.ffn_up.weightF16[3072 12288]
-
blk.7.ffn_down.biasF32[3072]
-
blk.7.ffn_down.weightF16[12288 3072]
-
blk.7.ffn_norm.biasF32[3072]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.7.attn_k.biasF32[256]
-
blk.7.attn_k.weightF16[3072 256]
-
blk.7.attn_output.biasF32[3072]
-
blk.7.attn_output.weightF16[3072 3072]
-
blk.7.attn_q.biasF32[3072]
-
blk.7.attn_q.weightF16[3072 3072]
-
blk.7.attn_v.biasF32[256]
-
blk.7.attn_v.weightF16[3072 256]
-
blk.8.attn_norm.biasF32[3072]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.ffn_up.biasF32[12288]
-
blk.8.ffn_up.weightF16[3072 12288]
-
blk.8.ffn_down.biasF32[3072]
-
blk.8.ffn_down.weightF16[12288 3072]
-
blk.8.ffn_norm.biasF32[3072]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.8.attn_k.biasF32[256]
-
blk.8.attn_k.weightF16[3072 256]
-
blk.8.attn_output.biasF32[3072]
-
blk.8.attn_output.weightF16[3072 3072]
-
blk.8.attn_q.biasF32[3072]
-
blk.8.attn_q.weightF16[3072 3072]
-
blk.8.attn_v.biasF32[256]
-
blk.8.attn_v.weightF16[3072 256]
-
blk.9.attn_norm.biasF32[3072]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.ffn_up.biasF32[12288]
-
blk.9.ffn_up.weightF16[3072 12288]
-
blk.9.ffn_down.biasF32[3072]
-
blk.9.ffn_down.weightF16[12288 3072]
-
blk.9.ffn_norm.biasF32[3072]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.9.attn_k.biasF32[256]
-
blk.9.attn_k.weightF16[3072 256]
-
blk.9.attn_output.biasF32[3072]
-
blk.9.attn_output.weightF16[3072 3072]
-
blk.9.attn_q.biasF32[3072]
-
blk.9.attn_q.weightF16[3072 3072]
-
blk.9.attn_v.biasF32[256]
-
blk.9.attn_v.weightF16[3072 256]
-
blk.10.attn_norm.biasF32[3072]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.ffn_up.biasF32[12288]
-
blk.10.ffn_up.weightF16[3072 12288]
-
blk.10.ffn_down.biasF32[3072]
-
blk.10.ffn_down.weightF16[12288 3072]
-
blk.10.ffn_norm.biasF32[3072]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.10.attn_k.biasF32[256]
-
blk.10.attn_k.weightF16[3072 256]
-
blk.10.attn_output.biasF32[3072]
-
blk.10.attn_output.weightF16[3072 3072]
-
blk.10.attn_q.biasF32[3072]
-
blk.10.attn_q.weightF16[3072 3072]
-
blk.10.attn_v.biasF32[256]
-
blk.10.attn_v.weightF16[3072 256]
-
blk.11.attn_k.biasF32[256]
-
blk.11.attn_k.weightF16[3072 256]
-
blk.11.attn_output.biasF32[3072]
-
blk.11.attn_output.weightF16[3072 3072]
-
blk.11.attn_q.biasF32[3072]
-
blk.11.attn_q.weightF16[3072 3072]
-
blk.11.attn_v.biasF32[256]
-
blk.11.attn_v.weightF16[3072 256]
-
blk.11.attn_norm.biasF32[3072]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.ffn_up.biasF32[12288]
-
blk.11.ffn_up.weightF16[3072 12288]
-
blk.11.ffn_down.biasF32[3072]
-
blk.11.ffn_down.weightF16[12288 3072]
-
blk.11.ffn_norm.biasF32[3072]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.12.attn_norm.biasF32[3072]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.ffn_up.biasF32[12288]
-
blk.12.ffn_up.weightF16[3072 12288]
-
blk.12.ffn_down.biasF32[3072]
-
blk.12.ffn_down.weightF16[12288 3072]
-
blk.12.ffn_norm.biasF32[3072]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.12.attn_k.biasF32[256]
-
blk.12.attn_k.weightF16[3072 256]
-
blk.12.attn_output.biasF32[3072]
-
blk.12.attn_output.weightF16[3072 3072]
-
blk.12.attn_q.biasF32[3072]
-
blk.12.attn_q.weightF16[3072 3072]
-
blk.12.attn_v.biasF32[256]
-
blk.12.attn_v.weightF16[3072 256]
-
blk.13.attn_norm.biasF32[3072]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.ffn_up.biasF32[12288]
-
blk.13.ffn_up.weightF16[3072 12288]
-
blk.13.ffn_down.biasF32[3072]
-
blk.13.ffn_down.weightF16[12288 3072]
-
blk.13.ffn_norm.biasF32[3072]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.13.attn_k.biasF32[256]
-
blk.13.attn_k.weightF16[3072 256]
-
blk.13.attn_output.biasF32[3072]
-
blk.13.attn_output.weightF16[3072 3072]
-
blk.13.attn_q.biasF32[3072]
-
blk.13.attn_q.weightF16[3072 3072]
-
blk.13.attn_v.biasF32[256]
-
blk.13.attn_v.weightF16[3072 256]
-
blk.14.attn_norm.biasF32[3072]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.ffn_up.biasF32[12288]
-
blk.14.ffn_up.weightF16[3072 12288]
-
blk.14.ffn_down.biasF32[3072]
-
blk.14.ffn_down.weightF16[12288 3072]
-
blk.14.ffn_norm.biasF32[3072]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.14.attn_k.biasF32[256]
-
blk.14.attn_k.weightF16[3072 256]
-
blk.14.attn_output.biasF32[3072]
-
blk.14.attn_output.weightF16[3072 3072]
-
blk.14.attn_q.biasF32[3072]
-
blk.14.attn_q.weightF16[3072 3072]
-
blk.14.attn_v.biasF32[256]
-
blk.14.attn_v.weightF16[3072 256]
-
blk.15.attn_norm.biasF32[3072]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.ffn_up.biasF32[12288]
-
blk.15.ffn_up.weightF16[3072 12288]
-
blk.15.ffn_down.biasF32[3072]
-
blk.15.ffn_down.weightF16[12288 3072]
-
blk.15.ffn_norm.biasF32[3072]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.15.attn_k.biasF32[256]
-
blk.15.attn_k.weightF16[3072 256]
-
blk.15.attn_output.biasF32[3072]
-
blk.15.attn_output.weightF16[3072 3072]
-
blk.15.attn_q.biasF32[3072]
-
blk.15.attn_q.weightF16[3072 3072]
-
blk.15.attn_v.biasF32[256]
-
blk.15.attn_v.weightF16[3072 256]
-
blk.16.attn_norm.biasF32[3072]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.ffn_up.biasF32[12288]
-
blk.16.ffn_up.weightF16[3072 12288]
-
blk.16.ffn_down.biasF32[3072]
-
blk.16.ffn_down.weightF16[12288 3072]
-
blk.16.ffn_norm.biasF32[3072]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.16.attn_k.biasF32[256]
-
blk.16.attn_k.weightF16[3072 256]
-
blk.16.attn_output.biasF32[3072]
-
blk.16.attn_output.weightF16[3072 3072]
-
blk.16.attn_q.biasF32[3072]
-
blk.16.attn_q.weightF16[3072 3072]
-
blk.16.attn_v.biasF32[256]
-
blk.16.attn_v.weightF16[3072 256]
-
blk.17.attn_norm.biasF32[3072]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.ffn_up.biasF32[12288]
-
blk.17.ffn_up.weightF16[3072 12288]
-
blk.17.ffn_down.biasF32[3072]
-
blk.17.ffn_down.weightF16[12288 3072]
-
blk.17.ffn_norm.biasF32[3072]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.17.attn_k.biasF32[256]
-
blk.17.attn_k.weightF16[3072 256]
-
blk.17.attn_output.biasF32[3072]
-
blk.17.attn_output.weightF16[3072 3072]
-
blk.17.attn_q.biasF32[3072]
-
blk.17.attn_q.weightF16[3072 3072]
-
blk.17.attn_v.biasF32[256]
-
blk.17.attn_v.weightF16[3072 256]
-
blk.18.attn_norm.biasF32[3072]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.ffn_up.biasF32[12288]
-
blk.18.ffn_up.weightF16[3072 12288]
-
blk.18.ffn_down.biasF32[3072]
-
blk.18.ffn_down.weightF16[12288 3072]
-
blk.18.ffn_norm.biasF32[3072]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.18.attn_k.biasF32[256]
-
blk.18.attn_k.weightF16[3072 256]
-
blk.18.attn_output.biasF32[3072]
-
blk.18.attn_output.weightF16[3072 3072]
-
blk.18.attn_q.biasF32[3072]
-
blk.18.attn_q.weightF16[3072 3072]
-
blk.18.attn_v.biasF32[256]
-
blk.18.attn_v.weightF16[3072 256]
-
blk.19.attn_norm.biasF32[3072]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.ffn_up.biasF32[12288]
-
blk.19.ffn_up.weightF16[3072 12288]
-
blk.19.ffn_down.biasF32[3072]
-
blk.19.ffn_down.weightF16[12288 3072]
-
blk.19.ffn_norm.biasF32[3072]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.19.attn_k.biasF32[256]
-
blk.19.attn_k.weightF16[3072 256]
-
blk.19.attn_output.biasF32[3072]
-
blk.19.attn_output.weightF16[3072 3072]
-
blk.19.attn_q.biasF32[3072]
-
blk.19.attn_q.weightF16[3072 3072]
-
blk.19.attn_v.biasF32[256]
-
blk.19.attn_v.weightF16[3072 256]
-
blk.20.attn_norm.biasF32[3072]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.ffn_up.biasF32[12288]
-
blk.20.ffn_up.weightF16[3072 12288]
-
blk.20.ffn_down.biasF32[3072]
-
blk.20.ffn_down.weightF16[12288 3072]
-
blk.20.ffn_norm.biasF32[3072]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.20.attn_k.biasF32[256]
-
blk.20.attn_k.weightF16[3072 256]
-
blk.20.attn_output.biasF32[3072]
-
blk.20.attn_output.weightF16[3072 3072]
-
blk.20.attn_q.biasF32[3072]
-
blk.20.attn_q.weightF16[3072 3072]
-
blk.20.attn_v.biasF32[256]
-
blk.20.attn_v.weightF16[3072 256]
-
blk.21.attn_norm.biasF32[3072]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.ffn_up.biasF32[12288]
-
blk.21.ffn_up.weightF16[3072 12288]
-
blk.21.ffn_down.biasF32[3072]
-
blk.21.ffn_down.weightF16[12288 3072]
-
blk.21.ffn_norm.biasF32[3072]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.21.attn_k.biasF32[256]
-
blk.21.attn_k.weightF16[3072 256]
-
blk.21.attn_output.biasF32[3072]
-
blk.21.attn_output.weightF16[3072 3072]
-
blk.21.attn_q.biasF32[3072]
-
blk.21.attn_q.weightF16[3072 3072]
-
blk.21.attn_v.biasF32[256]
-
blk.21.attn_v.weightF16[3072 256]
-
blk.22.attn_norm.biasF32[3072]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.ffn_up.biasF32[12288]
-
blk.22.ffn_up.weightF16[3072 12288]
-
blk.22.ffn_down.biasF32[3072]
-
blk.22.ffn_down.weightF16[12288 3072]
-
blk.22.ffn_norm.biasF32[3072]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.22.attn_k.biasF32[256]
-
blk.22.attn_k.weightF16[3072 256]
-
blk.22.attn_output.biasF32[3072]
-
blk.22.attn_output.weightF16[3072 3072]
-
blk.22.attn_q.biasF32[3072]
-
blk.22.attn_q.weightF16[3072 3072]
-
blk.22.attn_v.biasF32[256]
-
blk.22.attn_v.weightF16[3072 256]
-
blk.23.attn_norm.biasF32[3072]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.ffn_up.biasF32[12288]
-
blk.23.ffn_up.weightF16[3072 12288]
-
blk.23.ffn_down.biasF32[3072]
-
blk.23.ffn_down.weightF16[12288 3072]
-
blk.23.ffn_norm.biasF32[3072]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.23.attn_k.biasF32[256]
-
blk.23.attn_k.weightF16[3072 256]
-
blk.23.attn_output.biasF32[3072]
-
blk.23.attn_output.weightF16[3072 3072]
-
blk.23.attn_q.biasF32[3072]
-
blk.23.attn_q.weightF16[3072 3072]
-
blk.23.attn_v.biasF32[256]
-
blk.23.attn_v.weightF16[3072 256]
-
blk.24.attn_k.biasF32[256]
-
blk.24.attn_k.weightF16[3072 256]
-
blk.24.attn_output.biasF32[3072]
-
blk.24.attn_output.weightF16[3072 3072]
-
blk.24.attn_q.biasF32[3072]
-
blk.24.attn_q.weightF16[3072 3072]
-
blk.24.attn_v.biasF32[256]
-
blk.24.attn_v.weightF16[3072 256]
-
blk.24.attn_norm.biasF32[3072]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.ffn_up.biasF32[12288]
-
blk.24.ffn_up.weightF16[3072 12288]
-
blk.24.ffn_down.biasF32[3072]
-
blk.24.ffn_down.weightF16[12288 3072]
-
blk.24.ffn_norm.biasF32[3072]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.25.attn_norm.biasF32[3072]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.ffn_up.biasF32[12288]
-
blk.25.ffn_up.weightF16[3072 12288]
-
blk.25.ffn_down.biasF32[3072]
-
blk.25.ffn_down.weightF16[12288 3072]
-
blk.25.ffn_norm.biasF32[3072]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.25.attn_k.biasF32[256]
-
blk.25.attn_k.weightF16[3072 256]
-
blk.25.attn_output.biasF32[3072]
-
blk.25.attn_output.weightF16[3072 3072]
-
blk.25.attn_q.biasF32[3072]
-
blk.25.attn_q.weightF16[3072 3072]
-
blk.25.attn_v.biasF32[256]
-
blk.25.attn_v.weightF16[3072 256]
-
blk.26.attn_norm.biasF32[3072]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.ffn_up.biasF32[12288]
-
blk.26.ffn_up.weightF16[3072 12288]
-
blk.26.ffn_down.biasF32[3072]
-
blk.26.ffn_down.weightF16[12288 3072]
-
blk.26.ffn_norm.biasF32[3072]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.26.attn_k.biasF32[256]
-
blk.26.attn_k.weightF16[3072 256]
-
blk.26.attn_output.biasF32[3072]
-
blk.26.attn_output.weightF16[3072 3072]
-
blk.26.attn_q.biasF32[3072]
-
blk.26.attn_q.weightF16[3072 3072]
-
blk.26.attn_v.biasF32[256]
-
blk.26.attn_v.weightF16[3072 256]
-
blk.27.attn_norm.biasF32[3072]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.ffn_up.biasF32[12288]
-
blk.27.ffn_up.weightF16[3072 12288]
-
blk.27.ffn_down.biasF32[3072]
-
blk.27.ffn_down.weightF16[12288 3072]
-
blk.27.ffn_norm.biasF32[3072]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.27.attn_k.biasF32[256]
-
blk.27.attn_k.weightF16[3072 256]
-
blk.27.attn_output.biasF32[3072]
-
blk.27.attn_output.weightF16[3072 3072]
-
blk.27.attn_q.biasF32[3072]
-
blk.27.attn_q.weightF16[3072 3072]
-
blk.27.attn_v.biasF32[256]
-
blk.27.attn_v.weightF16[3072 256]
-
blk.28.attn_norm.biasF32[3072]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.ffn_up.biasF32[12288]
-
blk.28.ffn_up.weightF16[3072 12288]
-
blk.28.ffn_down.biasF32[3072]
-
blk.28.ffn_down.weightF16[12288 3072]
-
blk.28.ffn_norm.biasF32[3072]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.28.attn_k.biasF32[256]
-
blk.28.attn_k.weightF16[3072 256]
-
blk.28.attn_output.biasF32[3072]
-
blk.28.attn_output.weightF16[3072 3072]
-
blk.28.attn_q.biasF32[3072]
-
blk.28.attn_q.weightF16[3072 3072]
-
blk.28.attn_v.biasF32[256]
-
blk.28.attn_v.weightF16[3072 256]
-
blk.29.attn_norm.biasF32[3072]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.ffn_up.biasF32[12288]
-
blk.29.ffn_up.weightF16[3072 12288]
-
blk.29.ffn_down.biasF32[3072]
-
blk.29.ffn_down.weightF16[12288 3072]
-
blk.29.ffn_norm.biasF32[3072]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.29.attn_k.biasF32[256]
-
blk.29.attn_k.weightF16[3072 256]
-
blk.29.attn_output.biasF32[3072]
-
blk.29.attn_output.weightF16[3072 3072]
-
blk.29.attn_q.biasF32[3072]
-
blk.29.attn_q.weightF16[3072 3072]
-
blk.29.attn_v.biasF32[256]
-
blk.29.attn_v.weightF16[3072 256]
-
output_norm.biasF32[3072]
-
output_norm.weightF32[3072]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29