StarChat is a series of language models that are trained to act as helpful coding assistants.
80 Pulls Updated 8 months ago
28f7bac9dc65 · 32GB
-
general.architecturestarcoder
-
general.file_typeF16
-
starcoder.attention.head_count48
-
starcoder.attention.head_count_kv1
-
starcoder.attention.layer_norm_epsilon1e-05
-
starcoder.block_count40
-
starcoder.context_length8192
-
starcoder.embedding_length6144
-
starcoder.feed_forward_length24576
-
tokenizer.ggml.bos_token_id0
-
tokenizer.ggml.eos_token_id0
-
tokenizer.ggml.merges[Ġ Ġ ĠĠ ĠĠ ĠĠĠĠ ĠĠĠĠ ĠĠ Ġ e r ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.scores[0 0 0 0 0 ...]
-
tokenizer.ggml.token_type[1 1 1 1 1 ...]
-
tokenizer.ggml.tokens[<|endoftext|> <fim_prefix> <fim_middle> <fim_suffix> <fim_pad> ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightF16[6144 49156]
-
position_embd.weightF16[6144 8192]
-
blk.0.attn_norm.weightF32[6144]
-
blk.0.attn_norm.biasF32[6144]
-
blk.0.attn_qkv.weightF16[6144 6400]
-
blk.0.attn_qkv.biasF32[6400]
-
blk.0.attn_output.weightF16[6144 6144]
-
blk.0.attn_output.biasF32[6144]
-
blk.0.ffn_norm.weightF32[6144]
-
blk.0.ffn_norm.biasF32[6144]
-
blk.0.ffn_up.weightF16[6144 24576]
-
blk.0.ffn_up.biasF32[24576]
-
blk.0.ffn_down.weightF16[24576 6144]
-
blk.0.ffn_down.biasF32[6144]
-
blk.1.attn_norm.weightF32[6144]
-
blk.1.attn_norm.biasF32[6144]
-
blk.1.attn_qkv.weightF16[6144 6400]
-
blk.1.attn_qkv.biasF32[6400]
-
blk.1.attn_output.weightF16[6144 6144]
-
blk.1.attn_output.biasF32[6144]
-
blk.1.ffn_norm.weightF32[6144]
-
blk.1.ffn_norm.biasF32[6144]
-
blk.1.ffn_up.weightF16[6144 24576]
-
blk.1.ffn_up.biasF32[24576]
-
blk.1.ffn_down.weightF16[24576 6144]
-
blk.1.ffn_down.biasF32[6144]
-
blk.2.attn_norm.weightF32[6144]
-
blk.2.attn_norm.biasF32[6144]
-
blk.2.attn_qkv.weightF16[6144 6400]
-
blk.2.attn_qkv.biasF32[6400]
-
blk.2.attn_output.weightF16[6144 6144]
-
blk.2.attn_output.biasF32[6144]
-
blk.2.ffn_norm.weightF32[6144]
-
blk.2.ffn_norm.biasF32[6144]
-
blk.2.ffn_up.weightF16[6144 24576]
-
blk.2.ffn_up.biasF32[24576]
-
blk.2.ffn_down.weightF16[24576 6144]
-
blk.2.ffn_down.biasF32[6144]
-
blk.3.attn_norm.weightF32[6144]
-
blk.3.attn_norm.biasF32[6144]
-
blk.3.attn_qkv.weightF16[6144 6400]
-
blk.3.attn_qkv.biasF32[6400]
-
blk.3.attn_output.weightF16[6144 6144]
-
blk.3.attn_output.biasF32[6144]
-
blk.3.ffn_norm.weightF32[6144]
-
blk.3.ffn_norm.biasF32[6144]
-
blk.3.ffn_up.weightF16[6144 24576]
-
blk.3.ffn_up.biasF32[24576]
-
blk.3.ffn_down.weightF16[24576 6144]
-
blk.3.ffn_down.biasF32[6144]
-
blk.4.attn_norm.weightF32[6144]
-
blk.4.attn_norm.biasF32[6144]
-
blk.4.attn_qkv.weightF16[6144 6400]
-
blk.4.attn_qkv.biasF32[6400]
-
blk.4.attn_output.weightF16[6144 6144]
-
blk.4.attn_output.biasF32[6144]
-
blk.4.ffn_norm.weightF32[6144]
-
blk.4.ffn_norm.biasF32[6144]
-
blk.4.ffn_up.weightF16[6144 24576]
-
blk.4.ffn_up.biasF32[24576]
-
blk.4.ffn_down.weightF16[24576 6144]
-
blk.4.ffn_down.biasF32[6144]
-
blk.5.attn_norm.weightF32[6144]
-
blk.5.attn_norm.biasF32[6144]
-
blk.5.attn_qkv.weightF16[6144 6400]
-
blk.5.attn_qkv.biasF32[6400]
-
blk.5.attn_output.weightF16[6144 6144]
-
blk.5.attn_output.biasF32[6144]
-
blk.5.ffn_norm.weightF32[6144]
-
blk.5.ffn_norm.biasF32[6144]
-
blk.5.ffn_up.weightF16[6144 24576]
-
blk.5.ffn_up.biasF32[24576]
-
blk.5.ffn_down.weightF16[24576 6144]
-
blk.5.ffn_down.biasF32[6144]
-
blk.6.attn_norm.weightF32[6144]
-
blk.6.attn_norm.biasF32[6144]
-
blk.6.attn_qkv.weightF16[6144 6400]
-
blk.6.attn_qkv.biasF32[6400]
-
blk.6.attn_output.weightF16[6144 6144]
-
blk.6.attn_output.biasF32[6144]
-
blk.6.ffn_norm.weightF32[6144]
-
blk.6.ffn_norm.biasF32[6144]
-
blk.6.ffn_up.weightF16[6144 24576]
-
blk.6.ffn_up.biasF32[24576]
-
blk.6.ffn_down.weightF16[24576 6144]
-
blk.6.ffn_down.biasF32[6144]
-
blk.7.attn_norm.weightF32[6144]
-
blk.7.attn_norm.biasF32[6144]
-
blk.7.attn_qkv.weightF16[6144 6400]
-
blk.7.attn_qkv.biasF32[6400]
-
blk.7.attn_output.weightF16[6144 6144]
-
blk.7.attn_output.biasF32[6144]
-
blk.7.ffn_norm.weightF32[6144]
-
blk.7.ffn_norm.biasF32[6144]
-
blk.7.ffn_up.weightF16[6144 24576]
-
blk.7.ffn_up.biasF32[24576]
-
blk.7.ffn_down.weightF16[24576 6144]
-
blk.7.ffn_down.biasF32[6144]
-
blk.8.attn_norm.weightF32[6144]
-
blk.8.attn_norm.biasF32[6144]
-
blk.8.attn_qkv.weightF16[6144 6400]
-
blk.8.attn_qkv.biasF32[6400]
-
blk.8.attn_output.weightF16[6144 6144]
-
blk.8.attn_output.biasF32[6144]
-
blk.8.ffn_norm.weightF32[6144]
-
blk.8.ffn_norm.biasF32[6144]
-
blk.8.ffn_up.weightF16[6144 24576]
-
blk.8.ffn_up.biasF32[24576]
-
blk.8.ffn_down.weightF16[24576 6144]
-
blk.8.ffn_down.biasF32[6144]
-
blk.9.attn_norm.weightF32[6144]
-
blk.9.attn_norm.biasF32[6144]
-
blk.9.attn_qkv.weightF16[6144 6400]
-
blk.9.attn_qkv.biasF32[6400]
-
blk.9.attn_output.weightF16[6144 6144]
-
blk.9.attn_output.biasF32[6144]
-
blk.9.ffn_norm.weightF32[6144]
-
blk.9.ffn_norm.biasF32[6144]
-
blk.9.ffn_up.weightF16[6144 24576]
-
blk.9.ffn_up.biasF32[24576]
-
blk.9.ffn_down.weightF16[24576 6144]
-
blk.9.ffn_down.biasF32[6144]
-
blk.10.attn_norm.weightF32[6144]
-
blk.10.attn_norm.biasF32[6144]
-
blk.10.attn_qkv.weightF16[6144 6400]
-
blk.10.attn_qkv.biasF32[6400]
-
blk.10.attn_output.weightF16[6144 6144]
-
blk.10.attn_output.biasF32[6144]
-
blk.10.ffn_norm.weightF32[6144]
-
blk.10.ffn_norm.biasF32[6144]
-
blk.10.ffn_up.weightF16[6144 24576]
-
blk.10.ffn_up.biasF32[24576]
-
blk.10.ffn_down.weightF16[24576 6144]
-
blk.10.ffn_down.biasF32[6144]
-
blk.11.attn_norm.weightF32[6144]
-
blk.11.attn_norm.biasF32[6144]
-
blk.11.attn_qkv.weightF16[6144 6400]
-
blk.11.attn_qkv.biasF32[6400]
-
blk.11.attn_output.weightF16[6144 6144]
-
blk.11.attn_output.biasF32[6144]
-
blk.11.ffn_norm.weightF32[6144]
-
blk.11.ffn_norm.biasF32[6144]
-
blk.11.ffn_up.weightF16[6144 24576]
-
blk.11.ffn_up.biasF32[24576]
-
blk.11.ffn_down.weightF16[24576 6144]
-
blk.11.ffn_down.biasF32[6144]
-
blk.12.attn_norm.weightF32[6144]
-
blk.12.attn_norm.biasF32[6144]
-
blk.12.attn_qkv.weightF16[6144 6400]
-
blk.12.attn_qkv.biasF32[6400]
-
blk.12.attn_output.weightF16[6144 6144]
-
blk.12.attn_output.biasF32[6144]
-
blk.12.ffn_norm.weightF32[6144]
-
blk.12.ffn_norm.biasF32[6144]
-
blk.12.ffn_up.weightF16[6144 24576]
-
blk.12.ffn_up.biasF32[24576]
-
blk.12.ffn_down.weightF16[24576 6144]
-
blk.12.ffn_down.biasF32[6144]
-
blk.13.attn_norm.weightF32[6144]
-
blk.13.attn_norm.biasF32[6144]
-
blk.13.attn_qkv.weightF16[6144 6400]
-
blk.13.attn_qkv.biasF32[6400]
-
blk.13.attn_output.weightF16[6144 6144]
-
blk.13.attn_output.biasF32[6144]
-
blk.13.ffn_norm.weightF32[6144]
-
blk.13.ffn_norm.biasF32[6144]
-
blk.13.ffn_up.weightF16[6144 24576]
-
blk.13.ffn_up.biasF32[24576]
-
blk.13.ffn_down.weightF16[24576 6144]
-
blk.13.ffn_down.biasF32[6144]
-
blk.14.attn_norm.weightF32[6144]
-
blk.14.attn_norm.biasF32[6144]
-
blk.14.attn_qkv.weightF16[6144 6400]
-
blk.14.attn_qkv.biasF32[6400]
-
blk.14.attn_output.weightF16[6144 6144]
-
blk.14.attn_output.biasF32[6144]
-
blk.14.ffn_norm.weightF32[6144]
-
blk.14.ffn_norm.biasF32[6144]
-
blk.14.ffn_up.weightF16[6144 24576]
-
blk.14.ffn_up.biasF32[24576]
-
blk.14.ffn_down.weightF16[24576 6144]
-
blk.14.ffn_down.biasF32[6144]
-
blk.15.attn_norm.weightF32[6144]
-
blk.15.attn_norm.biasF32[6144]
-
blk.15.attn_qkv.weightF16[6144 6400]
-
blk.15.attn_qkv.biasF32[6400]
-
blk.15.attn_output.weightF16[6144 6144]
-
blk.15.attn_output.biasF32[6144]
-
blk.15.ffn_norm.weightF32[6144]
-
blk.15.ffn_norm.biasF32[6144]
-
blk.15.ffn_up.weightF16[6144 24576]
-
blk.15.ffn_up.biasF32[24576]
-
blk.15.ffn_down.weightF16[24576 6144]
-
blk.15.ffn_down.biasF32[6144]
-
blk.16.attn_norm.weightF32[6144]
-
blk.16.attn_norm.biasF32[6144]
-
blk.16.attn_qkv.weightF16[6144 6400]
-
blk.16.attn_qkv.biasF32[6400]
-
blk.16.attn_output.weightF16[6144 6144]
-
blk.16.attn_output.biasF32[6144]
-
blk.16.ffn_norm.weightF32[6144]
-
blk.16.ffn_norm.biasF32[6144]
-
blk.16.ffn_up.weightF16[6144 24576]
-
blk.16.ffn_up.biasF32[24576]
-
blk.16.ffn_down.weightF16[24576 6144]
-
blk.16.ffn_down.biasF32[6144]
-
blk.17.attn_norm.weightF32[6144]
-
blk.17.attn_norm.biasF32[6144]
-
blk.17.attn_qkv.weightF16[6144 6400]
-
blk.17.attn_qkv.biasF32[6400]
-
blk.17.attn_output.weightF16[6144 6144]
-
blk.17.attn_output.biasF32[6144]
-
blk.17.ffn_norm.weightF32[6144]
-
blk.17.ffn_norm.biasF32[6144]
-
blk.17.ffn_up.weightF16[6144 24576]
-
blk.17.ffn_up.biasF32[24576]
-
blk.17.ffn_down.weightF16[24576 6144]
-
blk.17.ffn_down.biasF32[6144]
-
blk.18.attn_norm.weightF32[6144]
-
blk.18.attn_norm.biasF32[6144]
-
blk.18.attn_qkv.weightF16[6144 6400]
-
blk.18.attn_qkv.biasF32[6400]
-
blk.18.attn_output.weightF16[6144 6144]
-
blk.18.attn_output.biasF32[6144]
-
blk.18.ffn_norm.weightF32[6144]
-
blk.18.ffn_norm.biasF32[6144]
-
blk.18.ffn_up.weightF16[6144 24576]
-
blk.18.ffn_up.biasF32[24576]
-
blk.18.ffn_down.weightF16[24576 6144]
-
blk.18.ffn_down.biasF32[6144]
-
blk.19.attn_norm.weightF32[6144]
-
blk.19.attn_norm.biasF32[6144]
-
blk.19.attn_qkv.weightF16[6144 6400]
-
blk.19.attn_qkv.biasF32[6400]
-
blk.19.attn_output.weightF16[6144 6144]
-
blk.19.attn_output.biasF32[6144]
-
blk.19.ffn_norm.weightF32[6144]
-
blk.19.ffn_norm.biasF32[6144]
-
blk.19.ffn_up.weightF16[6144 24576]
-
blk.19.ffn_up.biasF32[24576]
-
blk.19.ffn_down.weightF16[24576 6144]
-
blk.19.ffn_down.biasF32[6144]
-
blk.20.attn_norm.weightF32[6144]
-
blk.20.attn_norm.biasF32[6144]
-
blk.20.attn_qkv.weightF16[6144 6400]
-
blk.20.attn_qkv.biasF32[6400]
-
blk.20.attn_output.weightF16[6144 6144]
-
blk.20.attn_output.biasF32[6144]
-
blk.20.ffn_norm.weightF32[6144]
-
blk.20.ffn_norm.biasF32[6144]
-
blk.20.ffn_up.weightF16[6144 24576]
-
blk.20.ffn_up.biasF32[24576]
-
blk.20.ffn_down.weightF16[24576 6144]
-
blk.20.ffn_down.biasF32[6144]
-
blk.21.attn_norm.weightF32[6144]
-
blk.21.attn_norm.biasF32[6144]
-
blk.21.attn_qkv.weightF16[6144 6400]
-
blk.21.attn_qkv.biasF32[6400]
-
blk.21.attn_output.weightF16[6144 6144]
-
blk.21.attn_output.biasF32[6144]
-
blk.21.ffn_norm.weightF32[6144]
-
blk.21.ffn_norm.biasF32[6144]
-
blk.21.ffn_up.weightF16[6144 24576]
-
blk.21.ffn_up.biasF32[24576]
-
blk.21.ffn_down.weightF16[24576 6144]
-
blk.21.ffn_down.biasF32[6144]
-
blk.22.attn_norm.weightF32[6144]
-
blk.22.attn_norm.biasF32[6144]
-
blk.22.attn_qkv.weightF16[6144 6400]
-
blk.22.attn_qkv.biasF32[6400]
-
blk.22.attn_output.weightF16[6144 6144]
-
blk.22.attn_output.biasF32[6144]
-
blk.22.ffn_norm.weightF32[6144]
-
blk.22.ffn_norm.biasF32[6144]
-
blk.22.ffn_up.weightF16[6144 24576]
-
blk.22.ffn_up.biasF32[24576]
-
blk.22.ffn_down.weightF16[24576 6144]
-
blk.22.ffn_down.biasF32[6144]
-
blk.23.attn_norm.weightF32[6144]
-
blk.23.attn_norm.biasF32[6144]
-
blk.23.attn_qkv.weightF16[6144 6400]
-
blk.23.attn_qkv.biasF32[6400]
-
blk.23.attn_output.weightF16[6144 6144]
-
blk.23.attn_output.biasF32[6144]
-
blk.23.ffn_norm.weightF32[6144]
-
blk.23.ffn_norm.biasF32[6144]
-
blk.23.ffn_up.weightF16[6144 24576]
-
blk.23.ffn_up.biasF32[24576]
-
blk.23.ffn_down.weightF16[24576 6144]
-
blk.23.ffn_down.biasF32[6144]
-
blk.24.attn_norm.weightF32[6144]
-
blk.24.attn_norm.biasF32[6144]
-
blk.24.attn_qkv.weightF16[6144 6400]
-
blk.24.attn_qkv.biasF32[6400]
-
blk.24.attn_output.weightF16[6144 6144]
-
blk.24.attn_output.biasF32[6144]
-
blk.24.ffn_norm.weightF32[6144]
-
blk.24.ffn_norm.biasF32[6144]
-
blk.24.ffn_up.weightF16[6144 24576]
-
blk.24.ffn_up.biasF32[24576]
-
blk.24.ffn_down.weightF16[24576 6144]
-
blk.24.ffn_down.biasF32[6144]
-
blk.25.attn_norm.weightF32[6144]
-
blk.25.attn_norm.biasF32[6144]
-
blk.25.attn_qkv.weightF16[6144 6400]
-
blk.25.attn_qkv.biasF32[6400]
-
blk.25.attn_output.weightF16[6144 6144]
-
blk.25.attn_output.biasF32[6144]
-
blk.25.ffn_norm.weightF32[6144]
-
blk.25.ffn_norm.biasF32[6144]
-
blk.25.ffn_up.weightF16[6144 24576]
-
blk.25.ffn_up.biasF32[24576]
-
blk.25.ffn_down.weightF16[24576 6144]
-
blk.25.ffn_down.biasF32[6144]
-
blk.26.attn_norm.weightF32[6144]
-
blk.26.attn_norm.biasF32[6144]
-
blk.26.attn_qkv.weightF16[6144 6400]
-
blk.26.attn_qkv.biasF32[6400]
-
blk.26.attn_output.weightF16[6144 6144]
-
blk.26.attn_output.biasF32[6144]
-
blk.26.ffn_norm.weightF32[6144]
-
blk.26.ffn_norm.biasF32[6144]
-
blk.26.ffn_up.weightF16[6144 24576]
-
blk.26.ffn_up.biasF32[24576]
-
blk.26.ffn_down.weightF16[24576 6144]
-
blk.26.ffn_down.biasF32[6144]
-
blk.27.attn_norm.weightF32[6144]
-
blk.27.attn_norm.biasF32[6144]
-
blk.27.attn_qkv.weightF16[6144 6400]
-
blk.27.attn_qkv.biasF32[6400]
-
blk.27.attn_output.weightF16[6144 6144]
-
blk.27.attn_output.biasF32[6144]
-
blk.27.ffn_norm.weightF32[6144]
-
blk.27.ffn_norm.biasF32[6144]
-
blk.27.ffn_up.weightF16[6144 24576]
-
blk.27.ffn_up.biasF32[24576]
-
blk.27.ffn_down.weightF16[24576 6144]
-
blk.27.ffn_down.biasF32[6144]
-
blk.28.attn_norm.weightF32[6144]
-
blk.28.attn_norm.biasF32[6144]
-
blk.28.attn_qkv.weightF16[6144 6400]
-
blk.28.attn_qkv.biasF32[6400]
-
blk.28.attn_output.weightF16[6144 6144]
-
blk.28.attn_output.biasF32[6144]
-
blk.28.ffn_norm.weightF32[6144]
-
blk.28.ffn_norm.biasF32[6144]
-
blk.28.ffn_up.weightF16[6144 24576]
-
blk.28.ffn_up.biasF32[24576]
-
blk.28.ffn_down.weightF16[24576 6144]
-
blk.28.ffn_down.biasF32[6144]
-
blk.29.attn_norm.weightF32[6144]
-
blk.29.attn_norm.biasF32[6144]
-
blk.29.attn_qkv.weightF16[6144 6400]
-
blk.29.attn_qkv.biasF32[6400]
-
blk.29.attn_output.weightF16[6144 6144]
-
blk.29.attn_output.biasF32[6144]
-
blk.29.ffn_norm.weightF32[6144]
-
blk.29.ffn_norm.biasF32[6144]
-
blk.29.ffn_up.weightF16[6144 24576]
-
blk.29.ffn_up.biasF32[24576]
-
blk.29.ffn_down.weightF16[24576 6144]
-
blk.29.ffn_down.biasF32[6144]
-
blk.30.attn_norm.weightF32[6144]
-
blk.30.attn_norm.biasF32[6144]
-
blk.30.attn_qkv.weightF16[6144 6400]
-
blk.30.attn_qkv.biasF32[6400]
-
blk.30.attn_output.weightF16[6144 6144]
-
blk.30.attn_output.biasF32[6144]
-
blk.30.ffn_norm.weightF32[6144]
-
blk.30.ffn_norm.biasF32[6144]
-
blk.30.ffn_up.weightF16[6144 24576]
-
blk.30.ffn_up.biasF32[24576]
-
blk.30.ffn_down.weightF16[24576 6144]
-
blk.30.ffn_down.biasF32[6144]
-
blk.31.attn_norm.weightF32[6144]
-
blk.31.attn_norm.biasF32[6144]
-
blk.31.attn_qkv.weightF16[6144 6400]
-
blk.31.attn_qkv.biasF32[6400]
-
blk.31.attn_output.weightF16[6144 6144]
-
blk.31.attn_output.biasF32[6144]
-
blk.31.ffn_norm.weightF32[6144]
-
blk.31.ffn_norm.biasF32[6144]
-
blk.31.ffn_up.weightF16[6144 24576]
-
blk.31.ffn_up.biasF32[24576]
-
blk.31.ffn_down.weightF16[24576 6144]
-
blk.31.ffn_down.biasF32[6144]
-
blk.32.attn_norm.weightF32[6144]
-
blk.32.attn_norm.biasF32[6144]
-
blk.32.attn_qkv.weightF16[6144 6400]
-
blk.32.attn_qkv.biasF32[6400]
-
blk.32.attn_output.weightF16[6144 6144]
-
blk.32.attn_output.biasF32[6144]
-
blk.32.ffn_norm.weightF32[6144]
-
blk.32.ffn_norm.biasF32[6144]
-
blk.32.ffn_up.weightF16[6144 24576]
-
blk.32.ffn_up.biasF32[24576]
-
blk.32.ffn_down.weightF16[24576 6144]
-
blk.32.ffn_down.biasF32[6144]
-
blk.33.attn_norm.weightF32[6144]
-
blk.33.attn_norm.biasF32[6144]
-
blk.33.attn_qkv.weightF16[6144 6400]
-
blk.33.attn_qkv.biasF32[6400]
-
blk.33.attn_output.weightF16[6144 6144]
-
blk.33.attn_output.biasF32[6144]
-
blk.33.ffn_norm.weightF32[6144]
-
blk.33.ffn_norm.biasF32[6144]
-
blk.33.ffn_up.weightF16[6144 24576]
-
blk.33.ffn_up.biasF32[24576]
-
blk.33.ffn_down.weightF16[24576 6144]
-
blk.33.ffn_down.biasF32[6144]
-
blk.34.attn_norm.weightF32[6144]
-
blk.34.attn_norm.biasF32[6144]
-
blk.34.attn_qkv.weightF16[6144 6400]
-
blk.34.attn_qkv.biasF32[6400]
-
blk.34.attn_output.weightF16[6144 6144]
-
blk.34.attn_output.biasF32[6144]
-
blk.34.ffn_norm.weightF32[6144]
-
blk.34.ffn_norm.biasF32[6144]
-
blk.34.ffn_up.weightF16[6144 24576]
-
blk.34.ffn_up.biasF32[24576]
-
blk.34.ffn_down.weightF16[24576 6144]
-
blk.34.ffn_down.biasF32[6144]
-
blk.35.attn_norm.weightF32[6144]
-
blk.35.attn_norm.biasF32[6144]
-
blk.35.attn_qkv.weightF16[6144 6400]
-
blk.35.attn_qkv.biasF32[6400]
-
blk.35.attn_output.weightF16[6144 6144]
-
blk.35.attn_output.biasF32[6144]
-
blk.35.ffn_norm.weightF32[6144]
-
blk.35.ffn_norm.biasF32[6144]
-
blk.35.ffn_up.weightF16[6144 24576]
-
blk.35.ffn_up.biasF32[24576]
-
blk.35.ffn_down.weightF16[24576 6144]
-
blk.35.ffn_down.biasF32[6144]
-
blk.36.attn_norm.weightF32[6144]
-
blk.36.attn_norm.biasF32[6144]
-
blk.36.attn_qkv.weightF16[6144 6400]
-
blk.36.attn_qkv.biasF32[6400]
-
blk.36.attn_output.weightF16[6144 6144]
-
blk.36.attn_output.biasF32[6144]
-
blk.36.ffn_norm.weightF32[6144]
-
blk.36.ffn_norm.biasF32[6144]
-
blk.36.ffn_up.weightF16[6144 24576]
-
blk.36.ffn_up.biasF32[24576]
-
blk.36.ffn_down.weightF16[24576 6144]
-
blk.36.ffn_down.biasF32[6144]
-
blk.37.attn_norm.weightF32[6144]
-
blk.37.attn_norm.biasF32[6144]
-
blk.37.attn_qkv.weightF16[6144 6400]
-
blk.37.attn_qkv.biasF32[6400]
-
blk.37.attn_output.weightF16[6144 6144]
-
blk.37.attn_output.biasF32[6144]
-
blk.37.ffn_norm.weightF32[6144]
-
blk.37.ffn_norm.biasF32[6144]
-
blk.37.ffn_up.weightF16[6144 24576]
-
blk.37.ffn_up.biasF32[24576]
-
blk.37.ffn_down.weightF16[24576 6144]
-
blk.37.ffn_down.biasF32[6144]
-
blk.38.attn_norm.weightF32[6144]
-
blk.38.attn_norm.biasF32[6144]
-
blk.38.attn_qkv.weightF16[6144 6400]
-
blk.38.attn_qkv.biasF32[6400]
-
blk.38.attn_output.weightF16[6144 6144]
-
blk.38.attn_output.biasF32[6144]
-
blk.38.ffn_norm.weightF32[6144]
-
blk.38.ffn_norm.biasF32[6144]
-
blk.38.ffn_up.weightF16[6144 24576]
-
blk.38.ffn_up.biasF32[24576]
-
blk.38.ffn_down.weightF16[24576 6144]
-
blk.38.ffn_down.biasF32[6144]
-
blk.39.attn_norm.weightF32[6144]
-
blk.39.attn_norm.biasF32[6144]
-
blk.39.attn_qkv.weightF16[6144 6400]
-
blk.39.attn_qkv.biasF32[6400]
-
blk.39.attn_output.weightF16[6144 6144]
-
blk.39.attn_output.biasF32[6144]
-
blk.39.ffn_norm.weightF32[6144]
-
blk.39.ffn_norm.biasF32[6144]
-
blk.39.ffn_up.weightF16[6144 24576]
-
blk.39.ffn_up.biasF32[24576]
-
blk.39.ffn_down.weightF16[24576 6144]
-
blk.39.ffn_down.biasF32[6144]
-
output_norm.weightF32[6144]
-
output_norm.biasF32[6144]
-
output.weightF16[6144 49156]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39