Models
Docs
Pricing
Sign in
Download
Models
Download
Docs
Pricing
Sign in
ShreyanGondaliya
/
s5-reduced
:latest
63
Downloads
Updated
2 months ago
Reduced the s5 default context to 4096 tokens to allow better local inference. Sometimes thinks in Chinese??
Reduced the s5 default context to 4096 tokens to allow better local inference. Sometimes thinks in Chinese??
Cancel
s5-reduced:latest
...
/
model
39f5fb7fdbfc · 7.1GB
Metadata
general.architecture
glm4
glm4
general.file_type
Q5_K_M
Q5_K_M
glm4.attention.head_count
32
32
glm4.attention.head_count_kv
2
2
glm4.attention.layer_norm_rms_epsilon
1e-05
1e-05
glm4.block_count
40
40
glm4.context_length
131072
131072
glm4.embedding_length
4096
4096
glm4.feed_forward_length
13696
13696
glm4.rope.dimension_count
64
64
glm4.rope.dimension_sections
[8, 12, 12, 0]
[8, 12, 12, 0]
glm4.rope.freq_base
500000
500000
tokenizer.ggml.bos_token_id
151329
151329
tokenizer.ggml.eos_token_id
151329
151329
tokenizer.ggml.eot_token_id
151336
151336
tokenizer.ggml.merges
[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
[Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
151329
151329
tokenizer.ggml.pre
glm4
glm4
tokenizer.ggml.token_type
[1, 1, 1, 1, 1, ...]
[1, 1, 1, 1, 1, ...]
tokenizer.ggml.tokens
[!, ", #, $, %, ...]
[!, ", #, $, %, ...]
tokenizer.ggml.unknown_token_id
151329
151329
Tensor
Name
Type
Shape
token_embd.weight
Q5_K
Q5_K
[4096, 151552]
blk.0
blk.0.attn_k.bias
F32
F32
[256]
blk.0.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.0.attn_norm.weight
F32
F32
[4096]
blk.0.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.0.attn_q.bias
F32
F32
[4096]
blk.0.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.0.attn_v.bias
F32
F32
[256]
blk.0.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.0.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.0.ffn_norm.weight
F32
F32
[4096]
blk.0.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.0.post_attention_norm.weight
F32
F32
[4096]
blk.0.post_ffw_norm.weight
F32
F32
[4096]
blk.1
blk.1.attn_k.bias
F32
F32
[256]
blk.1.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.1.attn_norm.weight
F32
F32
[4096]
blk.1.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.1.attn_q.bias
F32
F32
[4096]
blk.1.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.1.attn_v.bias
F32
F32
[256]
blk.1.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.1.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.1.ffn_norm.weight
F32
F32
[4096]
blk.1.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.1.post_attention_norm.weight
F32
F32
[4096]
blk.1.post_ffw_norm.weight
F32
F32
[4096]
blk.2
blk.2.attn_k.bias
F32
F32
[256]
blk.2.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.2.attn_norm.weight
F32
F32
[4096]
blk.2.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.2.attn_q.bias
F32
F32
[4096]
blk.2.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.2.attn_v.bias
F32
F32
[256]
blk.2.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.2.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.2.ffn_norm.weight
F32
F32
[4096]
blk.2.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.2.post_attention_norm.weight
F32
F32
[4096]
blk.2.post_ffw_norm.weight
F32
F32
[4096]
blk.3
blk.3.attn_k.bias
F32
F32
[256]
blk.3.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.3.attn_norm.weight
F32
F32
[4096]
blk.3.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.3.attn_q.bias
F32
F32
[4096]
blk.3.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.3.attn_v.bias
F32
F32
[256]
blk.3.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.3.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.3.ffn_norm.weight
F32
F32
[4096]
blk.3.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.3.post_attention_norm.weight
F32
F32
[4096]
blk.3.post_ffw_norm.weight
F32
F32
[4096]
blk.4
blk.4.attn_k.bias
F32
F32
[256]
blk.4.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.4.attn_norm.weight
F32
F32
[4096]
blk.4.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.4.attn_q.bias
F32
F32
[4096]
blk.4.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.4.attn_v.bias
F32
F32
[256]
blk.4.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.4.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.4.ffn_norm.weight
F32
F32
[4096]
blk.4.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.4.post_attention_norm.weight
F32
F32
[4096]
blk.4.post_ffw_norm.weight
F32
F32
[4096]
blk.5
blk.5.attn_k.bias
F32
F32
[256]
blk.5.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.5.attn_norm.weight
F32
F32
[4096]
blk.5.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.5.attn_q.bias
F32
F32
[4096]
blk.5.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.5.attn_v.bias
F32
F32
[256]
blk.5.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.5.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.5.ffn_norm.weight
F32
F32
[4096]
blk.5.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.5.post_attention_norm.weight
F32
F32
[4096]
blk.5.post_ffw_norm.weight
F32
F32
[4096]
blk.6
blk.6.attn_k.bias
F32
F32
[256]
blk.6.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.6.attn_norm.weight
F32
F32
[4096]
blk.6.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.6.attn_q.bias
F32
F32
[4096]
blk.6.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.6.attn_v.bias
F32
F32
[256]
blk.6.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.6.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.6.ffn_norm.weight
F32
F32
[4096]
blk.6.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.6.post_attention_norm.weight
F32
F32
[4096]
blk.6.post_ffw_norm.weight
F32
F32
[4096]
blk.7
blk.7.attn_k.bias
F32
F32
[256]
blk.7.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.7.attn_norm.weight
F32
F32
[4096]
blk.7.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.7.attn_q.bias
F32
F32
[4096]
blk.7.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.7.attn_v.bias
F32
F32
[256]
blk.7.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.7.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.7.ffn_norm.weight
F32
F32
[4096]
blk.7.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.7.post_attention_norm.weight
F32
F32
[4096]
blk.7.post_ffw_norm.weight
F32
F32
[4096]
blk.8
blk.8.attn_k.bias
F32
F32
[256]
blk.8.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.8.attn_norm.weight
F32
F32
[4096]
blk.8.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.8.attn_q.bias
F32
F32
[4096]
blk.8.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.8.attn_v.bias
F32
F32
[256]
blk.8.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.8.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.8.ffn_norm.weight
F32
F32
[4096]
blk.8.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.8.post_attention_norm.weight
F32
F32
[4096]
blk.8.post_ffw_norm.weight
F32
F32
[4096]
blk.9
blk.9.attn_k.bias
F32
F32
[256]
blk.9.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.9.attn_norm.weight
F32
F32
[4096]
blk.9.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.9.attn_q.bias
F32
F32
[4096]
blk.9.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.9.attn_v.bias
F32
F32
[256]
blk.9.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.9.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.9.ffn_norm.weight
F32
F32
[4096]
blk.9.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.9.post_attention_norm.weight
F32
F32
[4096]
blk.9.post_ffw_norm.weight
F32
F32
[4096]
blk.10
blk.10.attn_k.bias
F32
F32
[256]
blk.10.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.10.attn_norm.weight
F32
F32
[4096]
blk.10.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.10.attn_q.bias
F32
F32
[4096]
blk.10.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.10.attn_v.bias
F32
F32
[256]
blk.10.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.10.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.10.ffn_norm.weight
F32
F32
[4096]
blk.10.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.10.post_attention_norm.weight
F32
F32
[4096]
blk.10.post_ffw_norm.weight
F32
F32
[4096]
blk.11
blk.11.attn_k.bias
F32
F32
[256]
blk.11.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.11.attn_norm.weight
F32
F32
[4096]
blk.11.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.11.attn_q.bias
F32
F32
[4096]
blk.11.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.11.attn_v.bias
F32
F32
[256]
blk.11.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.11.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.11.ffn_norm.weight
F32
F32
[4096]
blk.11.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.11.post_attention_norm.weight
F32
F32
[4096]
blk.11.post_ffw_norm.weight
F32
F32
[4096]
blk.12
blk.12.attn_k.bias
F32
F32
[256]
blk.12.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.12.attn_norm.weight
F32
F32
[4096]
blk.12.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.12.attn_q.bias
F32
F32
[4096]
blk.12.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.12.attn_v.bias
F32
F32
[256]
blk.12.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.12.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.12.ffn_norm.weight
F32
F32
[4096]
blk.12.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.12.post_attention_norm.weight
F32
F32
[4096]
blk.12.post_ffw_norm.weight
F32
F32
[4096]
blk.13
blk.13.attn_k.bias
F32
F32
[256]
blk.13.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.13.attn_norm.weight
F32
F32
[4096]
blk.13.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.13.attn_q.bias
F32
F32
[4096]
blk.13.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.13.attn_v.bias
F32
F32
[256]
blk.13.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.13.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.13.ffn_norm.weight
F32
F32
[4096]
blk.13.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.13.post_attention_norm.weight
F32
F32
[4096]
blk.13.post_ffw_norm.weight
F32
F32
[4096]
blk.14
blk.14.attn_k.bias
F32
F32
[256]
blk.14.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.14.attn_norm.weight
F32
F32
[4096]
blk.14.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.14.attn_q.bias
F32
F32
[4096]
blk.14.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.14.attn_v.bias
F32
F32
[256]
blk.14.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.14.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.14.ffn_norm.weight
F32
F32
[4096]
blk.14.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.14.post_attention_norm.weight
F32
F32
[4096]
blk.14.post_ffw_norm.weight
F32
F32
[4096]
blk.15
blk.15.attn_k.bias
F32
F32
[256]
blk.15.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.15.attn_norm.weight
F32
F32
[4096]
blk.15.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.15.attn_q.bias
F32
F32
[4096]
blk.15.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.15.attn_v.bias
F32
F32
[256]
blk.15.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.15.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.15.ffn_norm.weight
F32
F32
[4096]
blk.15.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.15.post_attention_norm.weight
F32
F32
[4096]
blk.15.post_ffw_norm.weight
F32
F32
[4096]
blk.16
blk.16.attn_k.bias
F32
F32
[256]
blk.16.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.16.attn_norm.weight
F32
F32
[4096]
blk.16.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.16.attn_q.bias
F32
F32
[4096]
blk.16.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.16.attn_v.bias
F32
F32
[256]
blk.16.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.16.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.16.ffn_norm.weight
F32
F32
[4096]
blk.16.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.16.post_attention_norm.weight
F32
F32
[4096]
blk.16.post_ffw_norm.weight
F32
F32
[4096]
blk.17
blk.17.attn_k.bias
F32
F32
[256]
blk.17.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.17.attn_norm.weight
F32
F32
[4096]
blk.17.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.17.attn_q.bias
F32
F32
[4096]
blk.17.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.17.attn_v.bias
F32
F32
[256]
blk.17.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.17.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.17.ffn_norm.weight
F32
F32
[4096]
blk.17.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.17.post_attention_norm.weight
F32
F32
[4096]
blk.17.post_ffw_norm.weight
F32
F32
[4096]
blk.18
blk.18.attn_k.bias
F32
F32
[256]
blk.18.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.18.attn_norm.weight
F32
F32
[4096]
blk.18.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.18.attn_q.bias
F32
F32
[4096]
blk.18.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.18.attn_v.bias
F32
F32
[256]
blk.18.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.18.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.18.ffn_norm.weight
F32
F32
[4096]
blk.18.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.18.post_attention_norm.weight
F32
F32
[4096]
blk.18.post_ffw_norm.weight
F32
F32
[4096]
blk.19
blk.19.attn_k.bias
F32
F32
[256]
blk.19.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.19.attn_norm.weight
F32
F32
[4096]
blk.19.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.19.attn_q.bias
F32
F32
[4096]
blk.19.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.19.attn_v.bias
F32
F32
[256]
blk.19.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.19.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.19.ffn_norm.weight
F32
F32
[4096]
blk.19.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.19.post_attention_norm.weight
F32
F32
[4096]
blk.19.post_ffw_norm.weight
F32
F32
[4096]
blk.20
blk.20.attn_k.bias
F32
F32
[256]
blk.20.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.20.attn_norm.weight
F32
F32
[4096]
blk.20.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.20.attn_q.bias
F32
F32
[4096]
blk.20.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.20.attn_v.bias
F32
F32
[256]
blk.20.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.20.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.20.ffn_norm.weight
F32
F32
[4096]
blk.20.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.20.post_attention_norm.weight
F32
F32
[4096]
blk.20.post_ffw_norm.weight
F32
F32
[4096]
blk.21
blk.21.attn_k.bias
F32
F32
[256]
blk.21.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.21.attn_norm.weight
F32
F32
[4096]
blk.21.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.21.attn_q.bias
F32
F32
[4096]
blk.21.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.21.attn_v.bias
F32
F32
[256]
blk.21.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.21.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.21.ffn_norm.weight
F32
F32
[4096]
blk.21.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.21.post_attention_norm.weight
F32
F32
[4096]
blk.21.post_ffw_norm.weight
F32
F32
[4096]
blk.22
blk.22.attn_k.bias
F32
F32
[256]
blk.22.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.22.attn_norm.weight
F32
F32
[4096]
blk.22.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.22.attn_q.bias
F32
F32
[4096]
blk.22.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.22.attn_v.bias
F32
F32
[256]
blk.22.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.22.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.22.ffn_norm.weight
F32
F32
[4096]
blk.22.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.22.post_attention_norm.weight
F32
F32
[4096]
blk.22.post_ffw_norm.weight
F32
F32
[4096]
blk.23
blk.23.attn_k.bias
F32
F32
[256]
blk.23.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.23.attn_norm.weight
F32
F32
[4096]
blk.23.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.23.attn_q.bias
F32
F32
[4096]
blk.23.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.23.attn_v.bias
F32
F32
[256]
blk.23.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.23.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.23.ffn_norm.weight
F32
F32
[4096]
blk.23.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.23.post_attention_norm.weight
F32
F32
[4096]
blk.23.post_ffw_norm.weight
F32
F32
[4096]
blk.24
blk.24.attn_k.bias
F32
F32
[256]
blk.24.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.24.attn_norm.weight
F32
F32
[4096]
blk.24.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.24.attn_q.bias
F32
F32
[4096]
blk.24.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.24.attn_v.bias
F32
F32
[256]
blk.24.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.24.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.24.ffn_norm.weight
F32
F32
[4096]
blk.24.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.24.post_attention_norm.weight
F32
F32
[4096]
blk.24.post_ffw_norm.weight
F32
F32
[4096]
blk.25
blk.25.attn_k.bias
F32
F32
[256]
blk.25.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.25.attn_norm.weight
F32
F32
[4096]
blk.25.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.25.attn_q.bias
F32
F32
[4096]
blk.25.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.25.attn_v.bias
F32
F32
[256]
blk.25.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.25.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.25.ffn_norm.weight
F32
F32
[4096]
blk.25.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.25.post_attention_norm.weight
F32
F32
[4096]
blk.25.post_ffw_norm.weight
F32
F32
[4096]
blk.26
blk.26.attn_k.bias
F32
F32
[256]
blk.26.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.26.attn_norm.weight
F32
F32
[4096]
blk.26.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.26.attn_q.bias
F32
F32
[4096]
blk.26.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.26.attn_v.bias
F32
F32
[256]
blk.26.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.26.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.26.ffn_norm.weight
F32
F32
[4096]
blk.26.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.26.post_attention_norm.weight
F32
F32
[4096]
blk.26.post_ffw_norm.weight
F32
F32
[4096]
blk.27
blk.27.attn_k.bias
F32
F32
[256]
blk.27.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.27.attn_norm.weight
F32
F32
[4096]
blk.27.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.27.attn_q.bias
F32
F32
[4096]
blk.27.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.27.attn_v.bias
F32
F32
[256]
blk.27.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.27.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.27.ffn_norm.weight
F32
F32
[4096]
blk.27.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.27.post_attention_norm.weight
F32
F32
[4096]
blk.27.post_ffw_norm.weight
F32
F32
[4096]
blk.28
blk.28.attn_k.bias
F32
F32
[256]
blk.28.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.28.attn_norm.weight
F32
F32
[4096]
blk.28.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.28.attn_q.bias
F32
F32
[4096]
blk.28.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.28.attn_v.bias
F32
F32
[256]
blk.28.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.28.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.28.ffn_norm.weight
F32
F32
[4096]
blk.28.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.28.post_attention_norm.weight
F32
F32
[4096]
blk.28.post_ffw_norm.weight
F32
F32
[4096]
blk.29
blk.29.attn_k.bias
F32
F32
[256]
blk.29.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.29.attn_norm.weight
F32
F32
[4096]
blk.29.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.29.attn_q.bias
F32
F32
[4096]
blk.29.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.29.attn_v.bias
F32
F32
[256]
blk.29.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.29.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.29.ffn_norm.weight
F32
F32
[4096]
blk.29.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.29.post_attention_norm.weight
F32
F32
[4096]
blk.29.post_ffw_norm.weight
F32
F32
[4096]
blk.30
blk.30.attn_k.bias
F32
F32
[256]
blk.30.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.30.attn_norm.weight
F32
F32
[4096]
blk.30.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.30.attn_q.bias
F32
F32
[4096]
blk.30.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.30.attn_v.bias
F32
F32
[256]
blk.30.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.30.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.30.ffn_norm.weight
F32
F32
[4096]
blk.30.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.30.post_attention_norm.weight
F32
F32
[4096]
blk.30.post_ffw_norm.weight
F32
F32
[4096]
blk.31
blk.31.attn_k.bias
F32
F32
[256]
blk.31.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.31.attn_norm.weight
F32
F32
[4096]
blk.31.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.31.attn_q.bias
F32
F32
[4096]
blk.31.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.31.attn_v.bias
F32
F32
[256]
blk.31.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.31.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.31.ffn_norm.weight
F32
F32
[4096]
blk.31.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.31.post_attention_norm.weight
F32
F32
[4096]
blk.31.post_ffw_norm.weight
F32
F32
[4096]
blk.32
blk.32.attn_k.bias
F32
F32
[256]
blk.32.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.32.attn_norm.weight
F32
F32
[4096]
blk.32.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.32.attn_q.bias
F32
F32
[4096]
blk.32.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.32.attn_v.bias
F32
F32
[256]
blk.32.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.32.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.32.ffn_norm.weight
F32
F32
[4096]
blk.32.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.32.post_attention_norm.weight
F32
F32
[4096]
blk.32.post_ffw_norm.weight
F32
F32
[4096]
blk.33
blk.33.attn_k.bias
F32
F32
[256]
blk.33.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.33.attn_norm.weight
F32
F32
[4096]
blk.33.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.33.attn_q.bias
F32
F32
[4096]
blk.33.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.33.attn_v.bias
F32
F32
[256]
blk.33.attn_v.weight
Q5_K
Q5_K
[4096, 256]
blk.33.ffn_down.weight
Q5_1
Q5_1
[13696, 4096]
blk.33.ffn_norm.weight
F32
F32
[4096]
blk.33.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.33.post_attention_norm.weight
F32
F32
[4096]
blk.33.post_ffw_norm.weight
F32
F32
[4096]
blk.34
blk.34.attn_k.bias
F32
F32
[256]
blk.34.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.34.attn_norm.weight
F32
F32
[4096]
blk.34.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.34.attn_q.bias
F32
F32
[4096]
blk.34.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.34.attn_v.bias
F32
F32
[256]
blk.34.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.34.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.34.ffn_norm.weight
F32
F32
[4096]
blk.34.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.34.post_attention_norm.weight
F32
F32
[4096]
blk.34.post_ffw_norm.weight
F32
F32
[4096]
blk.35
blk.35.attn_k.bias
F32
F32
[256]
blk.35.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.35.attn_norm.weight
F32
F32
[4096]
blk.35.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.35.attn_q.bias
F32
F32
[4096]
blk.35.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.35.attn_v.bias
F32
F32
[256]
blk.35.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.35.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.35.ffn_norm.weight
F32
F32
[4096]
blk.35.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.35.post_attention_norm.weight
F32
F32
[4096]
blk.35.post_ffw_norm.weight
F32
F32
[4096]
blk.36
blk.36.attn_k.bias
F32
F32
[256]
blk.36.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.36.attn_norm.weight
F32
F32
[4096]
blk.36.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.36.attn_q.bias
F32
F32
[4096]
blk.36.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.36.attn_v.bias
F32
F32
[256]
blk.36.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.36.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.36.ffn_norm.weight
F32
F32
[4096]
blk.36.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.36.post_attention_norm.weight
F32
F32
[4096]
blk.36.post_ffw_norm.weight
F32
F32
[4096]
blk.37
blk.37.attn_k.bias
F32
F32
[256]
blk.37.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.37.attn_norm.weight
F32
F32
[4096]
blk.37.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.37.attn_q.bias
F32
F32
[4096]
blk.37.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.37.attn_v.bias
F32
F32
[256]
blk.37.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.37.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.37.ffn_norm.weight
F32
F32
[4096]
blk.37.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.37.post_attention_norm.weight
F32
F32
[4096]
blk.37.post_ffw_norm.weight
F32
F32
[4096]
blk.38
blk.38.attn_k.bias
F32
F32
[256]
blk.38.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.38.attn_norm.weight
F32
F32
[4096]
blk.38.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.38.attn_q.bias
F32
F32
[4096]
blk.38.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.38.attn_v.bias
F32
F32
[256]
blk.38.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.38.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.38.ffn_norm.weight
F32
F32
[4096]
blk.38.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.38.post_attention_norm.weight
F32
F32
[4096]
blk.38.post_ffw_norm.weight
F32
F32
[4096]
blk.39
blk.39.attn_k.bias
F32
F32
[256]
blk.39.attn_k.weight
Q5_K
Q5_K
[4096, 256]
blk.39.attn_norm.weight
F32
F32
[4096]
blk.39.attn_output.weight
Q5_K
Q5_K
[4096, 4096]
blk.39.attn_q.bias
F32
F32
[4096]
blk.39.attn_q.weight
Q5_K
Q5_K
[4096, 4096]
blk.39.attn_v.bias
F32
F32
[256]
blk.39.attn_v.weight
Q6_K
Q6_K
[4096, 256]
blk.39.ffn_down.weight
Q8_0
Q8_0
[13696, 4096]
blk.39.ffn_norm.weight
F32
F32
[4096]
blk.39.ffn_up.weight
Q5_K
Q5_K
[4096, 27392]
blk.39.post_attention_norm.weight
F32
F32
[4096]
blk.39.post_ffw_norm.weight
F32
F32
[4096]
output.weight
Q6_K
Q6_K
[4096, 151552]
output_norm.weight
F32
F32
[4096]