haervwe/GLM-4.6V-Flash-9B:latest
754 Downloads · Updated 1 month ago
GLM 4.6V Flash 9B model with vision, tools, and hybrid thinking enabled. Uses a custom template to align it with Ollama and applies the recommended sampling settings by default. Built from the Unsloth quants at Q4_K_M.
vision
tools
thinking
GLM-4.6V-Flash-9B:latest
model · 4f06cac67c0a · 6.2GB
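
For reference, a minimal sketch of pulling and querying this tag with the ollama Python client; the prompt, image path, and num_ctx value are placeholders, and the think flag is assumed to be available in the installed client version (the recommended sampling settings are baked into the tag, so no extra options are strictly required):

import ollama

ollama.pull("haervwe/GLM-4.6V-Flash-9B:latest")

response = ollama.chat(
    model="haervwe/GLM-4.6V-Flash-9B:latest",
    messages=[{
        "role": "user",
        "content": "Describe this screenshot.",   # placeholder prompt
        "images": ["screenshot.png"],             # placeholder image path (vision input)
    }],
    think=True,                                   # hybrid thinking; assumed supported by the installed client
    options={"num_ctx": 8192},                    # any value up to the 131072-token context window
)

print(response["message"]["content"])             # newer clients also expose reasoning as response.message.thinking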
Metadata
general.architecture  glm4
general.file_type  Q4_K_M
glm4.attention.head_count  32
glm4.attention.head_count_kv  2
glm4.attention.layer_norm_rms_epsilon  1e-05
glm4.block_count  40
glm4.context_length  131072
glm4.embedding_length  4096
glm4.feed_forward_length  13696
glm4.rope.dimension_count  64
glm4.rope.dimension_sections  [8, 12, 12, 0]
glm4.rope.freq_base  500000
tokenizer.ggml.bos_token_id  151329
tokenizer.ggml.eos_token_id  151329
tokenizer.ggml.eot_token_id  151336
tokenizer.ggml.merges  [Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
tokenizer.ggml.model  gpt2
tokenizer.ggml.padding_token_id  151330
tokenizer.ggml.pre  glm4
tokenizer.ggml.token_type  [1, 1, 1, 1, 1, ...]
tokenizer.ggml.tokens  [!, ", #, $, %, ...]
tokenizer.ggml.unknown_token_id  151329
quantize.imatrix.chunks_count  90
quantize.imatrix.dataset  unsloth_calibration_GLM-4.6V-Flash.txt
quantize.imatrix.entries_count  240
quantize.imatrix.file  GLM-4.6V-Flash-GGUF/imatrix_unsloth.gguf
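
The same metadata can be read programmatically from the GGUF file. A minimal sketch, assuming the gguf Python package from the llama.cpp project and a local copy of the file (the filename below is a placeholder, not the actual blob path):

from gguf import GGUFReader

reader = GGUFReader("GLM-4.6V-Flash-9B-Q4_K_M.gguf")  # placeholder filename

# Each key in reader.fields is a metadata entry such as glm4.context_length or tokenizer.ggml.model;
# field.types lists the GGUF value type(s) stored for that key.
for key, field in reader.fields.items():
    print(key, [t.name for t in field.types])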
Tensor
Name  Type  Shape
token_embd.weight  Q4_K  [4096, 151552]
blk.0
blk.0.attn_k.bias  F32  [256]
blk.0.attn_k.weight  Q4_K  [4096, 256]
blk.0.attn_norm.weight  F32  [4096]
blk.0.attn_output.weight  Q4_K  [4096, 4096]
blk.0.attn_q.bias  F32  [4096]
blk.0.attn_q.weight  Q4_K  [4096, 4096]
blk.0.attn_v.bias  F32  [256]
blk.0.attn_v.weight  Q6_K  [4096, 256]
blk.0.ffn_down.weight  Q8_0  [13696, 4096]
blk.0.ffn_norm.weight  F32  [4096]
blk.0.ffn_up.weight  Q4_K  [4096, 27392]
blk.0.post_attention_norm.weight  F32  [4096]
blk.0.post_ffw_norm.weight  F32  [4096]
blk.1
blk.1.attn_k.bias  F32  [256]
blk.1.attn_k.weight  Q4_K  [4096, 256]
blk.1.attn_norm.weight  F32  [4096]
blk.1.attn_output.weight  Q4_K  [4096, 4096]
blk.1.attn_q.bias  F32  [4096]
blk.1.attn_q.weight  Q4_K  [4096, 4096]
blk.1.attn_v.bias  F32  [256]
blk.1.attn_v.weight  Q6_K  [4096, 256]
blk.1.ffn_down.weight  Q8_0  [13696, 4096]
blk.1.ffn_norm.weight  F32  [4096]
blk.1.ffn_up.weight  Q4_K  [4096, 27392]
blk.1.post_attention_norm.weight  F32  [4096]
blk.1.post_ffw_norm.weight  F32  [4096]
blk.2
blk.2.attn_k.bias  F32  [256]
blk.2.attn_k.weight  Q4_K  [4096, 256]
blk.2.attn_norm.weight  F32  [4096]
blk.2.attn_output.weight  Q4_K  [4096, 4096]
blk.2.attn_q.bias  F32  [4096]
blk.2.attn_q.weight  Q4_K  [4096, 4096]
blk.2.attn_v.bias  F32  [256]
blk.2.attn_v.weight  Q6_K  [4096, 256]
blk.2.ffn_down.weight  Q8_0  [13696, 4096]
blk.2.ffn_norm.weight  F32  [4096]
blk.2.ffn_up.weight  Q4_K  [4096, 27392]
blk.2.post_attention_norm.weight  F32  [4096]
blk.2.post_ffw_norm.weight  F32  [4096]
blk.3
blk.3.attn_k.bias  F32  [256]
blk.3.attn_k.weight  Q4_K  [4096, 256]
blk.3.attn_norm.weight  F32  [4096]
blk.3.attn_output.weight  Q4_K  [4096, 4096]
blk.3.attn_q.bias  F32  [4096]
blk.3.attn_q.weight  Q4_K  [4096, 4096]
blk.3.attn_v.bias  F32  [256]
blk.3.attn_v.weight  Q6_K  [4096, 256]
blk.3.ffn_down.weight  Q8_0  [13696, 4096]
blk.3.ffn_norm.weight  F32  [4096]
blk.3.ffn_up.weight  Q4_K  [4096, 27392]
blk.3.post_attention_norm.weight  F32  [4096]
blk.3.post_ffw_norm.weight  F32  [4096]
blk.4
blk.4.attn_k.bias  F32  [256]
blk.4.attn_k.weight  Q4_K  [4096, 256]
blk.4.attn_norm.weight  F32  [4096]
blk.4.attn_output.weight  Q4_K  [4096, 4096]
blk.4.attn_q.bias  F32  [4096]
blk.4.attn_q.weight  Q4_K  [4096, 4096]
blk.4.attn_v.bias  F32  [256]
blk.4.attn_v.weight  Q6_K  [4096, 256]
blk.4.ffn_down.weight  Q8_0  [13696, 4096]
blk.4.ffn_norm.weight  F32  [4096]
blk.4.ffn_up.weight  Q4_K  [4096, 27392]
blk.4.post_attention_norm.weight  F32  [4096]
blk.4.post_ffw_norm.weight  F32  [4096]
blk.5
blk.5.attn_k.bias  F32  [256]
blk.5.attn_k.weight  Q4_K  [4096, 256]
blk.5.attn_norm.weight  F32  [4096]
blk.5.attn_output.weight  Q4_K  [4096, 4096]
blk.5.attn_q.bias  F32  [4096]
blk.5.attn_q.weight  Q4_K  [4096, 4096]
blk.5.attn_v.bias  F32  [256]
blk.5.attn_v.weight  Q4_K  [4096, 256]
blk.5.ffn_down.weight  Q5_0  [13696, 4096]
blk.5.ffn_norm.weight  F32  [4096]
blk.5.ffn_up.weight  Q4_K  [4096, 27392]
blk.5.post_attention_norm.weight  F32  [4096]
blk.5.post_ffw_norm.weight  F32  [4096]
blk.6
blk.6.attn_k.bias  F32  [256]
blk.6.attn_k.weight  Q4_K  [4096, 256]
blk.6.attn_norm.weight  F32  [4096]
blk.6.attn_output.weight  Q4_K  [4096, 4096]
blk.6.attn_q.bias  F32  [4096]
blk.6.attn_q.weight  Q4_K  [4096, 4096]
blk.6.attn_v.bias  F32  [256]
blk.6.attn_v.weight  Q4_K  [4096, 256]
blk.6.ffn_down.weight  Q5_0  [13696, 4096]
blk.6.ffn_norm.weight  F32  [4096]
blk.6.ffn_up.weight  Q4_K  [4096, 27392]
blk.6.post_attention_norm.weight  F32  [4096]
blk.6.post_ffw_norm.weight  F32  [4096]
blk.7
blk.7.attn_k.bias  F32  [256]
blk.7.attn_k.weight  Q4_K  [4096, 256]
blk.7.attn_norm.weight  F32  [4096]
blk.7.attn_output.weight  Q4_K  [4096, 4096]
blk.7.attn_q.bias  F32  [4096]
blk.7.attn_q.weight  Q4_K  [4096, 4096]
blk.7.attn_v.bias  F32  [256]
blk.7.attn_v.weight  Q6_K  [4096, 256]
blk.7.ffn_down.weight  Q8_0  [13696, 4096]
blk.7.ffn_norm.weight  F32  [4096]
blk.7.ffn_up.weight  Q4_K  [4096, 27392]
blk.7.post_attention_norm.weight  F32  [4096]
blk.7.post_ffw_norm.weight  F32  [4096]
blk.8
blk.8.attn_k.bias  F32  [256]
blk.8.attn_k.weight  Q4_K  [4096, 256]
blk.8.attn_norm.weight  F32  [4096]
blk.8.attn_output.weight  Q4_K  [4096, 4096]
blk.8.attn_q.bias  F32  [4096]
blk.8.attn_q.weight  Q4_K  [4096, 4096]
blk.8.attn_v.bias  F32  [256]
blk.8.attn_v.weight  Q4_K  [4096, 256]
blk.8.ffn_down.weight  Q5_0  [13696, 4096]
blk.8.ffn_norm.weight  F32  [4096]
blk.8.ffn_up.weight  Q4_K  [4096, 27392]
blk.8.post_attention_norm.weight  F32  [4096]
blk.8.post_ffw_norm.weight  F32  [4096]
blk.9
blk.9.attn_k.bias  F32  [256]
blk.9.attn_k.weight  Q4_K  [4096, 256]
blk.9.attn_norm.weight  F32  [4096]
blk.9.attn_output.weight  Q4_K  [4096, 4096]
blk.9.attn_q.bias  F32  [4096]
blk.9.attn_q.weight  Q4_K  [4096, 4096]
blk.9.attn_v.bias  F32  [256]
blk.9.attn_v.weight  Q4_K  [4096, 256]
blk.9.ffn_down.weight  Q5_0  [13696, 4096]
blk.9.ffn_norm.weight  F32  [4096]
blk.9.ffn_up.weight  Q4_K  [4096, 27392]
blk.9.post_attention_norm.weight  F32  [4096]
blk.9.post_ffw_norm.weight  F32  [4096]
blk.10
blk.10.attn_k.bias  F32  [256]
blk.10.attn_k.weight  Q4_K  [4096, 256]
blk.10.attn_norm.weight  F32  [4096]
blk.10.attn_output.weight  Q4_K  [4096, 4096]
blk.10.attn_q.bias  F32  [4096]
blk.10.attn_q.weight  Q4_K  [4096, 4096]
blk.10.attn_v.bias  F32  [256]
blk.10.attn_v.weight  Q6_K  [4096, 256]
blk.10.ffn_down.weight  Q8_0  [13696, 4096]
blk.10.ffn_norm.weight  F32  [4096]
blk.10.ffn_up.weight  Q4_K  [4096, 27392]
blk.10.post_attention_norm.weight  F32  [4096]
blk.10.post_ffw_norm.weight  F32  [4096]
blk.11
blk.11.attn_k.bias  F32  [256]
blk.11.attn_k.weight  Q4_K  [4096, 256]
blk.11.attn_norm.weight  F32  [4096]
blk.11.attn_output.weight  Q4_K  [4096, 4096]
blk.11.attn_q.bias  F32  [4096]
blk.11.attn_q.weight  Q4_K  [4096, 4096]
blk.11.attn_v.bias  F32  [256]
blk.11.attn_v.weight  Q4_K  [4096, 256]
blk.11.ffn_down.weight  Q5_0  [13696, 4096]
blk.11.ffn_norm.weight  F32  [4096]
blk.11.ffn_up.weight  Q4_K  [4096, 27392]
blk.11.post_attention_norm.weight  F32  [4096]
blk.11.post_ffw_norm.weight  F32  [4096]
blk.12
blk.12.attn_k.bias  F32  [256]
blk.12.attn_k.weight  Q4_K  [4096, 256]
blk.12.attn_norm.weight  F32  [4096]
blk.12.attn_output.weight  Q4_K  [4096, 4096]
blk.12.attn_q.bias  F32  [4096]
blk.12.attn_q.weight  Q4_K  [4096, 4096]
blk.12.attn_v.bias  F32  [256]
blk.12.attn_v.weight  Q4_K  [4096, 256]
blk.12.ffn_down.weight  Q5_0  [13696, 4096]
blk.12.ffn_norm.weight  F32  [4096]
blk.12.ffn_up.weight  Q4_K  [4096, 27392]
blk.12.post_attention_norm.weight  F32  [4096]
blk.12.post_ffw_norm.weight  F32  [4096]
blk.13
blk.13.attn_k.bias  F32  [256]
blk.13.attn_k.weight  Q4_K  [4096, 256]
blk.13.attn_norm.weight  F32  [4096]
blk.13.attn_output.weight  Q4_K  [4096, 4096]
blk.13.attn_q.bias  F32  [4096]
blk.13.attn_q.weight  Q4_K  [4096, 4096]
blk.13.attn_v.bias  F32  [256]
blk.13.attn_v.weight  Q6_K  [4096, 256]
blk.13.ffn_down.weight  Q8_0  [13696, 4096]
blk.13.ffn_norm.weight  F32  [4096]
blk.13.ffn_up.weight  Q4_K  [4096, 27392]
blk.13.post_attention_norm.weight  F32  [4096]
blk.13.post_ffw_norm.weight  F32  [4096]
blk.14
blk.14.attn_k.bias  F32  [256]
blk.14.attn_k.weight  Q4_K  [4096, 256]
blk.14.attn_norm.weight  F32  [4096]
blk.14.attn_output.weight  Q4_K  [4096, 4096]
blk.14.attn_q.bias  F32  [4096]
blk.14.attn_q.weight  Q4_K  [4096, 4096]
blk.14.attn_v.bias  F32  [256]
blk.14.attn_v.weight  Q4_K  [4096, 256]
blk.14.ffn_down.weight  Q5_0  [13696, 4096]
blk.14.ffn_norm.weight  F32  [4096]
blk.14.ffn_up.weight  Q4_K  [4096, 27392]
blk.14.post_attention_norm.weight  F32  [4096]
blk.14.post_ffw_norm.weight  F32  [4096]
blk.15
blk.15.attn_k.bias  F32  [256]
blk.15.attn_k.weight  Q4_K  [4096, 256]
blk.15.attn_norm.weight  F32  [4096]
blk.15.attn_output.weight  Q4_K  [4096, 4096]
blk.15.attn_q.bias  F32  [4096]
blk.15.attn_q.weight  Q4_K  [4096, 4096]
blk.15.attn_v.bias  F32  [256]
blk.15.attn_v.weight  Q4_K  [4096, 256]
blk.15.ffn_down.weight  Q5_0  [13696, 4096]
blk.15.ffn_norm.weight  F32  [4096]
blk.15.ffn_up.weight  Q4_K  [4096, 27392]
blk.15.post_attention_norm.weight  F32  [4096]
blk.15.post_ffw_norm.weight  F32  [4096]
blk.16
blk.16.attn_k.bias  F32  [256]
blk.16.attn_k.weight  Q4_K  [4096, 256]
blk.16.attn_norm.weight  F32  [4096]
blk.16.attn_output.weight  Q4_K  [4096, 4096]
blk.16.attn_q.bias  F32  [4096]
blk.16.attn_q.weight  Q4_K  [4096, 4096]
blk.16.attn_v.bias  F32  [256]
blk.16.attn_v.weight  Q6_K  [4096, 256]
blk.16.ffn_down.weight  Q8_0  [13696, 4096]
blk.16.ffn_norm.weight  F32  [4096]
blk.16.ffn_up.weight  Q4_K  [4096, 27392]
blk.16.post_attention_norm.weight  F32  [4096]
blk.16.post_ffw_norm.weight  F32  [4096]
blk.17
blk.17.attn_k.bias  F32  [256]
blk.17.attn_k.weight  Q4_K  [4096, 256]
blk.17.attn_norm.weight  F32  [4096]
blk.17.attn_output.weight  Q4_K  [4096, 4096]
blk.17.attn_q.bias  F32  [4096]
blk.17.attn_q.weight  Q4_K  [4096, 4096]
blk.17.attn_v.bias  F32  [256]
blk.17.attn_v.weight  Q4_K  [4096, 256]
blk.17.ffn_down.weight  Q5_0  [13696, 4096]
blk.17.ffn_norm.weight  F32  [4096]
blk.17.ffn_up.weight  Q4_K  [4096, 27392]
blk.17.post_attention_norm.weight  F32  [4096]
blk.17.post_ffw_norm.weight  F32  [4096]
blk.18
blk.18.attn_k.bias  F32  [256]
blk.18.attn_k.weight  Q4_K  [4096, 256]
blk.18.attn_norm.weight  F32  [4096]
blk.18.attn_output.weight  Q4_K  [4096, 4096]
blk.18.attn_q.bias  F32  [4096]
blk.18.attn_q.weight  Q4_K  [4096, 4096]
blk.18.attn_v.bias  F32  [256]
blk.18.attn_v.weight  Q4_K  [4096, 256]
blk.18.ffn_down.weight  Q5_0  [13696, 4096]
blk.18.ffn_norm.weight  F32  [4096]
blk.18.ffn_up.weight  Q4_K  [4096, 27392]
blk.18.post_attention_norm.weight  F32  [4096]
blk.18.post_ffw_norm.weight  F32  [4096]
blk.19
blk.19.attn_k.bias  F32  [256]
blk.19.attn_k.weight  Q4_K  [4096, 256]
blk.19.attn_norm.weight  F32  [4096]
blk.19.attn_output.weight  Q4_K  [4096, 4096]
blk.19.attn_q.bias  F32  [4096]
blk.19.attn_q.weight  Q4_K  [4096, 4096]
blk.19.attn_v.bias  F32  [256]
blk.19.attn_v.weight  Q6_K  [4096, 256]
blk.19.ffn_down.weight  Q8_0  [13696, 4096]
blk.19.ffn_norm.weight  F32  [4096]
blk.19.ffn_up.weight  Q4_K  [4096, 27392]
blk.19.post_attention_norm.weight  F32  [4096]
blk.19.post_ffw_norm.weight  F32  [4096]
blk.20
blk.20.attn_k.bias  F32  [256]
blk.20.attn_k.weight  Q4_K  [4096, 256]
blk.20.attn_norm.weight  F32  [4096]
blk.20.attn_output.weight  Q4_K  [4096, 4096]
blk.20.attn_q.bias  F32  [4096]
blk.20.attn_q.weight  Q4_K  [4096, 4096]
blk.20.attn_v.bias  F32  [256]
blk.20.attn_v.weight  Q4_K  [4096, 256]
blk.20.ffn_down.weight  Q5_0  [13696, 4096]
blk.20.ffn_norm.weight  F32  [4096]
blk.20.ffn_up.weight  Q4_K  [4096, 27392]
blk.20.post_attention_norm.weight  F32  [4096]
blk.20.post_ffw_norm.weight  F32  [4096]
blk.21
blk.21.attn_k.bias  F32  [256]
blk.21.attn_k.weight  Q4_K  [4096, 256]
blk.21.attn_norm.weight  F32  [4096]
blk.21.attn_output.weight  Q4_K  [4096, 4096]
blk.21.attn_q.bias  F32  [4096]
blk.21.attn_q.weight  Q4_K  [4096, 4096]
blk.21.attn_v.bias  F32  [256]
blk.21.attn_v.weight  Q4_K  [4096, 256]
blk.21.ffn_down.weight  Q5_0  [13696, 4096]
blk.21.ffn_norm.weight  F32  [4096]
blk.21.ffn_up.weight  Q4_K  [4096, 27392]
blk.21.post_attention_norm.weight  F32  [4096]
blk.21.post_ffw_norm.weight  F32  [4096]
blk.22
blk.22.attn_k.bias  F32  [256]
blk.22.attn_k.weight  Q4_K  [4096, 256]
blk.22.attn_norm.weight  F32  [4096]
blk.22.attn_output.weight  Q4_K  [4096, 4096]
blk.22.attn_q.bias  F32  [4096]
blk.22.attn_q.weight  Q4_K  [4096, 4096]
blk.22.attn_v.bias  F32  [256]
blk.22.attn_v.weight  Q6_K  [4096, 256]
blk.22.ffn_down.weight  Q8_0  [13696, 4096]
blk.22.ffn_norm.weight  F32  [4096]
blk.22.ffn_up.weight  Q4_K  [4096, 27392]
blk.22.post_attention_norm.weight  F32  [4096]
blk.22.post_ffw_norm.weight  F32  [4096]
blk.23
blk.23.attn_k.bias  F32  [256]
blk.23.attn_k.weight  Q4_K  [4096, 256]
blk.23.attn_norm.weight  F32  [4096]
blk.23.attn_output.weight  Q4_K  [4096, 4096]
blk.23.attn_q.bias  F32  [4096]
blk.23.attn_q.weight  Q4_K  [4096, 4096]
blk.23.attn_v.bias  F32  [256]
blk.23.attn_v.weight  Q4_K  [4096, 256]
blk.23.ffn_down.weight  Q5_0  [13696, 4096]
blk.23.ffn_norm.weight  F32  [4096]
blk.23.ffn_up.weight  Q4_K  [4096, 27392]
blk.23.post_attention_norm.weight  F32  [4096]
blk.23.post_ffw_norm.weight  F32  [4096]
blk.24
blk.24.attn_k.bias  F32  [256]
blk.24.attn_k.weight  Q4_K  [4096, 256]
blk.24.attn_norm.weight  F32  [4096]
blk.24.attn_output.weight  Q4_K  [4096, 4096]
blk.24.attn_q.bias  F32  [4096]
blk.24.attn_q.weight  Q4_K  [4096, 4096]
blk.24.attn_v.bias  F32  [256]
blk.24.attn_v.weight  Q4_K  [4096, 256]
blk.24.ffn_down.weight  Q5_0  [13696, 4096]
blk.24.ffn_norm.weight  F32  [4096]
blk.24.ffn_up.weight  Q4_K  [4096, 27392]
blk.24.post_attention_norm.weight  F32  [4096]
blk.24.post_ffw_norm.weight  F32  [4096]
blk.25
blk.25.attn_k.bias  F32  [256]
blk.25.attn_k.weight  Q4_K  [4096, 256]
blk.25.attn_norm.weight  F32  [4096]
blk.25.attn_output.weight  Q4_K  [4096, 4096]
blk.25.attn_q.bias  F32  [4096]
blk.25.attn_q.weight  Q4_K  [4096, 4096]
blk.25.attn_v.bias  F32  [256]
blk.25.attn_v.weight  Q6_K  [4096, 256]
blk.25.ffn_down.weight  Q8_0  [13696, 4096]
blk.25.ffn_norm.weight  F32  [4096]
blk.25.ffn_up.weight  Q4_K  [4096, 27392]
blk.25.post_attention_norm.weight  F32  [4096]
blk.25.post_ffw_norm.weight  F32  [4096]
blk.26
blk.26.attn_k.bias  F32  [256]
blk.26.attn_k.weight  Q4_K  [4096, 256]
blk.26.attn_norm.weight  F32  [4096]
blk.26.attn_output.weight  Q4_K  [4096, 4096]
blk.26.attn_q.bias  F32  [4096]
blk.26.attn_q.weight  Q4_K  [4096, 4096]
blk.26.attn_v.bias  F32  [256]
blk.26.attn_v.weight  Q4_K  [4096, 256]
blk.26.ffn_down.weight  Q5_0  [13696, 4096]
blk.26.ffn_norm.weight  F32  [4096]
blk.26.ffn_up.weight  Q4_K  [4096, 27392]
blk.26.post_attention_norm.weight  F32  [4096]
blk.26.post_ffw_norm.weight  F32  [4096]
blk.27
blk.27.attn_k.bias  F32  [256]
blk.27.attn_k.weight  Q4_K  [4096, 256]
blk.27.attn_norm.weight  F32  [4096]
blk.27.attn_output.weight  Q4_K  [4096, 4096]
blk.27.attn_q.bias  F32  [4096]
blk.27.attn_q.weight  Q4_K  [4096, 4096]
blk.27.attn_v.bias  F32  [256]
blk.27.attn_v.weight  Q4_K  [4096, 256]
blk.27.ffn_down.weight  Q5_0  [13696, 4096]
blk.27.ffn_norm.weight  F32  [4096]
blk.27.ffn_up.weight  Q4_K  [4096, 27392]
blk.27.post_attention_norm.weight  F32  [4096]
blk.27.post_ffw_norm.weight  F32  [4096]
blk.28
blk.28.attn_k.bias  F32  [256]
blk.28.attn_k.weight  Q4_K  [4096, 256]
blk.28.attn_norm.weight  F32  [4096]
blk.28.attn_output.weight  Q4_K  [4096, 4096]
blk.28.attn_q.bias  F32  [4096]
blk.28.attn_q.weight  Q4_K  [4096, 4096]
blk.28.attn_v.bias  F32  [256]
blk.28.attn_v.weight  Q6_K  [4096, 256]
blk.28.ffn_down.weight  Q8_0  [13696, 4096]
blk.28.ffn_norm.weight  F32  [4096]
blk.28.ffn_up.weight  Q4_K  [4096, 27392]
blk.28.post_attention_norm.weight  F32  [4096]
blk.28.post_ffw_norm.weight  F32  [4096]
blk.29
blk.29.attn_k.bias  F32  [256]
blk.29.attn_k.weight  Q4_K  [4096, 256]
blk.29.attn_norm.weight  F32  [4096]
blk.29.attn_output.weight  Q4_K  [4096, 4096]
blk.29.attn_q.bias  F32  [4096]
blk.29.attn_q.weight  Q4_K  [4096, 4096]
blk.29.attn_v.bias  F32  [256]
blk.29.attn_v.weight  Q4_K  [4096, 256]
blk.29.ffn_down.weight  Q5_0  [13696, 4096]
blk.29.ffn_norm.weight  F32  [4096]
blk.29.ffn_up.weight  Q4_K  [4096, 27392]
blk.29.post_attention_norm.weight  F32  [4096]
blk.29.post_ffw_norm.weight  F32  [4096]
blk.30
blk.30.attn_k.bias  F32  [256]
blk.30.attn_k.weight  Q4_K  [4096, 256]
blk.30.attn_norm.weight  F32  [4096]
blk.30.attn_output.weight  Q4_K  [4096, 4096]
blk.30.attn_q.bias  F32  [4096]
blk.30.attn_q.weight  Q4_K  [4096, 4096]
blk.30.attn_v.bias  F32  [256]
blk.30.attn_v.weight  Q4_K  [4096, 256]
blk.30.ffn_down.weight  Q5_0  [13696, 4096]
blk.30.ffn_norm.weight  F32  [4096]
blk.30.ffn_up.weight  Q4_K  [4096, 27392]
blk.30.post_attention_norm.weight  F32  [4096]
blk.30.post_ffw_norm.weight  F32  [4096]
blk.31
blk.31.attn_k.bias  F32  [256]
blk.31.attn_k.weight  Q4_K  [4096, 256]
blk.31.attn_norm.weight  F32  [4096]
blk.31.attn_output.weight  Q4_K  [4096, 4096]
blk.31.attn_q.bias  F32  [4096]
blk.31.attn_q.weight  Q4_K  [4096, 4096]
blk.31.attn_v.bias  F32  [256]
blk.31.attn_v.weight  Q6_K  [4096, 256]
blk.31.ffn_down.weight  Q8_0  [13696, 4096]
blk.31.ffn_norm.weight  F32  [4096]
blk.31.ffn_up.weight  Q4_K  [4096, 27392]
blk.31.post_attention_norm.weight  F32  [4096]
blk.31.post_ffw_norm.weight  F32  [4096]
blk.32
blk.32.attn_k.bias  F32  [256]
blk.32.attn_k.weight  Q4_K  [4096, 256]
blk.32.attn_norm.weight  F32  [4096]
blk.32.attn_output.weight  Q4_K  [4096, 4096]
blk.32.attn_q.bias  F32  [4096]
blk.32.attn_q.weight  Q4_K  [4096, 4096]
blk.32.attn_v.bias  F32  [256]
blk.32.attn_v.weight  Q4_K  [4096, 256]
blk.32.ffn_down.weight  Q5_0  [13696, 4096]
blk.32.ffn_norm.weight  F32  [4096]
blk.32.ffn_up.weight  Q4_K  [4096, 27392]
blk.32.post_attention_norm.weight  F32  [4096]
blk.32.post_ffw_norm.weight  F32  [4096]
blk.33
blk.33.attn_k.bias  F32  [256]
blk.33.attn_k.weight  Q4_K  [4096, 256]
blk.33.attn_norm.weight  F32  [4096]
blk.33.attn_output.weight  Q4_K  [4096, 4096]
blk.33.attn_q.bias  F32  [4096]
blk.33.attn_q.weight  Q4_K  [4096, 4096]
blk.33.attn_v.bias  F32  [256]
blk.33.attn_v.weight  Q4_K  [4096, 256]
blk.33.ffn_down.weight  Q5_0  [13696, 4096]
blk.33.ffn_norm.weight  F32  [4096]
blk.33.ffn_up.weight  Q4_K  [4096, 27392]
blk.33.post_attention_norm.weight  F32  [4096]
blk.33.post_ffw_norm.weight  F32  [4096]
blk.34
blk.34.attn_k.bias  F32  [256]
blk.34.attn_k.weight  Q4_K  [4096, 256]
blk.34.attn_norm.weight  F32  [4096]
blk.34.attn_output.weight  Q4_K  [4096, 4096]
blk.34.attn_q.bias  F32  [4096]
blk.34.attn_q.weight  Q4_K  [4096, 4096]
blk.34.attn_v.bias  F32  [256]
blk.34.attn_v.weight  Q6_K  [4096, 256]
blk.34.ffn_down.weight  Q8_0  [13696, 4096]
blk.34.ffn_norm.weight  F32  [4096]
blk.34.ffn_up.weight  Q4_K  [4096, 27392]
blk.34.post_attention_norm.weight  F32  [4096]
blk.34.post_ffw_norm.weight  F32  [4096]
blk.35
blk.35.attn_k.bias  F32  [256]
blk.35.attn_k.weight  Q4_K  [4096, 256]
blk.35.attn_norm.weight  F32  [4096]
blk.35.attn_output.weight  Q4_K  [4096, 4096]
blk.35.attn_q.bias  F32  [4096]
blk.35.attn_q.weight  Q4_K  [4096, 4096]
blk.35.attn_v.bias  F32  [256]
blk.35.attn_v.weight  Q6_K  [4096, 256]
blk.35.ffn_down.weight  Q8_0  [13696, 4096]
blk.35.ffn_norm.weight  F32  [4096]
blk.35.ffn_up.weight  Q4_K  [4096, 27392]
blk.35.post_attention_norm.weight  F32  [4096]
blk.35.post_ffw_norm.weight  F32  [4096]
blk.36
blk.36.attn_k.bias  F32  [256]
blk.36.attn_k.weight  Q4_K  [4096, 256]
blk.36.attn_norm.weight  F32  [4096]
blk.36.attn_output.weight  Q4_K  [4096, 4096]
blk.36.attn_q.bias  F32  [4096]
blk.36.attn_q.weight  Q4_K  [4096, 4096]
blk.36.attn_v.bias  F32  [256]
blk.36.attn_v.weight  Q6_K  [4096, 256]
blk.36.ffn_down.weight  Q8_0  [13696, 4096]
blk.36.ffn_norm.weight  F32  [4096]
blk.36.ffn_up.weight  Q4_K  [4096, 27392]
blk.36.post_attention_norm.weight  F32  [4096]
blk.36.post_ffw_norm.weight  F32  [4096]
blk.37
blk.37.attn_k.bias  F32  [256]
blk.37.attn_k.weight  Q4_K  [4096, 256]
blk.37.attn_norm.weight  F32  [4096]
blk.37.attn_output.weight  Q4_K  [4096, 4096]
blk.37.attn_q.bias  F32  [4096]
blk.37.attn_q.weight  Q4_K  [4096, 4096]
blk.37.attn_v.bias  F32  [256]
blk.37.attn_v.weight  Q6_K  [4096, 256]
blk.37.ffn_down.weight  Q8_0  [13696, 4096]
blk.37.ffn_norm.weight  F32  [4096]
blk.37.ffn_up.weight  Q4_K  [4096, 27392]
blk.37.post_attention_norm.weight  F32  [4096]
blk.37.post_ffw_norm.weight  F32  [4096]
blk.38
blk.38.attn_k.bias  F32  [256]
blk.38.attn_k.weight  Q4_K  [4096, 256]
blk.38.attn_norm.weight  F32  [4096]
blk.38.attn_output.weight  Q4_K  [4096, 4096]
blk.38.attn_q.bias  F32  [4096]
blk.38.attn_q.weight  Q4_K  [4096, 4096]
blk.38.attn_v.bias  F32  [256]
blk.38.attn_v.weight  Q6_K  [4096, 256]
blk.38.ffn_down.weight  Q8_0  [13696, 4096]
blk.38.ffn_norm.weight  F32  [4096]
blk.38.ffn_up.weight  Q4_K  [4096, 27392]
blk.38.post_attention_norm.weight  F32  [4096]
blk.38.post_ffw_norm.weight  F32  [4096]
blk.39
blk.39.attn_k.bias  F32  [256]
blk.39.attn_k.weight  Q4_K  [4096, 256]
blk.39.attn_norm.weight  F32  [4096]
blk.39.attn_output.weight  Q4_K  [4096, 4096]
blk.39.attn_q.bias  F32  [4096]
blk.39.attn_q.weight  Q4_K  [4096, 4096]
blk.39.attn_v.bias  F32  [256]
blk.39.attn_v.weight  Q6_K  [4096, 256]
blk.39.ffn_down.weight  Q8_0  [13696, 4096]
blk.39.ffn_norm.weight  F32  [4096]
blk.39.ffn_up.weight  Q4_K  [4096, 27392]
blk.39.post_attention_norm.weight  F32  [4096]
blk.39.post_ffw_norm.weight  F32  [4096]
output.weight  Q6_K  [4096, 151552]
output_norm.weight  F32  [4096]
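
Likewise, the per-tensor listing above (name, quantization type, shape) can be reproduced and summarized from the file itself. A minimal sketch, again assuming the gguf package and a placeholder filename:

from collections import Counter
from gguf import GGUFReader

reader = GGUFReader("GLM-4.6V-Flash-9B-Q4_K_M.gguf")  # placeholder filename

counts = Counter()
for t in reader.tensors:
    counts[t.tensor_type.name] += 1                   # e.g. Q4_K, Q6_K, Q8_0, Q5_0, F32
    print(f"{t.name:35s} {t.tensor_type.name:6s} {list(t.shape)}")

print(counts)                                         # tally of tensors per quantization type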