Models
GitHub
Discord
Turbo
Sign in
Download
Models
Download
GitHub
Discord
Sign in
starsnatched
/
thinkergemma
:latest
25
Downloads
Updated
11 months ago
Trained on my Thinker dataset to replicate the thought traces of OpenAI's o1. Very smol model, very nice.
Trained on my Thinker dataset to replicate the thought traces of OpenAI's o1. Very smol model, very nice.
Cancel
thinkergemma:latest
...
/
model
ae439ee6b231 · 2.8GB
Metadata
general.architecture
gemma2
gemma2
general.file_type
Q8_0
Q8_0
gemma2.attention.head_count
8
8
gemma2.attention.head_count_kv
4
4
gemma2.attention.key_length
256
256
gemma2.attention.layer_norm_rms_epsilon
1e-06
1e-06
gemma2.attention.sliding_window
4096
4096
gemma2.attention.value_length
256
256
gemma2.attn_logit_softcapping
50
50
gemma2.block_count
26
26
gemma2.context_length
8192
8192
gemma2.embedding_length
2304
2304
gemma2.feed_forward_length
9216
9216
gemma2.final_logit_softcapping
30
30
tokenizer.ggml.add_bos_token
true
true
tokenizer.ggml.add_eos_token
false
false
tokenizer.ggml.add_space_prefix
false
false
tokenizer.ggml.bos_token_id
2
2
tokenizer.ggml.eos_token_id
1
1
tokenizer.ggml.model
llama
llama
tokenizer.ggml.padding_token_id
0
0
tokenizer.ggml.pre
default
default
tokenizer.ggml.scores
[-1000, -1000, -1000, -1000, -1000, ...]
[-1000, -1000, -1000, -1000, -1000, ...]
tokenizer.ggml.token_type
[3, 3, 3, 3, 3, ...]
[3, 3, 3, 3, 3, ...]
tokenizer.ggml.tokens
[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
tokenizer.ggml.unknown_token_id
3
3
Tensor
Name
Type
Shape
token_embd.weight
Q8_0
Q8_0
[2304, 256000]
blk.0
blk.0.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.0.attn_norm.weight
F32
F32
[2304]
blk.0.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.0.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.0.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.0.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.0.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.0.ffn_norm.weight
F32
F32
[2304]
blk.0.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.0.post_attention_norm.weight
F32
F32
[2304]
blk.0.post_ffw_norm.weight
F32
F32
[2304]
blk.1
blk.1.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.1.attn_norm.weight
F32
F32
[2304]
blk.1.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.1.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.1.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.1.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.1.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.1.ffn_norm.weight
F32
F32
[2304]
blk.1.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.1.post_attention_norm.weight
F32
F32
[2304]
blk.1.post_ffw_norm.weight
F32
F32
[2304]
blk.2
blk.2.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.2.attn_norm.weight
F32
F32
[2304]
blk.2.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.2.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.2.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.2.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.2.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.2.ffn_norm.weight
F32
F32
[2304]
blk.2.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.2.post_attention_norm.weight
F32
F32
[2304]
blk.2.post_ffw_norm.weight
F32
F32
[2304]
blk.3
blk.3.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.3.attn_norm.weight
F32
F32
[2304]
blk.3.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.3.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.3.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.3.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.3.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.3.ffn_norm.weight
F32
F32
[2304]
blk.3.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.3.post_attention_norm.weight
F32
F32
[2304]
blk.3.post_ffw_norm.weight
F32
F32
[2304]
blk.4
blk.4.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.4.attn_norm.weight
F32
F32
[2304]
blk.4.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.4.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.4.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.4.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.4.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.4.ffn_norm.weight
F32
F32
[2304]
blk.4.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.4.post_attention_norm.weight
F32
F32
[2304]
blk.4.post_ffw_norm.weight
F32
F32
[2304]
blk.5
blk.5.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.5.attn_norm.weight
F32
F32
[2304]
blk.5.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.5.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.5.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.5.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.5.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.5.ffn_norm.weight
F32
F32
[2304]
blk.5.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.5.post_attention_norm.weight
F32
F32
[2304]
blk.5.post_ffw_norm.weight
F32
F32
[2304]
blk.6
blk.6.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.6.attn_norm.weight
F32
F32
[2304]
blk.6.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.6.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.6.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.6.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.6.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.6.ffn_norm.weight
F32
F32
[2304]
blk.6.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.6.post_attention_norm.weight
F32
F32
[2304]
blk.6.post_ffw_norm.weight
F32
F32
[2304]
blk.7
blk.7.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.7.attn_norm.weight
F32
F32
[2304]
blk.7.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.7.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.7.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.7.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.7.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.7.ffn_norm.weight
F32
F32
[2304]
blk.7.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.7.post_attention_norm.weight
F32
F32
[2304]
blk.7.post_ffw_norm.weight
F32
F32
[2304]
blk.8
blk.8.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.8.attn_norm.weight
F32
F32
[2304]
blk.8.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.8.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.8.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.8.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.8.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.8.ffn_norm.weight
F32
F32
[2304]
blk.8.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.8.post_attention_norm.weight
F32
F32
[2304]
blk.8.post_ffw_norm.weight
F32
F32
[2304]
blk.9
blk.9.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.9.attn_norm.weight
F32
F32
[2304]
blk.9.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.9.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.9.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.9.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.9.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.9.ffn_norm.weight
F32
F32
[2304]
blk.9.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.9.post_attention_norm.weight
F32
F32
[2304]
blk.9.post_ffw_norm.weight
F32
F32
[2304]
blk.10
blk.10.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.10.attn_norm.weight
F32
F32
[2304]
blk.10.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.10.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.10.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.10.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.10.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.10.ffn_norm.weight
F32
F32
[2304]
blk.10.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.10.post_attention_norm.weight
F32
F32
[2304]
blk.10.post_ffw_norm.weight
F32
F32
[2304]
blk.11
blk.11.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.11.attn_norm.weight
F32
F32
[2304]
blk.11.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.11.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.11.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.11.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.11.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.11.ffn_norm.weight
F32
F32
[2304]
blk.11.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.11.post_attention_norm.weight
F32
F32
[2304]
blk.11.post_ffw_norm.weight
F32
F32
[2304]
blk.12
blk.12.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.12.attn_norm.weight
F32
F32
[2304]
blk.12.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.12.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.12.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.12.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.12.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.12.ffn_norm.weight
F32
F32
[2304]
blk.12.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.12.post_attention_norm.weight
F32
F32
[2304]
blk.12.post_ffw_norm.weight
F32
F32
[2304]
blk.13
blk.13.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.13.attn_norm.weight
F32
F32
[2304]
blk.13.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.13.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.13.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.13.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.13.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.13.ffn_norm.weight
F32
F32
[2304]
blk.13.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.13.post_attention_norm.weight
F32
F32
[2304]
blk.13.post_ffw_norm.weight
F32
F32
[2304]
blk.14
blk.14.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.14.attn_norm.weight
F32
F32
[2304]
blk.14.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.14.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.14.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.14.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.14.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.14.ffn_norm.weight
F32
F32
[2304]
blk.14.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.14.post_attention_norm.weight
F32
F32
[2304]
blk.14.post_ffw_norm.weight
F32
F32
[2304]
blk.15
blk.15.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.15.attn_norm.weight
F32
F32
[2304]
blk.15.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.15.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.15.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.15.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.15.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.15.ffn_norm.weight
F32
F32
[2304]
blk.15.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.15.post_attention_norm.weight
F32
F32
[2304]
blk.15.post_ffw_norm.weight
F32
F32
[2304]
blk.16
blk.16.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.16.attn_norm.weight
F32
F32
[2304]
blk.16.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.16.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.16.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.16.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.16.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.16.ffn_norm.weight
F32
F32
[2304]
blk.16.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.16.post_attention_norm.weight
F32
F32
[2304]
blk.16.post_ffw_norm.weight
F32
F32
[2304]
blk.17
blk.17.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.17.attn_norm.weight
F32
F32
[2304]
blk.17.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.17.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.17.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.17.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.17.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.17.ffn_norm.weight
F32
F32
[2304]
blk.17.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.17.post_attention_norm.weight
F32
F32
[2304]
blk.17.post_ffw_norm.weight
F32
F32
[2304]
blk.18
blk.18.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.18.attn_norm.weight
F32
F32
[2304]
blk.18.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.18.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.18.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.18.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.18.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.18.ffn_norm.weight
F32
F32
[2304]
blk.18.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.18.post_attention_norm.weight
F32
F32
[2304]
blk.18.post_ffw_norm.weight
F32
F32
[2304]
blk.19
blk.19.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.19.attn_norm.weight
F32
F32
[2304]
blk.19.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.19.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.19.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.19.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.19.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.19.ffn_norm.weight
F32
F32
[2304]
blk.19.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.19.post_attention_norm.weight
F32
F32
[2304]
blk.19.post_ffw_norm.weight
F32
F32
[2304]
blk.20
blk.20.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.20.attn_norm.weight
F32
F32
[2304]
blk.20.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.20.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.20.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.20.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.20.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.20.ffn_norm.weight
F32
F32
[2304]
blk.20.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.20.post_attention_norm.weight
F32
F32
[2304]
blk.20.post_ffw_norm.weight
F32
F32
[2304]
blk.21
blk.21.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.21.attn_norm.weight
F32
F32
[2304]
blk.21.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.21.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.21.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.21.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.21.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.21.ffn_norm.weight
F32
F32
[2304]
blk.21.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.21.post_attention_norm.weight
F32
F32
[2304]
blk.21.post_ffw_norm.weight
F32
F32
[2304]
blk.22
blk.22.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.22.attn_norm.weight
F32
F32
[2304]
blk.22.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.22.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.22.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.22.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.22.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.22.ffn_norm.weight
F32
F32
[2304]
blk.22.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.22.post_attention_norm.weight
F32
F32
[2304]
blk.22.post_ffw_norm.weight
F32
F32
[2304]
blk.23
blk.23.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.23.attn_norm.weight
F32
F32
[2304]
blk.23.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.23.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.23.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.23.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.23.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.23.ffn_norm.weight
F32
F32
[2304]
blk.23.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.23.post_attention_norm.weight
F32
F32
[2304]
blk.23.post_ffw_norm.weight
F32
F32
[2304]
blk.24
blk.24.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.24.attn_norm.weight
F32
F32
[2304]
blk.24.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.24.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.24.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.24.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.24.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.24.ffn_norm.weight
F32
F32
[2304]
blk.24.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.24.post_attention_norm.weight
F32
F32
[2304]
blk.24.post_ffw_norm.weight
F32
F32
[2304]
blk.25
blk.25.attn_k.weight
Q8_0
Q8_0
[2304, 1024]
blk.25.attn_norm.weight
F32
F32
[2304]
blk.25.attn_output.weight
Q8_0
Q8_0
[2048, 2304]
blk.25.attn_q.weight
Q8_0
Q8_0
[2304, 2048]
blk.25.attn_v.weight
Q8_0
Q8_0
[2304, 1024]
blk.25.ffn_down.weight
Q8_0
Q8_0
[9216, 2304]
blk.25.ffn_gate.weight
Q8_0
Q8_0
[2304, 9216]
blk.25.ffn_norm.weight
F32
F32
[2304]
blk.25.ffn_up.weight
Q8_0
Q8_0
[2304, 9216]
blk.25.post_attention_norm.weight
F32
F32
[2304]
blk.25.post_ffw_norm.weight
F32
F32
[2304]
output_norm.weight
F32
F32
[2304]