Models
GitHub
Discord
Docs
Cloud
Sign in
Download
Models
Download
GitHub
Discord
Docs
Cloud
Sign in
muhammadrafikov
/
Bootstrap
:latest
1
Download
Updated
2 weeks ago
This is a Qwen 2 based LLM that I trained on my own Laptop. Pre-trained with C4 and fine-tuned with dolly 15k
This is a Qwen 2 based LLM that I trained on my own Laptop. Pre-trained with C4 and fine-tuned with dolly 15k
Cancel
Bootstrap:latest
...
/
model
b12541cd50d9 · 270MB
Metadata
general.architecture
qwen2
qwen2
general.file_type
F16
F16
qwen2.attention.head_count
6
6
qwen2.attention.head_count_kv
6
6
qwen2.attention.layer_norm_rms_epsilon
1e-06
1e-06
qwen2.block_count
6
6
qwen2.context_length
1024
1024
qwen2.embedding_length
768
768
qwen2.feed_forward_length
3072
3072
qwen2.rope.freq_base
10000
10000
tokenizer.ggml.bos_token_id
50257
50257
tokenizer.ggml.eos_token_id
50256
50256
tokenizer.ggml.merges
[Ġ t, Ġ a, h e, i n, r e, ...]
[Ġ t, Ġ a, h e, i n, r e, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
50258
50258
tokenizer.ggml.pre
gpt-2
gpt-2
tokenizer.ggml.token_type
[1, 1, 1, 1, 1, ...]
[1, 1, 1, 1, 1, ...]
tokenizer.ggml.tokens
[!, ", #, $, %, ...]
[!, ", #, $, %, ...]
tokenizer.ggml.unknown_token_id
50259
50259
Tensor
Name
Type
Shape
token_embd.weight
F16
F16
[768, 50262]
blk.0
blk.0.attn_k.bias
F32
F32
[768]
blk.0.attn_k.weight
F16
F16
[768, 768]
blk.0.attn_norm.weight
F32
F32
[768]
blk.0.attn_output.weight
F16
F16
[768, 768]
blk.0.attn_q.bias
F32
F32
[768]
blk.0.attn_q.weight
F16
F16
[768, 768]
blk.0.attn_v.bias
F32
F32
[768]
blk.0.attn_v.weight
F16
F16
[768, 768]
blk.0.ffn_down.weight
F16
F16
[3072, 768]
blk.0.ffn_gate.weight
F16
F16
[768, 3072]
blk.0.ffn_norm.weight
F32
F32
[768]
blk.0.ffn_up.weight
F16
F16
[768, 3072]
blk.1
blk.1.attn_k.bias
F32
F32
[768]
blk.1.attn_k.weight
F16
F16
[768, 768]
blk.1.attn_norm.weight
F32
F32
[768]
blk.1.attn_output.weight
F16
F16
[768, 768]
blk.1.attn_q.bias
F32
F32
[768]
blk.1.attn_q.weight
F16
F16
[768, 768]
blk.1.attn_v.bias
F32
F32
[768]
blk.1.attn_v.weight
F16
F16
[768, 768]
blk.1.ffn_down.weight
F16
F16
[3072, 768]
blk.1.ffn_gate.weight
F16
F16
[768, 3072]
blk.1.ffn_norm.weight
F32
F32
[768]
blk.1.ffn_up.weight
F16
F16
[768, 3072]
blk.2
blk.2.attn_k.bias
F32
F32
[768]
blk.2.attn_k.weight
F16
F16
[768, 768]
blk.2.attn_norm.weight
F32
F32
[768]
blk.2.attn_output.weight
F16
F16
[768, 768]
blk.2.attn_q.bias
F32
F32
[768]
blk.2.attn_q.weight
F16
F16
[768, 768]
blk.2.attn_v.bias
F32
F32
[768]
blk.2.attn_v.weight
F16
F16
[768, 768]
blk.2.ffn_down.weight
F16
F16
[3072, 768]
blk.2.ffn_gate.weight
F16
F16
[768, 3072]
blk.2.ffn_norm.weight
F32
F32
[768]
blk.2.ffn_up.weight
F16
F16
[768, 3072]
blk.3
blk.3.attn_k.bias
F32
F32
[768]
blk.3.attn_k.weight
F16
F16
[768, 768]
blk.3.attn_norm.weight
F32
F32
[768]
blk.3.attn_output.weight
F16
F16
[768, 768]
blk.3.attn_q.bias
F32
F32
[768]
blk.3.attn_q.weight
F16
F16
[768, 768]
blk.3.attn_v.bias
F32
F32
[768]
blk.3.attn_v.weight
F16
F16
[768, 768]
blk.3.ffn_down.weight
F16
F16
[3072, 768]
blk.3.ffn_gate.weight
F16
F16
[768, 3072]
blk.3.ffn_norm.weight
F32
F32
[768]
blk.3.ffn_up.weight
F16
F16
[768, 3072]
blk.4
blk.4.attn_k.bias
F32
F32
[768]
blk.4.attn_k.weight
F16
F16
[768, 768]
blk.4.attn_norm.weight
F32
F32
[768]
blk.4.attn_output.weight
F16
F16
[768, 768]
blk.4.attn_q.bias
F32
F32
[768]
blk.4.attn_q.weight
F16
F16
[768, 768]
blk.4.attn_v.bias
F32
F32
[768]
blk.4.attn_v.weight
F16
F16
[768, 768]
blk.4.ffn_down.weight
F16
F16
[3072, 768]
blk.4.ffn_gate.weight
F16
F16
[768, 3072]
blk.4.ffn_norm.weight
F32
F32
[768]
blk.4.ffn_up.weight
F16
F16
[768, 3072]
blk.5
blk.5.attn_k.bias
F32
F32
[768]
blk.5.attn_k.weight
F16
F16
[768, 768]
blk.5.attn_norm.weight
F32
F32
[768]
blk.5.attn_output.weight
F16
F16
[768, 768]
blk.5.attn_q.bias
F32
F32
[768]
blk.5.attn_q.weight
F16
F16
[768, 768]
blk.5.attn_v.bias
F32
F32
[768]
blk.5.attn_v.weight
F16
F16
[768, 768]
blk.5.ffn_down.weight
F16
F16
[3072, 768]
blk.5.ffn_gate.weight
F16
F16
[768, 3072]
blk.5.ffn_norm.weight
F32
F32
[768]
blk.5.ffn_up.weight
F16
F16
[768, 3072]
output.weight
F16
F16
[768, 50262]
output_norm.weight
F32
F32
[768]