latest
5.6GB
The first chat language model based on a state-space model architecture!
15 Pulls Updated 4 days ago
720c4865c9d1 · 5.6GB
-
general.architecturemamba
-
general.file_typeF16
-
mamba.attention.head_count0
-
mamba.attention.layer_norm_rms_epsilon1e-05
-
mamba.block_count64
-
mamba.context_length1048576
-
mamba.embedding_length2560
-
mamba.feed_forward_length0
-
mamba.ssm.conv_kernel4
-
mamba.ssm.dt_b_c_rmsfalse
-
mamba.ssm.inner_size5120
-
mamba.ssm.state_size16
-
mamba.ssm.time_step_rank160
-
tokenizer.ggml.bos_token_id0
-
tokenizer.ggml.eos_token_id0
-
tokenizer.ggml.merges[Ġ Ġ Ġ t Ġ a h e i n ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.preolmo
-
tokenizer.ggml.token_type[3 3 1 1 1 ...]
-
tokenizer.ggml.tokens[<|endoftext|> <|padding|> ! " # ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightF16[2560 50280]
-
blk.0.ssm_aF32[16 5120]
-
blk.0.ssm_dF32[5120]
-
blk.0.ssm_conv1d.biasF32[5120]
-
blk.0.ssm_conv1d.weightF32[4 5120]
-
blk.0.ssm_dt.biasF32[5120]
-
blk.0.ssm_dt.weightF16[160 5120]
-
blk.0.ssm_in.weightF16[2560 10240]
-
blk.0.ssm_out.weightF16[5120 2560]
-
blk.0.ssm_x.weightF16[5120 192]
-
blk.0.attn_norm.weightF32[2560]
-
blk.1.ssm_aF32[16 5120]
-
blk.1.ssm_dF32[5120]
-
blk.1.ssm_conv1d.biasF32[5120]
-
blk.1.ssm_conv1d.weightF32[4 5120]
-
blk.1.ssm_dt.biasF32[5120]
-
blk.1.ssm_dt.weightF16[160 5120]
-
blk.1.ssm_in.weightF16[2560 10240]
-
blk.1.ssm_out.weightF16[5120 2560]
-
blk.1.ssm_x.weightF16[5120 192]
-
blk.1.attn_norm.weightF32[2560]
-
blk.2.ssm_aF32[16 5120]
-
blk.2.ssm_dF32[5120]
-
blk.2.ssm_conv1d.biasF32[5120]
-
blk.2.ssm_conv1d.weightF32[4 5120]
-
blk.2.ssm_dt.biasF32[5120]
-
blk.2.ssm_dt.weightF16[160 5120]
-
blk.2.ssm_in.weightF16[2560 10240]
-
blk.2.ssm_out.weightF16[5120 2560]
-
blk.2.ssm_x.weightF16[5120 192]
-
blk.2.attn_norm.weightF32[2560]
-
blk.3.ssm_aF32[16 5120]
-
blk.3.ssm_dF32[5120]
-
blk.3.ssm_conv1d.biasF32[5120]
-
blk.3.ssm_conv1d.weightF32[4 5120]
-
blk.3.ssm_dt.biasF32[5120]
-
blk.3.ssm_dt.weightF16[160 5120]
-
blk.3.ssm_in.weightF16[2560 10240]
-
blk.3.ssm_out.weightF16[5120 2560]
-
blk.3.ssm_x.weightF16[5120 192]
-
blk.3.attn_norm.weightF32[2560]
-
blk.4.ssm_aF32[16 5120]
-
blk.4.ssm_dF32[5120]
-
blk.4.ssm_conv1d.biasF32[5120]
-
blk.4.ssm_conv1d.weightF32[4 5120]
-
blk.4.ssm_dt.biasF32[5120]
-
blk.4.ssm_dt.weightF16[160 5120]
-
blk.4.ssm_in.weightF16[2560 10240]
-
blk.4.ssm_out.weightF16[5120 2560]
-
blk.4.ssm_x.weightF16[5120 192]
-
blk.4.attn_norm.weightF32[2560]
-
blk.5.ssm_aF32[16 5120]
-
blk.5.ssm_dF32[5120]
-
blk.5.ssm_conv1d.biasF32[5120]
-
blk.5.ssm_conv1d.weightF32[4 5120]
-
blk.5.ssm_dt.biasF32[5120]
-
blk.5.ssm_dt.weightF16[160 5120]
-
blk.5.ssm_in.weightF16[2560 10240]
-
blk.5.ssm_out.weightF16[5120 2560]
-
blk.5.ssm_x.weightF16[5120 192]
-
blk.5.attn_norm.weightF32[2560]
-
blk.6.ssm_aF32[16 5120]
-
blk.6.ssm_dF32[5120]
-
blk.6.ssm_conv1d.biasF32[5120]
-
blk.6.ssm_conv1d.weightF32[4 5120]
-
blk.6.ssm_dt.biasF32[5120]
-
blk.6.ssm_dt.weightF16[160 5120]
-
blk.6.ssm_in.weightF16[2560 10240]
-
blk.6.ssm_out.weightF16[5120 2560]
-
blk.6.ssm_x.weightF16[5120 192]
-
blk.6.attn_norm.weightF32[2560]
-
blk.7.ssm_aF32[16 5120]
-
blk.7.ssm_dF32[5120]
-
blk.7.ssm_conv1d.biasF32[5120]
-
blk.7.ssm_conv1d.weightF32[4 5120]
-
blk.7.ssm_dt.biasF32[5120]
-
blk.7.ssm_dt.weightF16[160 5120]
-
blk.7.ssm_in.weightF16[2560 10240]
-
blk.7.ssm_out.weightF16[5120 2560]
-
blk.7.ssm_x.weightF16[5120 192]
-
blk.7.attn_norm.weightF32[2560]
-
blk.8.ssm_aF32[16 5120]
-
blk.8.ssm_dF32[5120]
-
blk.8.ssm_conv1d.biasF32[5120]
-
blk.8.ssm_conv1d.weightF32[4 5120]
-
blk.8.ssm_dt.biasF32[5120]
-
blk.8.ssm_dt.weightF16[160 5120]
-
blk.8.ssm_in.weightF16[2560 10240]
-
blk.8.ssm_out.weightF16[5120 2560]
-
blk.8.ssm_x.weightF16[5120 192]
-
blk.8.attn_norm.weightF32[2560]
-
blk.9.ssm_aF32[16 5120]
-
blk.9.ssm_dF32[5120]
-
blk.9.ssm_conv1d.biasF32[5120]
-
blk.9.ssm_conv1d.weightF32[4 5120]
-
blk.9.ssm_dt.biasF32[5120]
-
blk.9.ssm_dt.weightF16[160 5120]
-
blk.9.ssm_in.weightF16[2560 10240]
-
blk.9.ssm_out.weightF16[5120 2560]
-
blk.9.ssm_x.weightF16[5120 192]
-
blk.9.attn_norm.weightF32[2560]
-
blk.10.ssm_aF32[16 5120]
-
blk.10.ssm_dF32[5120]
-
blk.10.ssm_conv1d.biasF32[5120]
-
blk.10.ssm_conv1d.weightF32[4 5120]
-
blk.10.ssm_dt.biasF32[5120]
-
blk.10.ssm_dt.weightF16[160 5120]
-
blk.10.ssm_in.weightF16[2560 10240]
-
blk.10.ssm_out.weightF16[5120 2560]
-
blk.10.ssm_x.weightF16[5120 192]
-
blk.10.attn_norm.weightF32[2560]
-
blk.11.ssm_aF32[16 5120]
-
blk.11.ssm_dF32[5120]
-
blk.11.ssm_conv1d.biasF32[5120]
-
blk.11.ssm_conv1d.weightF32[4 5120]
-
blk.11.ssm_dt.biasF32[5120]
-
blk.11.ssm_dt.weightF16[160 5120]
-
blk.11.ssm_in.weightF16[2560 10240]
-
blk.11.ssm_out.weightF16[5120 2560]
-
blk.11.ssm_x.weightF16[5120 192]
-
blk.11.attn_norm.weightF32[2560]
-
blk.12.ssm_aF32[16 5120]
-
blk.12.ssm_dF32[5120]
-
blk.12.ssm_conv1d.biasF32[5120]
-
blk.12.ssm_conv1d.weightF32[4 5120]
-
blk.12.ssm_dt.biasF32[5120]
-
blk.12.ssm_dt.weightF16[160 5120]
-
blk.12.ssm_in.weightF16[2560 10240]
-
blk.12.ssm_out.weightF16[5120 2560]
-
blk.12.ssm_x.weightF16[5120 192]
-
blk.12.attn_norm.weightF32[2560]
-
blk.13.ssm_aF32[16 5120]
-
blk.13.ssm_dF32[5120]
-
blk.13.ssm_conv1d.biasF32[5120]
-
blk.13.ssm_conv1d.weightF32[4 5120]
-
blk.13.ssm_dt.biasF32[5120]
-
blk.13.ssm_dt.weightF16[160 5120]
-
blk.13.ssm_in.weightF16[2560 10240]
-
blk.13.ssm_out.weightF16[5120 2560]
-
blk.13.ssm_x.weightF16[5120 192]
-
blk.13.attn_norm.weightF32[2560]
-
blk.14.ssm_aF32[16 5120]
-
blk.14.ssm_dF32[5120]
-
blk.14.ssm_conv1d.biasF32[5120]
-
blk.14.ssm_conv1d.weightF32[4 5120]
-
blk.14.ssm_dt.biasF32[5120]
-
blk.14.ssm_dt.weightF16[160 5120]
-
blk.14.ssm_in.weightF16[2560 10240]
-
blk.14.ssm_out.weightF16[5120 2560]
-
blk.14.ssm_x.weightF16[5120 192]
-
blk.14.attn_norm.weightF32[2560]
-
blk.15.ssm_aF32[16 5120]
-
blk.15.ssm_dF32[5120]
-
blk.15.ssm_conv1d.biasF32[5120]
-
blk.15.ssm_conv1d.weightF32[4 5120]
-
blk.15.ssm_dt.biasF32[5120]
-
blk.15.ssm_dt.weightF16[160 5120]
-
blk.15.ssm_in.weightF16[2560 10240]
-
blk.15.ssm_out.weightF16[5120 2560]
-
blk.15.ssm_x.weightF16[5120 192]
-
blk.15.attn_norm.weightF32[2560]
-
blk.16.ssm_aF32[16 5120]
-
blk.16.ssm_dF32[5120]
-
blk.16.ssm_conv1d.biasF32[5120]
-
blk.16.ssm_conv1d.weightF32[4 5120]
-
blk.16.ssm_dt.biasF32[5120]
-
blk.16.ssm_dt.weightF16[160 5120]
-
blk.16.ssm_in.weightF16[2560 10240]
-
blk.16.ssm_out.weightF16[5120 2560]
-
blk.16.ssm_x.weightF16[5120 192]
-
blk.16.attn_norm.weightF32[2560]
-
blk.17.ssm_aF32[16 5120]
-
blk.17.ssm_dF32[5120]
-
blk.17.ssm_conv1d.biasF32[5120]
-
blk.17.ssm_conv1d.weightF32[4 5120]
-
blk.17.ssm_dt.biasF32[5120]
-
blk.17.ssm_dt.weightF16[160 5120]
-
blk.17.ssm_in.weightF16[2560 10240]
-
blk.17.ssm_out.weightF16[5120 2560]
-
blk.17.ssm_x.weightF16[5120 192]
-
blk.17.attn_norm.weightF32[2560]
-
blk.18.ssm_aF32[16 5120]
-
blk.18.ssm_dF32[5120]
-
blk.18.ssm_conv1d.biasF32[5120]
-
blk.18.ssm_conv1d.weightF32[4 5120]
-
blk.18.ssm_dt.biasF32[5120]
-
blk.18.ssm_dt.weightF16[160 5120]
-
blk.18.ssm_in.weightF16[2560 10240]
-
blk.18.ssm_out.weightF16[5120 2560]
-
blk.18.ssm_x.weightF16[5120 192]
-
blk.18.attn_norm.weightF32[2560]
-
blk.19.ssm_aF32[16 5120]
-
blk.19.ssm_dF32[5120]
-
blk.19.ssm_conv1d.biasF32[5120]
-
blk.19.ssm_conv1d.weightF32[4 5120]
-
blk.19.ssm_dt.biasF32[5120]
-
blk.19.ssm_dt.weightF16[160 5120]
-
blk.19.ssm_in.weightF16[2560 10240]
-
blk.19.ssm_out.weightF16[5120 2560]
-
blk.19.ssm_x.weightF16[5120 192]
-
blk.19.attn_norm.weightF32[2560]
-
blk.20.ssm_aF32[16 5120]
-
blk.20.ssm_dF32[5120]
-
blk.20.ssm_conv1d.biasF32[5120]
-
blk.20.ssm_conv1d.weightF32[4 5120]
-
blk.20.ssm_dt.biasF32[5120]
-
blk.20.ssm_dt.weightF16[160 5120]
-
blk.20.ssm_in.weightF16[2560 10240]
-
blk.20.ssm_out.weightF16[5120 2560]
-
blk.20.ssm_x.weightF16[5120 192]
-
blk.20.attn_norm.weightF32[2560]
-
blk.21.ssm_aF32[16 5120]
-
blk.21.ssm_dF32[5120]
-
blk.21.ssm_conv1d.biasF32[5120]
-
blk.21.ssm_conv1d.weightF32[4 5120]
-
blk.21.ssm_dt.biasF32[5120]
-
blk.21.ssm_dt.weightF16[160 5120]
-
blk.21.ssm_in.weightF16[2560 10240]
-
blk.21.ssm_out.weightF16[5120 2560]
-
blk.21.ssm_x.weightF16[5120 192]
-
blk.21.attn_norm.weightF32[2560]
-
blk.22.ssm_aF32[16 5120]
-
blk.22.ssm_dF32[5120]
-
blk.22.ssm_conv1d.biasF32[5120]
-
blk.22.ssm_conv1d.weightF32[4 5120]
-
blk.22.ssm_dt.biasF32[5120]
-
blk.22.ssm_dt.weightF16[160 5120]
-
blk.22.ssm_in.weightF16[2560 10240]
-
blk.22.ssm_out.weightF16[5120 2560]
-
blk.22.ssm_x.weightF16[5120 192]
-
blk.22.attn_norm.weightF32[2560]
-
blk.23.ssm_aF32[16 5120]
-
blk.23.ssm_dF32[5120]
-
blk.23.ssm_conv1d.biasF32[5120]
-
blk.23.ssm_conv1d.weightF32[4 5120]
-
blk.23.ssm_dt.biasF32[5120]
-
blk.23.ssm_dt.weightF16[160 5120]
-
blk.23.ssm_in.weightF16[2560 10240]
-
blk.23.ssm_out.weightF16[5120 2560]
-
blk.23.ssm_x.weightF16[5120 192]
-
blk.23.attn_norm.weightF32[2560]
-
blk.24.ssm_aF32[16 5120]
-
blk.24.ssm_dF32[5120]
-
blk.24.ssm_conv1d.biasF32[5120]
-
blk.24.ssm_conv1d.weightF32[4 5120]
-
blk.24.ssm_dt.biasF32[5120]
-
blk.24.ssm_dt.weightF16[160 5120]
-
blk.24.ssm_in.weightF16[2560 10240]
-
blk.24.ssm_out.weightF16[5120 2560]
-
blk.24.ssm_x.weightF16[5120 192]
-
blk.24.attn_norm.weightF32[2560]
-
blk.25.ssm_aF32[16 5120]
-
blk.25.ssm_dF32[5120]
-
blk.25.ssm_conv1d.biasF32[5120]
-
blk.25.ssm_conv1d.weightF32[4 5120]
-
blk.25.ssm_dt.biasF32[5120]
-
blk.25.ssm_dt.weightF16[160 5120]
-
blk.25.ssm_in.weightF16[2560 10240]
-
blk.25.ssm_out.weightF16[5120 2560]
-
blk.25.ssm_x.weightF16[5120 192]
-
blk.25.attn_norm.weightF32[2560]
-
blk.26.ssm_aF32[16 5120]
-
blk.26.ssm_dF32[5120]
-
blk.26.ssm_conv1d.biasF32[5120]
-
blk.26.ssm_conv1d.weightF32[4 5120]
-
blk.26.ssm_dt.biasF32[5120]
-
blk.26.ssm_dt.weightF16[160 5120]
-
blk.26.ssm_in.weightF16[2560 10240]
-
blk.26.ssm_out.weightF16[5120 2560]
-
blk.26.ssm_x.weightF16[5120 192]
-
blk.26.attn_norm.weightF32[2560]
-
blk.27.ssm_aF32[16 5120]
-
blk.27.ssm_dF32[5120]
-
blk.27.ssm_conv1d.biasF32[5120]
-
blk.27.ssm_conv1d.weightF32[4 5120]
-
blk.27.ssm_dt.biasF32[5120]
-
blk.27.ssm_dt.weightF16[160 5120]
-
blk.27.ssm_in.weightF16[2560 10240]
-
blk.27.ssm_out.weightF16[5120 2560]
-
blk.27.ssm_x.weightF16[5120 192]
-
blk.27.attn_norm.weightF32[2560]
-
blk.28.ssm_aF32[16 5120]
-
blk.28.ssm_dF32[5120]
-
blk.28.ssm_conv1d.biasF32[5120]
-
blk.28.ssm_conv1d.weightF32[4 5120]
-
blk.28.ssm_dt.biasF32[5120]
-
blk.28.ssm_dt.weightF16[160 5120]
-
blk.28.ssm_in.weightF16[2560 10240]
-
blk.28.ssm_out.weightF16[5120 2560]
-
blk.28.ssm_x.weightF16[5120 192]
-
blk.28.attn_norm.weightF32[2560]
-
blk.29.ssm_aF32[16 5120]
-
blk.29.ssm_dF32[5120]
-
blk.29.ssm_conv1d.biasF32[5120]
-
blk.29.ssm_conv1d.weightF32[4 5120]
-
blk.29.ssm_dt.biasF32[5120]
-
blk.29.ssm_dt.weightF16[160 5120]
-
blk.29.ssm_in.weightF16[2560 10240]
-
blk.29.ssm_out.weightF16[5120 2560]
-
blk.29.ssm_x.weightF16[5120 192]
-
blk.29.attn_norm.weightF32[2560]
-
blk.30.ssm_aF32[16 5120]
-
blk.30.ssm_dF32[5120]
-
blk.30.ssm_conv1d.biasF32[5120]
-
blk.30.ssm_conv1d.weightF32[4 5120]
-
blk.30.ssm_dt.biasF32[5120]
-
blk.30.ssm_dt.weightF16[160 5120]
-
blk.30.ssm_in.weightF16[2560 10240]
-
blk.30.ssm_out.weightF16[5120 2560]
-
blk.30.ssm_x.weightF16[5120 192]
-
blk.30.attn_norm.weightF32[2560]
-
blk.31.ssm_aF32[16 5120]
-
blk.31.ssm_dF32[5120]
-
blk.31.ssm_conv1d.biasF32[5120]
-
blk.31.ssm_conv1d.weightF32[4 5120]
-
blk.31.ssm_dt.biasF32[5120]
-
blk.31.ssm_dt.weightF16[160 5120]
-
blk.31.ssm_in.weightF16[2560 10240]
-
blk.31.ssm_out.weightF16[5120 2560]
-
blk.31.ssm_x.weightF16[5120 192]
-
blk.31.attn_norm.weightF32[2560]
-
blk.32.ssm_aF32[16 5120]
-
blk.32.ssm_dF32[5120]
-
blk.32.ssm_conv1d.biasF32[5120]
-
blk.32.ssm_conv1d.weightF32[4 5120]
-
blk.32.ssm_dt.biasF32[5120]
-
blk.32.ssm_dt.weightF16[160 5120]
-
blk.32.ssm_in.weightF16[2560 10240]
-
blk.32.ssm_out.weightF16[5120 2560]
-
blk.32.ssm_x.weightF16[5120 192]
-
blk.32.attn_norm.weightF32[2560]
-
blk.33.ssm_aF32[16 5120]
-
blk.33.ssm_dF32[5120]
-
blk.33.ssm_conv1d.biasF32[5120]
-
blk.33.ssm_conv1d.weightF32[4 5120]
-
blk.33.ssm_dt.biasF32[5120]
-
blk.33.ssm_dt.weightF16[160 5120]
-
blk.33.ssm_in.weightF16[2560 10240]
-
blk.33.ssm_out.weightF16[5120 2560]
-
blk.33.ssm_x.weightF16[5120 192]
-
blk.33.attn_norm.weightF32[2560]
-
blk.34.ssm_aF32[16 5120]
-
blk.34.ssm_dF32[5120]
-
blk.34.ssm_conv1d.biasF32[5120]
-
blk.34.ssm_conv1d.weightF32[4 5120]
-
blk.34.ssm_dt.biasF32[5120]
-
blk.34.ssm_dt.weightF16[160 5120]
-
blk.34.ssm_in.weightF16[2560 10240]
-
blk.34.ssm_out.weightF16[5120 2560]
-
blk.34.ssm_x.weightF16[5120 192]
-
blk.34.attn_norm.weightF32[2560]
-
blk.35.ssm_aF32[16 5120]
-
blk.35.ssm_dF32[5120]
-
blk.35.ssm_conv1d.biasF32[5120]
-
blk.35.ssm_conv1d.weightF32[4 5120]
-
blk.35.ssm_dt.biasF32[5120]
-
blk.35.ssm_dt.weightF16[160 5120]
-
blk.35.ssm_in.weightF16[2560 10240]
-
blk.35.ssm_out.weightF16[5120 2560]
-
blk.35.ssm_x.weightF16[5120 192]
-
blk.35.attn_norm.weightF32[2560]
-
blk.36.ssm_aF32[16 5120]
-
blk.36.ssm_dF32[5120]
-
blk.36.ssm_conv1d.biasF32[5120]
-
blk.36.ssm_conv1d.weightF32[4 5120]
-
blk.36.ssm_dt.biasF32[5120]
-
blk.36.ssm_dt.weightF16[160 5120]
-
blk.36.ssm_in.weightF16[2560 10240]
-
blk.36.ssm_out.weightF16[5120 2560]
-
blk.36.ssm_x.weightF16[5120 192]
-
blk.36.attn_norm.weightF32[2560]
-
blk.37.ssm_aF32[16 5120]
-
blk.37.ssm_dF32[5120]
-
blk.37.ssm_conv1d.biasF32[5120]
-
blk.37.ssm_conv1d.weightF32[4 5120]
-
blk.37.ssm_dt.biasF32[5120]
-
blk.37.ssm_dt.weightF16[160 5120]
-
blk.37.ssm_in.weightF16[2560 10240]
-
blk.37.ssm_out.weightF16[5120 2560]
-
blk.37.ssm_x.weightF16[5120 192]
-
blk.37.attn_norm.weightF32[2560]
-
blk.38.ssm_aF32[16 5120]
-
blk.38.ssm_dF32[5120]
-
blk.38.ssm_conv1d.biasF32[5120]
-
blk.38.ssm_conv1d.weightF32[4 5120]
-
blk.38.ssm_dt.biasF32[5120]
-
blk.38.ssm_dt.weightF16[160 5120]
-
blk.38.ssm_in.weightF16[2560 10240]
-
blk.38.ssm_out.weightF16[5120 2560]
-
blk.38.ssm_x.weightF16[5120 192]
-
blk.38.attn_norm.weightF32[2560]
-
blk.39.ssm_aF32[16 5120]
-
blk.39.ssm_dF32[5120]
-
blk.39.ssm_conv1d.biasF32[5120]
-
blk.39.ssm_conv1d.weightF32[4 5120]
-
blk.39.ssm_dt.biasF32[5120]
-
blk.39.ssm_dt.weightF16[160 5120]
-
blk.39.ssm_in.weightF16[2560 10240]
-
blk.39.ssm_out.weightF16[5120 2560]
-
blk.39.ssm_x.weightF16[5120 192]
-
blk.39.attn_norm.weightF32[2560]
-
blk.40.ssm_aF32[16 5120]
-
blk.40.ssm_dF32[5120]
-
blk.40.ssm_conv1d.biasF32[5120]
-
blk.40.ssm_conv1d.weightF32[4 5120]
-
blk.40.ssm_dt.biasF32[5120]
-
blk.40.ssm_dt.weightF16[160 5120]
-
blk.40.ssm_in.weightF16[2560 10240]
-
blk.40.ssm_out.weightF16[5120 2560]
-
blk.40.ssm_x.weightF16[5120 192]
-
blk.40.attn_norm.weightF32[2560]
-
blk.41.ssm_aF32[16 5120]
-
blk.41.ssm_dF32[5120]
-
blk.41.ssm_conv1d.biasF32[5120]
-
blk.41.ssm_conv1d.weightF32[4 5120]
-
blk.41.ssm_dt.biasF32[5120]
-
blk.41.ssm_dt.weightF16[160 5120]
-
blk.41.ssm_in.weightF16[2560 10240]
-
blk.41.ssm_out.weightF16[5120 2560]
-
blk.41.ssm_x.weightF16[5120 192]
-
blk.41.attn_norm.weightF32[2560]
-
blk.42.ssm_aF32[16 5120]
-
blk.42.ssm_dF32[5120]
-
blk.42.ssm_conv1d.biasF32[5120]
-
blk.42.ssm_conv1d.weightF32[4 5120]
-
blk.42.ssm_dt.biasF32[5120]
-
blk.42.ssm_dt.weightF16[160 5120]
-
blk.42.ssm_in.weightF16[2560 10240]
-
blk.42.ssm_out.weightF16[5120 2560]
-
blk.42.ssm_x.weightF16[5120 192]
-
blk.42.attn_norm.weightF32[2560]
-
blk.43.ssm_aF32[16 5120]
-
blk.43.ssm_dF32[5120]
-
blk.43.ssm_conv1d.biasF32[5120]
-
blk.43.ssm_conv1d.weightF32[4 5120]
-
blk.43.ssm_dt.biasF32[5120]
-
blk.43.ssm_dt.weightF16[160 5120]
-
blk.43.ssm_in.weightF16[2560 10240]
-
blk.43.ssm_out.weightF16[5120 2560]
-
blk.43.ssm_x.weightF16[5120 192]
-
blk.43.attn_norm.weightF32[2560]
-
blk.44.ssm_aF32[16 5120]
-
blk.44.ssm_dF32[5120]
-
blk.44.ssm_conv1d.biasF32[5120]
-
blk.44.ssm_conv1d.weightF32[4 5120]
-
blk.44.ssm_dt.biasF32[5120]
-
blk.44.ssm_dt.weightF16[160 5120]
-
blk.44.ssm_in.weightF16[2560 10240]
-
blk.44.ssm_out.weightF16[5120 2560]
-
blk.44.ssm_x.weightF16[5120 192]
-
blk.44.attn_norm.weightF32[2560]
-
blk.45.ssm_aF32[16 5120]
-
blk.45.ssm_dF32[5120]
-
blk.45.ssm_conv1d.biasF32[5120]
-
blk.45.ssm_conv1d.weightF32[4 5120]
-
blk.45.ssm_dt.biasF32[5120]
-
blk.45.ssm_dt.weightF16[160 5120]
-
blk.45.ssm_in.weightF16[2560 10240]
-
blk.45.ssm_out.weightF16[5120 2560]
-
blk.45.ssm_x.weightF16[5120 192]
-
blk.45.attn_norm.weightF32[2560]
-
blk.46.ssm_aF32[16 5120]
-
blk.46.ssm_dF32[5120]
-
blk.46.ssm_conv1d.biasF32[5120]
-
blk.46.ssm_conv1d.weightF32[4 5120]
-
blk.46.ssm_dt.biasF32[5120]
-
blk.46.ssm_dt.weightF16[160 5120]
-
blk.46.ssm_in.weightF16[2560 10240]
-
blk.46.ssm_out.weightF16[5120 2560]
-
blk.46.ssm_x.weightF16[5120 192]
-
blk.46.attn_norm.weightF32[2560]
-
blk.47.ssm_aF32[16 5120]
-
blk.47.ssm_dF32[5120]
-
blk.47.ssm_conv1d.biasF32[5120]
-
blk.47.ssm_conv1d.weightF32[4 5120]
-
blk.47.ssm_dt.biasF32[5120]
-
blk.47.ssm_dt.weightF16[160 5120]
-
blk.47.ssm_in.weightF16[2560 10240]
-
blk.47.ssm_out.weightF16[5120 2560]
-
blk.47.ssm_x.weightF16[5120 192]
-
blk.47.attn_norm.weightF32[2560]
-
blk.48.ssm_aF32[16 5120]
-
blk.48.ssm_dF32[5120]
-
blk.48.ssm_conv1d.biasF32[5120]
-
blk.48.ssm_conv1d.weightF32[4 5120]
-
blk.48.ssm_dt.biasF32[5120]
-
blk.48.ssm_dt.weightF16[160 5120]
-
blk.48.ssm_in.weightF16[2560 10240]
-
blk.48.ssm_out.weightF16[5120 2560]
-
blk.48.ssm_x.weightF16[5120 192]
-
blk.48.attn_norm.weightF32[2560]
-
blk.49.ssm_aF32[16 5120]
-
blk.49.ssm_dF32[5120]
-
blk.49.ssm_conv1d.biasF32[5120]
-
blk.49.ssm_conv1d.weightF32[4 5120]
-
blk.49.ssm_dt.biasF32[5120]
-
blk.49.ssm_dt.weightF16[160 5120]
-
blk.49.ssm_in.weightF16[2560 10240]
-
blk.49.ssm_out.weightF16[5120 2560]
-
blk.49.ssm_x.weightF16[5120 192]
-
blk.49.attn_norm.weightF32[2560]
-
blk.50.ssm_aF32[16 5120]
-
blk.50.ssm_dF32[5120]
-
blk.50.ssm_conv1d.biasF32[5120]
-
blk.50.ssm_conv1d.weightF32[4 5120]
-
blk.50.ssm_dt.biasF32[5120]
-
blk.50.ssm_dt.weightF16[160 5120]
-
blk.50.ssm_in.weightF16[2560 10240]
-
blk.50.ssm_out.weightF16[5120 2560]
-
blk.50.ssm_x.weightF16[5120 192]
-
blk.50.attn_norm.weightF32[2560]
-
blk.51.ssm_aF32[16 5120]
-
blk.51.ssm_dF32[5120]
-
blk.51.ssm_conv1d.biasF32[5120]
-
blk.51.ssm_conv1d.weightF32[4 5120]
-
blk.51.ssm_dt.biasF32[5120]
-
blk.51.ssm_dt.weightF16[160 5120]
-
blk.51.ssm_in.weightF16[2560 10240]
-
blk.51.ssm_out.weightF16[5120 2560]
-
blk.51.ssm_x.weightF16[5120 192]
-
blk.51.attn_norm.weightF32[2560]
-
blk.52.ssm_aF32[16 5120]
-
blk.52.ssm_dF32[5120]
-
blk.52.ssm_conv1d.biasF32[5120]
-
blk.52.ssm_conv1d.weightF32[4 5120]
-
blk.52.ssm_dt.biasF32[5120]
-
blk.52.ssm_dt.weightF16[160 5120]
-
blk.52.ssm_in.weightF16[2560 10240]
-
blk.52.ssm_out.weightF16[5120 2560]
-
blk.52.ssm_x.weightF16[5120 192]
-
blk.52.attn_norm.weightF32[2560]
-
blk.53.ssm_aF32[16 5120]
-
blk.53.ssm_dF32[5120]
-
blk.53.ssm_conv1d.biasF32[5120]
-
blk.53.ssm_conv1d.weightF32[4 5120]
-
blk.53.ssm_dt.biasF32[5120]
-
blk.53.ssm_dt.weightF16[160 5120]
-
blk.53.ssm_in.weightF16[2560 10240]
-
blk.53.ssm_out.weightF16[5120 2560]
-
blk.53.ssm_x.weightF16[5120 192]
-
blk.53.attn_norm.weightF32[2560]
-
blk.54.ssm_aF32[16 5120]
-
blk.54.ssm_dF32[5120]
-
blk.54.ssm_conv1d.biasF32[5120]
-
blk.54.ssm_conv1d.weightF32[4 5120]
-
blk.54.ssm_dt.biasF32[5120]
-
blk.54.ssm_dt.weightF16[160 5120]
-
blk.54.ssm_in.weightF16[2560 10240]
-
blk.54.ssm_out.weightF16[5120 2560]
-
blk.54.ssm_x.weightF16[5120 192]
-
blk.54.attn_norm.weightF32[2560]
-
blk.55.ssm_aF32[16 5120]
-
blk.55.ssm_dF32[5120]
-
blk.55.ssm_conv1d.biasF32[5120]
-
blk.55.ssm_conv1d.weightF32[4 5120]
-
blk.55.ssm_dt.biasF32[5120]
-
blk.55.ssm_dt.weightF16[160 5120]
-
blk.55.ssm_in.weightF16[2560 10240]
-
blk.55.ssm_out.weightF16[5120 2560]
-
blk.55.ssm_x.weightF16[5120 192]
-
blk.55.attn_norm.weightF32[2560]
-
blk.56.ssm_aF32[16 5120]
-
blk.56.ssm_dF32[5120]
-
blk.56.ssm_conv1d.biasF32[5120]
-
blk.56.ssm_conv1d.weightF32[4 5120]
-
blk.56.ssm_dt.biasF32[5120]
-
blk.56.ssm_dt.weightF16[160 5120]
-
blk.56.ssm_in.weightF16[2560 10240]
-
blk.56.ssm_out.weightF16[5120 2560]
-
blk.56.ssm_x.weightF16[5120 192]
-
blk.56.attn_norm.weightF32[2560]
-
blk.57.ssm_aF32[16 5120]
-
blk.57.ssm_dF32[5120]
-
blk.57.ssm_conv1d.biasF32[5120]
-
blk.57.ssm_conv1d.weightF32[4 5120]
-
blk.57.ssm_dt.biasF32[5120]
-
blk.57.ssm_dt.weightF16[160 5120]
-
blk.57.ssm_in.weightF16[2560 10240]
-
blk.57.ssm_out.weightF16[5120 2560]
-
blk.57.ssm_x.weightF16[5120 192]
-
blk.57.attn_norm.weightF32[2560]
-
blk.58.ssm_aF32[16 5120]
-
blk.58.ssm_dF32[5120]
-
blk.58.ssm_conv1d.biasF32[5120]
-
blk.58.ssm_conv1d.weightF32[4 5120]
-
blk.58.ssm_dt.biasF32[5120]
-
blk.58.ssm_dt.weightF16[160 5120]
-
blk.58.ssm_in.weightF16[2560 10240]
-
blk.58.ssm_out.weightF16[5120 2560]
-
blk.58.ssm_x.weightF16[5120 192]
-
blk.58.attn_norm.weightF32[2560]
-
blk.59.ssm_aF32[16 5120]
-
blk.59.ssm_dF32[5120]
-
blk.59.ssm_conv1d.biasF32[5120]
-
blk.59.ssm_conv1d.weightF32[4 5120]
-
blk.59.ssm_dt.biasF32[5120]
-
blk.59.ssm_dt.weightF16[160 5120]
-
blk.59.ssm_in.weightF16[2560 10240]
-
blk.59.ssm_out.weightF16[5120 2560]
-
blk.59.ssm_x.weightF16[5120 192]
-
blk.59.attn_norm.weightF32[2560]
-
blk.60.ssm_aF32[16 5120]
-
blk.60.ssm_dF32[5120]
-
blk.60.ssm_conv1d.biasF32[5120]
-
blk.60.ssm_conv1d.weightF32[4 5120]
-
blk.60.ssm_dt.biasF32[5120]
-
blk.60.ssm_dt.weightF16[160 5120]
-
blk.60.ssm_in.weightF16[2560 10240]
-
blk.60.ssm_out.weightF16[5120 2560]
-
blk.60.ssm_x.weightF16[5120 192]
-
blk.60.attn_norm.weightF32[2560]
-
blk.61.ssm_aF32[16 5120]
-
blk.61.ssm_dF32[5120]
-
blk.61.ssm_conv1d.biasF32[5120]
-
blk.61.ssm_conv1d.weightF32[4 5120]
-
blk.61.ssm_dt.biasF32[5120]
-
blk.61.ssm_dt.weightF16[160 5120]
-
blk.61.ssm_in.weightF16[2560 10240]
-
blk.61.ssm_out.weightF16[5120 2560]
-
blk.61.ssm_x.weightF16[5120 192]
-
blk.61.attn_norm.weightF32[2560]
-
blk.62.ssm_aF32[16 5120]
-
blk.62.ssm_dF32[5120]
-
blk.62.ssm_conv1d.biasF32[5120]
-
blk.62.ssm_conv1d.weightF32[4 5120]
-
blk.62.ssm_dt.biasF32[5120]
-
blk.62.ssm_dt.weightF16[160 5120]
-
blk.62.ssm_in.weightF16[2560 10240]
-
blk.62.ssm_out.weightF16[5120 2560]
-
blk.62.ssm_x.weightF16[5120 192]
-
blk.62.attn_norm.weightF32[2560]
-
blk.63.ssm_aF32[16 5120]
-
blk.63.ssm_dF32[5120]
-
blk.63.ssm_conv1d.biasF32[5120]
-
blk.63.ssm_conv1d.weightF32[4 5120]
-
blk.63.ssm_dt.biasF32[5120]
-
blk.63.ssm_dt.weightF16[160 5120]
-
blk.63.ssm_in.weightF16[2560 10240]
-
blk.63.ssm_out.weightF16[5120 2560]
-
blk.63.ssm_x.weightF16[5120 192]
-
blk.63.attn_norm.weightF32[2560]
-
output_norm.weightF32[2560]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41
blk.42
blk.43
blk.44
blk.45
blk.46
blk.47
blk.48
blk.49
blk.50
blk.51
blk.52
blk.53
blk.54
blk.55
blk.56
blk.57
blk.58
blk.59
blk.60
blk.61
blk.62
blk.63