Models
Docs
Pricing
Sign in
Download
Models
Download
Docs
Pricing
Sign in
qwen3.6
:35b-a3b-coding-bf16
62.6K
Downloads
Updated
44 minutes ago
Qwen3.6 delivers substantial upgrades in agentic coding and thinking preservation over previous Qwen models.
Qwen3.6 delivers substantial upgrades in agentic coding and thinking preservation over previous Qwen models.
Cancel
vision
tools
thinking
35b
qwen3.6:35b-a3b-coding-bf16
...
/
json
93a4693fa9d8 · 3.7kB
{
"architectures": [
"Qwen3_5MoeForConditionalGeneration"
],
"image_token_id": 248056,
"model_type": "qwen3_5_moe",
"text_config": {
"attention_bias": false,
"attention_dropout": 0.0,
"attn_output_gate": true,
"bos_token_id": 248044,
"dtype": "bfloat16",
"eos_token_id": 248044,
"full_attention_interval": 4,
"head_dim": 256,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"layer_types": [
"linear_attention",
"linear_attention",
"linear_attention",
"full_attention",
"linear_attention",
"linear_attention",
"linear_attention",
"full_attention",
"linear_attention",
"linear_attention",
"linear_attention",
"full_attention",
"linear_attention",
"linear_attention",
"linear_attention",
"full_attention",
"linear_attention",
"linear_attention",
"linear_attention",
"full_attention",
"linear_attention",
"linear_attention",
"linear_attention",
"full_attention",
"linear_attention",
"linear_attention",
"linear_attention",
"full_attention",
"linear_attention",
"linear_attention",
"linear_attention",
"full_attention",
"linear_attention",
"linear_attention",
"linear_attention",
"full_attention",
"linear_attention",
"linear_attention",
"linear_attention",
"full_attention"
],
"linear_conv_kernel_dim": 4,
"linear_key_head_dim": 128,
"linear_num_key_heads": 16,
"linear_num_value_heads": 32,
"linear_value_head_dim": 128,
"mamba_ssm_dtype": "float32",
"max_position_embeddings": 262144,
"model_type": "qwen3_5_moe_text",
"moe_intermediate_size": 512,
"mtp_num_hidden_layers": 1,
"mtp_use_dedicated_embeddings": false,
"num_attention_heads": 16,
"num_experts": 256,
"num_experts_per_tok": 8,
"num_hidden_layers": 40,
"num_key_value_heads": 2,
"output_router_logits": false,
"pad_token_id": null,
"partial_rotary_factor": 0.25,
"rms_norm_eps": 1e-06,
"rope_parameters": {
"mrope_interleaved": true,
"mrope_section": [
11,
11,
10
],
"partial_rotary_factor": 0.25,
"rope_theta": 10000000,
"rope_type": "default"
},
"router_aux_loss_coef": 0.001,
"shared_expert_intermediate_size": 512,
"tie_word_embeddings": false,
"use_cache": true,
"vocab_size": 248320
},
"tie_word_embeddings": false,
"transformers_version": "4.57.1",
"video_token_id": 248057,
"vision_config": {
"deepstack_visual_indexes": [],
"depth": 27,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"in_channels": 3,
"initializer_range": 0.02,
"intermediate_size": 4304,
"model_type": "qwen3_5_moe",
"num_heads": 16,
"num_position_embeddings": 2304,
"out_hidden_size": 2048,
"patch_size": 16,
"spatial_merge_size": 2,
"temporal_patch_size": 2
},
"vision_end_token_id": 248054,
"vision_start_token_id": 248053
}