laguna-xs.2:nvfp4

Laguna XS.2 is a 33B-total-parameter Mixture-of-Experts model that activates 3B parameters per token, designed for agentic coding and long-horizon work on a local machine.
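
As a rough cross-check of those numbers, here is a back-of-the-envelope parameter count derived from the config below. It is a sketch only: it assumes SwiGLU-style three-matrix MLPs (gate/up/down) and ignores norms and biases, so the exact totals may differ slightly from the released weights.

```python
# Back-of-the-envelope parameter count from the config below.
# Assumptions: SwiGLU-style MLPs (3 weight matrices each), untied
# embeddings, no norm/bias terms. Estimate only.

H, L, V = 2048, 40, 100352            # hidden_size, num_hidden_layers, vocab_size
D_HEAD, KV_HEADS = 128, 8             # head_dim, num_key_value_heads
EXPERTS, TOP_K, MOE_FF = 256, 8, 512  # num_experts, num_experts_per_tok, moe_intermediate_size
DENSE_FF = 8192                       # intermediate_size (the single dense MLP layer)
HEADS = [48 if i % 4 == 0 else 64 for i in range(L)]  # full- vs sliding-attention layers
SPARSE = L - 1                        # mlp_layer_types: 1 dense + 39 sparse

def attn(heads):
    # Q and O projections, plus grouped K/V (num_key_value_heads = 8)
    return 2 * H * heads * D_HEAD + 2 * H * KV_HEADS * D_HEAD

def mlp(ff):
    # gate + up + down
    return 3 * H * ff

common = (sum(attn(h) for h in HEADS)  # all attention blocks
          + SPARSE * mlp(MOE_FF)       # one shared expert per sparse layer
          + SPARSE * H * EXPERTS       # router weights
          + mlp(DENSE_FF)              # the single dense MLP layer
          + 2 * V * H)                 # input + output embeddings (untied)

total = common + SPARSE * EXPERTS * mlp(MOE_FF)
active = common + SPARSE * TOP_K * mlp(MOE_FF)
print(f"total  ≈ {total / 1e9:.1f}B")   # ≈ 33.4B
print(f"active ≈ {active / 1e9:.1f}B")  # ≈ 3.0B
```

The routed experts dominate the total (roughly 31B of the ~33B), while the per-token active path is mostly attention plus the 8 routed experts, landing near 3B.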

Capabilities: tools, thinking
f42d36014db8 · 3.7kB
{
  "architectures": ["LagunaForCausalLM"],
  "auto_map": {
    "AutoConfig": "configuration_laguna.LagunaConfig",
    "AutoModelForCausalLM": "modeling_laguna.LagunaForCausalLM"
  },
  "model_type": "laguna",
  "vocab_size": 100352,
  "hidden_size": 2048,
  "intermediate_size": 8192,
  "num_hidden_layers": 40,
  "num_attention_heads": 48,
  "num_key_value_heads": 8,
  "head_dim": 128,
  "max_position_embeddings": 131072,
  "attention_bias": false,
  "attention_dropout": 0.0,
  "rms_norm_eps": 1e-06,
  "num_experts": 256,
  "num_experts_per_tok": 8,
  "moe_intermediate_size": 512,
  "shared_expert_intermediate_size": 512,
  "router_aux_loss_coef": 0.0,
  "bos_token_id": 2,
  "eos_token_id": [2, 24],
  "pad_token_id": 9,
  "tie_word_embeddings": false,
  "use_cache": true,
  "torch_dtype": "bfloat16",
  "gating": true,
  "sliding_window": 512,
  "rope_parameters": {
    "full_attention": {
      "rope_theta": 500000.0,
      "rope_type": "yarn",
      "factor": 32.0,
      "original_max_position_embeddings": 4096,
      "beta_slow": 1.0,
      "beta_fast": 64.0,
      "attention_factor": 1.0,
      "partial_rotary_factor": 0.5
    },
    "sliding_attention": {
      "rope_type": "default",
      "rope_theta": 10000.0,
      "partial_rotary_factor": 1.0
    }
  },
  "layer_types": [
    "full_attention", "sliding_attention", "sliding_attention", "sliding_attention",
    "full_attention", "sliding_attention", "sliding_attention", "sliding_attention",
    "full_attention", "sliding_attention", "sliding_attention", "sliding_attention",
    "full_attention", "sliding_attention", "sliding_attention", "sliding_attention",
    "full_attention", "sliding_attention", "sliding_attention", "sliding_attention",
    "full_attention", "sliding_attention", "sliding_attention", "sliding_attention",
    "full_attention", "sliding_attention", "sliding_attention", "sliding_attention",
    "full_attention", "sliding_attention", "sliding_attention", "sliding_attention",
    "full_attention", "sliding_attention", "sliding_attention", "sliding_attention",
    "full_attention", "sliding_attention", "sliding_attention", "sliding_attention"
  ],
  "moe_apply_router_weight_on_input": false,
  "partial_rotary_factor": 0.5,
  "mlp_layer_types": [
    "dense",  "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse"
  ],
  "use_bidirectional_attention": false,
  "moe_routed_scaling_factor": 2.5,
  "num_attention_heads_per_layer": [
    48, 64, 64, 64,
    48, 64, 64, 64,
    48, 64, 64, 64,
    48, 64, 64, 64,
    48, 64, 64, 64,
    48, 64, 64, 64,
    48, 64, 64, 64,
    48, 64, 64, 64,
    48, 64, 64, 64,
    48, 64, 64, 64
  ],
  "compression_config": {
    "mode": null,
    "group_size": 32,
    "eps": 1e-05,
    "filter_fqns": ["output"],
    "recompute_fake_quantize": false
  },
  "quantization_config": {
    "mode": null,
    "group_size": 32,
    "eps": 1e-05,
    "filter_fqns": ["output"],
    "recompute_fake_quantize": false
  }
}
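
The auto_map entries point at custom modeling code (configuration_laguna / modeling_laguna), so loading through Transformers requires trust_remote_code=True. A minimal loading sketch follows; the repo id is a placeholder, not the model's actual path.

```python
# Minimal loading sketch. trust_remote_code=True is needed because the
# config's auto_map routes to custom LagunaConfig/LagunaForCausalLM code.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "laguna/laguna-xs.2"  # hypothetical repo id; substitute the real path
tok = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" above
    trust_remote_code=True,
    device_map="auto",
)

prompt = "Write a function that reverses a linked list."
inputs = tok(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=256)
print(tok.decode(out[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```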
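
The MoE keys (num_experts: 256, num_experts_per_tok: 8, moe_routed_scaling_factor: 2.5, moe_apply_router_weight_on_input: false) describe the router. The sketch below illustrates one plausible reading of those fields, in the style of other scaled top-k routers, where the scaling factor rescales the renormalized top-k gate weights and the gate is applied to expert outputs rather than inputs; the actual LagunaForCausalLM routing code may differ.

```python
# Illustrative top-k MoE routing under the assumptions stated above.
# This is a sketch of the mechanism, not the model's actual code.
import torch

def route(hidden, router_weight, top_k=8, scaling=2.5):
    # hidden: (tokens, hidden_size); router_weight: (num_experts, hidden_size)
    logits = hidden @ router_weight.T              # (tokens, 256)
    probs = logits.softmax(dim=-1)
    gate, idx = probs.topk(top_k, dim=-1)          # pick 8 of 256 experts per token
    gate = gate / gate.sum(dim=-1, keepdim=True)   # renormalize over the top-k
    return gate * scaling, idx                     # moe_routed_scaling_factor

tokens = torch.randn(4, 2048)
w = torch.randn(256, 2048)
weights, experts = route(tokens, w)
print(experts.shape, weights.shape)  # torch.Size([4, 8]) torch.Size([4, 8])
```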