Models
Docs
Pricing
Sign in
Download
Models
Download
Docs
Pricing
Sign in
charaf
/
gemma4-31b-claude-opus-abliterated
:latest
31
Downloads
Updated
5 hours ago
Cancel
gemma4-31b-claude-opus-abliterated:latest
...
/
json
8c0a85f62133 · 2.0kB
{
"source_model": "TeichAI/gemma-4-31B-it-Claude-Opus-Distill",
"technique": "refusal_direction_ablation",
"method": "advanced",
"method_config": {
"n_directions": 4,
"direction_method": "svd",
"norm_preserve": true,
"regularization": 0.3,
"refinement_passes": 2,
"project_biases": true,
"use_chat_template": true,
"use_whitened_svd": false,
"true_iterative_refinement": false,
"winsorize_activations": false,
"float_layer_interpolation": false,
"cot_aware": false,
"use_kl_optimization": false,
"use_lora_ablation": false,
"spectral_cascade": false,
"spectral_bands": 3,
"spectral_threshold": 0.05
},
"references": [
"Arditi et al., Refusal in Language Models Is Mediated by a Single Direction (NeurIPS 2024)",
"Gabliteration: SVD-based multi-direction extraction (arXiv:2512.18901)",
"Norm-Preserving Biprojected Abliteration (grimjim, 2025)",
"Young, Comparative Analysis of LLM Abliteration Methods (arXiv:2512.13655)",
"Joad et al., More to Refusal than a Single Direction (2026)",
"Heretic (p-e-w, 2025): Bayesian optimization, LoRA-mediated ablation, winsorization",
"OBLITERATUS: Whitened SVD, EGA, CoT-aware, KL co-optimization, float interpolation (novel)"
],
"strong_layers": [
40,
39,
42,
41,
43,
52,
44,
45,
46,
51,
53,
50,
48,
49,
47,
38,
55,
54,
56,
57,
34,
58,
36,
35,
33,
37,
32,
31,
30,
27,
26,
25,
29,
24,
28,
23,
22,
21
],
"n_harmful_prompts": 512,
"n_harmless_prompts": 512,
"quality_metrics": {
"perplexity": 6432.466652702751,
"coherence": 0.2,
"refusal_rate": 0.0,
"kl_divergence": 6.550369739532471,
"spectral_certification": "RED"
},
"kl_contributions": {},
"cot_preserved_layers": [],
"float_layer_weights": {},
"lora_adapters_saved": false
}