403 Downloads Updated 8 months ago
Name
4 models
llm4decompile-22b-v2:latest
44GB · 32K context window · Text · 8 months ago
llm4decompile-22b-v2:q4_K_S
13GB · 32K context window · Text · 8 months ago
llm4decompile-22b-v2:q6_K
18GB · 32K context window · Text · 8 months ago
llm4decompile-22b-v2:q8_0
24GB · 32K context window · Text · 8 months ago
LLM4Decompile aims to decompile x86 assembly instructions into C. The newly released V2 series are trained with a larger dataset (2B tokens) and a maximum token length of 4,096, with remarkable performance (up to 100% improvement) compared to the previous model.
| Metrics | Re-executability Rate | | | | | Edit Similarity | | | | |
|---|---|---|---|---|---|---|---|---|---|---|
| Optimization Level | O0 | O1 | O2 | O3 | AVG | O0 | O1 | O2 | O3 | AVG |
| LLM4Decompile-End-6.7B | 0.6805 | 0.3951 | 0.3671 | 0.3720 | 0.4537 | 0.1557 | 0.1292 | 0.1293 | 0.1269 | 0.1353 |
| Ghidra | 0.3476 | 0.1646 | 0.1524 | 0.1402 | 0.2012 | 0.0699 | 0.0613 | 0.0619 | 0.0547 | 0.0620 |
| +GPT-4o | 0.4695 | 0.3415 | 0.2866 | 0.3110 | 0.3522 | 0.0660 | 0.0563 | 0.0567 | 0.0499 | 0.0572 |
| +LLM4Decompile-Ref-1.3B | 0.6890 | 0.3720 | 0.4085 | 0.3720 | 0.4604 | 0.1517 | 0.1325 | 0.1292 | 0.1267 | 0.1350 |
| +LLM4Decompile-Ref-6.7B | 0.7439 | 0.4695 | 0.4756 | 0.4207 | 0.5274 | 0.1559 | 0.1353 | 0.1342 | 0.1273 | 0.1382 |
| +LLM4Decompile-Ref-33B | 0.7073 | 0.4756 | 0.4390 | 0.4146 | 0.5091 | 0.1540 | 0.1379 | 0.1363 | 0.1307 | 0.1397 |
To get started, pull the model with `ollama pull MHKetbi/llm4decompile-22b-v2`. Install the Python client with `pip install ollama`, then save the script below as `decompiler.py`. Create a file named `my_assembly_file_O0.pseudo` (replace `my_assembly_file` with your desired filename) containing the assembly code of the function you want to decompile, and run `python decompiler.py my_assembly_file`.
import ollama
import sys
MODEL_NAME = 'MHKetbi/llm4decompile-22b-v2'
def decompile_with_ollama(asm_func, max_tokens=2048):
    """
    Decompile an assembly function to C using Ollama (non-streaming).

    Args:
        asm_func: The assembly function as a string.
        max_tokens: Maximum number of tokens to generate. Forwarded to the
            model as the ``num_predict`` generation option.

    Returns:
        The decompiled C code as a string, or None if an error occurred.
    """
    try:
        # Construct a prompt for the Ollama model. This is *crucial* for good results.
        # The system prompt pins the model to emitting bare C source;
        # the user prompt carries the assembly to translate.
        messages = [
            {
                'role': 'system',
                'content': 'You are a helpful assistant that decompiles assembly code to C code. '
                           'Provide only the C code, without any extra explanation or comments. '
                           'Do not include markdown formatting. Do not wrap the output in ```.'
            },
            {
                'role': 'user',
                'content': f'Decompile the following assembly code to C:\n\n{asm_func}'
            }
        ]
        # Bug fix: max_tokens was previously accepted but never used.
        # Pass it through as the num_predict option so the cap takes effect.
        response = ollama.chat(
            model=MODEL_NAME,
            messages=messages,
            stream=False,
            options={'num_predict': max_tokens},
        )
        # Check if the response is valid and contains the decompiled code.
        if response and response.get('message') and response['message'].get('content'):
            return response['message']['content'].strip()
        print("Error: Ollama did not return a valid response.", file=sys.stderr)
        return None
    except ollama.ResponseError as e:
        print(f"Error during Ollama request: {e}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        return None
def decompile_with_ollama_streaming(asm_func, max_tokens=2048):
    """
    Decompile an assembly function to C using Ollama with streaming.

    Args:
        asm_func: The assembly function as a string.
        max_tokens: Maximum number of tokens to generate. Forwarded to the
            model as the ``num_predict`` generation option.

    Returns:
        The decompiled C code as a string, or None if an error occurred.
    """
    try:
        messages = [
            {
                'role': 'system',
                'content': 'You are a helpful assistant that decompiles assembly code to C code. '
                           'Provide only the C code, without any extra explanation or comments. '
                           'Do not include markdown formatting. Do not wrap the output in ```.'
            },
            {
                'role': 'user',
                'content': f'Decompile the following assembly code to C:\n\n{asm_func}'
            }
        ]
        # Bug fix: max_tokens was previously documented as unused.
        # Forward it as num_predict so it actually bounds generation,
        # consistent with decompile_with_ollama.
        stream = ollama.chat(
            model=MODEL_NAME,
            messages=messages,
            stream=True,
            options={'num_predict': max_tokens},
        )
        # Accumulate the streamed chunks into the final C source string.
        pieces = []
        for chunk in stream:
            if chunk and chunk.get('message') and chunk['message'].get('content'):
                pieces.append(chunk['message']['content'])
        return ''.join(pieces).strip()
    except ollama.ResponseError as e:
        print(f"Error during Ollama request: {e}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        return None
def main():
    """
    CLI entry point.

    Reads ``<filename>_O0.pseudo`` (filename from argv[1]), decompiles its
    contents via Ollama, and prints both the original pseudo function and
    the refined C output. Exits with status 1 on usage error or missing file.
    """
    if len(sys.argv) != 2:
        print("Usage: python script.py <filename>")
        sys.exit(1)
    file_name = sys.argv[1]
    opt = ['O0']  # Keep this for consistency with the original script's file naming.
    pseudo_path = f'{file_name}_{opt[0]}.pseudo'
    try:
        with open(pseudo_path, 'r') as f:
            asm_func = f.read()
    except FileNotFoundError:
        print(f"Error: File '{pseudo_path}' not found.", file=sys.stderr)
        sys.exit(1)
    # Choose either streaming or non-streaming version
    # c_func_decompile = decompile_with_ollama(asm_func)
    c_func_decompile = decompile_with_ollama_streaming(asm_func)
    if c_func_decompile:
        # Bug fix: the original re-opened and re-read the same input file just
        # to display it (with its own FileNotFoundError handler); the contents
        # are already held in asm_func, so reuse them directly.
        print(f'pseudo function:\n{asm_func}')
        print(f'refined function:\n{c_func_decompile}')
    else:
        print("Decompilation failed.", file=sys.stderr)
if __name__ == "__main__":
main()
This code repository is licensed under the MIT License.
If you have any questions, please raise an issue.