403 Downloads Updated 8 months ago
Name
4 models
llm4decompile-22b-v2:latest
44GB · 32K context window · Text · 8 months ago
llm4decompile-22b-v2:q4_K_S
13GB · 32K context window · Text · 8 months ago
llm4decompile-22b-v2:q6_K
18GB · 32K context window · Text · 8 months ago
llm4decompile-22b-v2:q8_0
24GB · 32K context window · Text · 8 months ago
LLM4Decompile aims to decompile x86 assembly instructions into C. The newly released V2 series are trained with a larger dataset (2B tokens) and a maximum token length of 4,096, with remarkable performance (up to 100% improvement) compared to the previous model.
| Metrics | Re-executability Rate | | | | | Edit Similarity | | | | |
|---|---|---|---|---|---|---|---|---|---|---|
| Optimization Level | O0 | O1 | O2 | O3 | AVG | O0 | O1 | O2 | O3 | AVG |
| LLM4Decompile-End-6.7B | 0.6805 | 0.3951 | 0.3671 | 0.3720 | 0.4537 | 0.1557 | 0.1292 | 0.1293 | 0.1269 | 0.1353 |
| Ghidra | 0.3476 | 0.1646 | 0.1524 | 0.1402 | 0.2012 | 0.0699 | 0.0613 | 0.0619 | 0.0547 | 0.0620 |
| +GPT-4o | 0.4695 | 0.3415 | 0.2866 | 0.3110 | 0.3522 | 0.0660 | 0.0563 | 0.0567 | 0.0499 | 0.0572 |
| +LLM4Decompile-Ref-1.3B | 0.6890 | 0.3720 | 0.4085 | 0.3720 | 0.4604 | 0.1517 | 0.1325 | 0.1292 | 0.1267 | 0.1350 |
| +LLM4Decompile-Ref-6.7B | 0.7439 | 0.4695 | 0.4756 | 0.4207 | 0.5274 | 0.1559 | 0.1353 | 0.1342 | 0.1273 | 0.1382 |
| +LLM4Decompile-Ref-33B | 0.7073 | 0.4756 | 0.4390 | 0.4146 | 0.5091 | 0.1540 | 0.1379 | 0.1363 | 0.1307 | 0.1397 |
To get started, pull the model with `ollama pull MHKetbi/llm4decompile-22b-v2`. Install the Python client with `pip install ollama`, then save the script below as `decompiler.py`. Create a file named `my_assembly_file_O0.pseudo` (replace `my_assembly_file` with your desired filename) containing the assembly code of the function you want to decompile, and run `python decompiler.py my_assembly_file`.
import ollama
import sys
MODEL_NAME = 'MHKetbi/llm4decompile-22b-v2'
def decompile_with_ollama(asm_func, max_tokens=2048):
    """
    Decompile an assembly function to C using Ollama (non-streaming).

    Args:
        asm_func: The assembly function as a string.
        max_tokens: Maximum number of tokens to generate. Forwarded to the
            model as the ``num_predict`` generation option.

    Returns:
        The decompiled C code as a string, or None if an error occurred.
    """
    try:
        # Construct a prompt for the Ollama model. This is *crucial* for good results.
        # The system prompt pins the model to emitting bare C source;
        # the user prompt carries the assembly to translate.
        messages = [
            {
                'role': 'system',
                'content': 'You are a helpful assistant that decompiles assembly code to C code. '
                           'Provide only the C code, without any extra explanation or comments. '
                           'Do not include markdown formatting. Do not wrap the output in ```.'
            },
            {
                'role': 'user',
                'content': f'Decompile the following assembly code to C:\n\n{asm_func}'
            }
        ]
        # Bug fix: max_tokens was previously accepted but never used.
        # Pass it through as the num_predict option so the cap takes effect.
        response = ollama.chat(
            model=MODEL_NAME,
            messages=messages,
            stream=False,
            options={'num_predict': max_tokens},
        )
        # Check if the response is valid and contains the decompiled code.
        if response and response.get('message') and response['message'].get('content'):
            return response['message']['content'].strip()
        print("Error: Ollama did not return a valid response.", file=sys.stderr)
        return None
    except ollama.ResponseError as e:
        print(f"Error during Ollama request: {e}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        return None
def decompile_with_ollama_streaming(asm_func, max_tokens=2048):
    """
    Decompile an assembly function to C using Ollama with streaming.

    Args:
        asm_func: The assembly function as a string.
        max_tokens: Maximum number of tokens to generate. Forwarded to the
            model as the ``num_predict`` generation option.

    Returns:
        The decompiled C code as a string, or None if an error occurred.
    """
    try:
        messages = [
            {
                'role': 'system',
                'content': 'You are a helpful assistant that decompiles assembly code to C code. '
                           'Provide only the C code, without any extra explanation or comments. '
                           'Do not include markdown formatting. Do not wrap the output in ```.'
            },
            {
                'role': 'user',
                'content': f'Decompile the following assembly code to C:\n\n{asm_func}'
            }
        ]
        # Bug fix: max_tokens was previously documented as unused.
        # Forward it as num_predict so it actually bounds generation,
        # consistent with decompile_with_ollama.
        stream = ollama.chat(
            model=MODEL_NAME,
            messages=messages,
            stream=True,
            options={'num_predict': max_tokens},
        )
        # Accumulate the streamed chunks into the final C source string.
        pieces = []
        for chunk in stream:
            if chunk and chunk.get('message') and chunk['message'].get('content'):
                pieces.append(chunk['message']['content'])
        return ''.join(pieces).strip()
    except ollama.ResponseError as e:
        print(f"Error during Ollama request: {e}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        return None
def main():
    """
    CLI entry point.

    Reads ``<filename>_O0.pseudo`` (filename from argv[1]), decompiles its
    contents via Ollama, and prints both the original pseudo function and
    the refined C output. Exits with status 1 on usage error or missing file.
    """
    if len(sys.argv) != 2:
        print("Usage: python script.py <filename>")
        sys.exit(1)
    file_name = sys.argv[1]
    opt = ['O0']  # Keep this for consistency with the original script's file naming.
    pseudo_path = f'{file_name}_{opt[0]}.pseudo'
    try:
        with open(pseudo_path, 'r') as f:
            asm_func = f.read()
    except FileNotFoundError:
        print(f"Error: File '{pseudo_path}' not found.", file=sys.stderr)
        sys.exit(1)
    # Choose either streaming or non-streaming version
    # c_func_decompile = decompile_with_ollama(asm_func)
    c_func_decompile = decompile_with_ollama_streaming(asm_func)
    if c_func_decompile:
        # Bug fix: the original re-opened and re-read the same input file just
        # to display it (with its own FileNotFoundError handler); the contents
        # are already held in asm_func, so reuse them directly.
        print(f'pseudo function:\n{asm_func}')
        print(f'refined function:\n{c_func_decompile}')
    else:
        print("Decompilation failed.", file=sys.stderr)
if __name__ == "__main__":
main()
This code repository is licensed under the MIT License.
If you have any questions, please raise an issue.