diff mbox series

[RFC,v1,09/43] helper-to-tcg: Introduce get-llvm-ir.py

Message ID 20241121014947.18666-10-anjo@rev.ng (mailing list archive)
State New
Headers show
Series Introduce helper-to-tcg | expand

Commit Message

Anton Johansson Nov. 21, 2024, 1:49 a.m. UTC
Introduces a new python helper script to convert a set of QEMU .c files to
LLVM IR .ll using clang.  Compile flags are found by looking at
compile_commands.json, and llvm-link is used to link together all LLVM
modules into a single module.

Signed-off-by: Anton Johansson <anjo@rev.ng>
---
 subprojects/helper-to-tcg/get-llvm-ir.py | 143 +++++++++++++++++++++++
 1 file changed, 143 insertions(+)
 create mode 100755 subprojects/helper-to-tcg/get-llvm-ir.py
diff mbox series

Patch

diff --git a/subprojects/helper-to-tcg/get-llvm-ir.py b/subprojects/helper-to-tcg/get-llvm-ir.py
new file mode 100755
index 0000000000..9ee5d0e136
--- /dev/null
+++ b/subprojects/helper-to-tcg/get-llvm-ir.py
@@ -0,0 +1,143 @@ 
+#!/usr/bin/env python3
+
+##
+##  Copyright(c) 2024 rev.ng Labs Srl. All Rights Reserved.
+##
+##  This program is free software; you can redistribute it and/or modify
+##  it under the terms of the GNU General Public License as published by
+##  the Free Software Foundation; either version 2 of the License, or
+##  (at your option) any later version.
+##
+##  This program is distributed in the hope that it will be useful,
+##  but WITHOUT ANY WARRANTY; without even the implied warranty of
+##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##  GNU General Public License for more details.
+##
+##  You should have received a copy of the GNU General Public License
+##  along with this program; if not, see <http://www.gnu.org/licenses/>.
+##
+
+import argparse
+import json
+import os
+import shlex
+import sys
+import subprocess
+
+
+def log(msg):
+    print(msg, file=sys.stderr)
+
+
+def run_command(command):
+    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    out = proc.communicate()
+    if proc.wait() != 0:
+        log(f"Command: {' '.join(command)} exited with {proc.returncode}\n")
+        log(f"output:\n{out}\n")
+
+
+def find_compile_commands(compile_commands_path, clang_path, input_path, target):
+    with open(compile_commands_path, "r") as f:
+        compile_commands = json.load(f)
+        for compile_command in compile_commands:
+            path = compile_command["file"]
+            if os.path.basename(path) != os.path.basename(input_path):
+                continue
+
+            os.chdir(compile_command["directory"])
+            command = compile_command["command"]
+
+            # If building multiple targets there's a chance
+            # input files share the same path and name.
+            # This could cause us to find the wrong compile
+            # command, we use the target path to distinguish
+            # between these.
+            if not target in command:
+                continue
+
+            argv = shlex.split(command)
+            argv[0] = clang_path
+
+            return argv
+
+    raise ValueError(f"Unable to find compile command for {input_path}")
+
+
+def generate_llvm_ir(
+    compile_commands_path, clang_path, output_path, input_path, target
+):
+    command = find_compile_commands(
+        compile_commands_path, clang_path, input_path, target
+    )
+
+    flags_to_remove = {
+        "-ftrivial-auto-var-init=zero",
+        "-fzero-call-used-regs=used-gpr",
+        "-Wimplicit-fallthrough=2",
+        "-Wold-style-declaration",
+        "-Wno-psabi",
+        "-Wshadow=local",
+    }
+
+    # Remove
+    #   - output of makefile rules (-MQ,-MF target);
+    #   - output of object files (-o target);
+    #   - excessive zero-initialization of block-scope variables
+    #     (-ftrivial-auto-var-init=zero);
+    #   - and any optimization flags (-O).
+    for i, arg in reversed(list(enumerate(command))):
+        if arg in {"-MQ", "-o", "-MF"}:
+            del command[i : i + 2]
+        elif arg.startswith("-O") or arg in flags_to_remove:
+            del command[i]
+
+    # Define a HELPER_TO_TCG macro for translation units wanting to
+    # conditionally include or exclude code during translation to TCG.
+    # Disable optimization (-O0) and make sure clang doesn't emit optnone
+    # attributes (-disable-O0-optnone) which inhibit further optimization.
+    # Optimization will be performed at a later stage in the helper-to-tcg
+    # pipeline.
+    command += [
+        "-S",
+        "-emit-llvm",
+        "-DHELPER_TO_TCG",
+        "-O0",
+        "-Xclang",
+        "-disable-O0-optnone",
+    ]
+    if output_path:
+        command += ["-o", output_path]
+
+    run_command(command)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Produce the LLVM IR of a given .c file."
+    )
+    parser.add_argument(
+        "--compile-commands", required=True, help="Path to compile_commands.json"
+    )
+    parser.add_argument("--clang", default="clang", help="Path to clang.")
+    parser.add_argument("--llvm-link", default="llvm-link", help="Path to llvm-link.")
+    parser.add_argument("-o", "--output", required=True, help="Output .ll file path")
+    parser.add_argument(
+        "--target-path", help="Path to QEMU target dir. (e.q. target/i386)"
+    )
+    parser.add_argument("inputs", nargs="+", help=".c file inputs")
+    args = parser.parse_args()
+
+    outputs = []
+    for input in args.inputs:
+        output = os.path.basename(input) + ".ll"
+        generate_llvm_ir(
+            args.compile_commands, args.clang, output, input, args.target_path
+        )
+        outputs.append(output)
+
+    run_command([args.llvm_link] + outputs + ["-S", "-o", args.output])
+
+
+if __name__ == "__main__":
+    sys.exit(main())