llvm · adam-smnk · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
diff --git a/lighthouse/ingress/torch/compile.py b/lighthouse/ingress/torch/compile.py
@@ -3,6 +3,7 @@
 import contextlib
 from dataclasses import dataclass
 from enum import Enum
+import sys
 
 from lighthouse import utils as lh_utils
 from lighthouse.ingress.torch import import_from_model
@@ -58,6 +59,12 @@ class JITFunction:
         shared_libs: Paths to external runtime libraries used to execute
             compiled MLIR function.
         entry_func: Name of the entry function.
+        n_outputs: Number of trailing outputs to return. Defaults to
+            ``len(results)``. Used to skip extra results that torch-mlir
+            prepends and that might not be needed.
+        dump_obj: Whether to write out the compiled object file.
+            Pass a string to choose the file name, or True to use a
+            temporary file. The file name is printed to stderr.
     """
 
     def __init__(
@@ -67,10 +74,17 @@ def __init__(
         shared_libs: Sequence[str] = [],
         entry_func: str = "main",
         n_outputs: int | None = None,
+        dump_obj: str | bool = False,
     ):
         self.runner = Runner(
             module, mem_manager_cls=TorchMemoryManager, shared_libs=shared_libs
         )
+        if dump_obj:
+            file_name = ""
+            if isinstance(dump_obj, str):
+                file_name = dump_obj
+            file_name = self.runner.dump_object_file(file_name)
+            print(f"MLIR object file: {file_name}", file=sys.stderr)
         self.entry_func = entry_func
         self.results = results
         self.n_outputs = n_outputs if n_outputs is not None else len(results)
@@ -131,6 +145,9 @@ class MLIRBackend:
         shared_libs: Paths to external runtime libraries used to execute
             compiled MLIR function.
         entry_func: Name of the entry function.
+        dump_obj: Whether to write out the compiled object file.
+            Pass a string to choose the file name, or True to use a
+            temporary file.
     """
 
     def __init__(
@@ -141,13 +158,15 @@ def __init__(
         ir_context: ir.Context | None = None,
         shared_libs: Sequence[str] = [],
         entry_func: str = "main",
+        dump_obj: str | bool = False,
     ):
         self.device = device
         self.fn_compile = fn_compile
         self.dialect = dialect
         self.ctx = ir_context if ir_context is not None else ir.Context()
         self.shared_libs = list(shared_libs)
         self.entry_func = entry_func
+        self.dump_obj = dump_obj
 
     def get_entry_func(self, module: ir.Module) -> func.FuncOp | None:
         """
@@ -338,6 +357,7 @@ def __call__(
             shared_libs=self.shared_libs,
             entry_func=self.entry_func,
             n_outputs=n_fx_outputs,
+            dump_obj=self.dump_obj,
         )
 
 
@@ -347,6 +367,7 @@ def cpu_backend(
     ir_context: ir.Context | None = None,
     shared_libs: Sequence[str] = [],
     entry_func: str = "main",
+    dump_obj: str | bool = False,
 ) -> Callable[[torch.fx.GraphModule, list[torch.Tensor]], Callable]:
     """
     CPU backend for JIT-compiling a PyTorch model using MLIR.
@@ -360,6 +381,9 @@ def cpu_backend(
         shared_libs: Paths to external runtime libraries used to execute
             compiled MLIR function.
         entry_func: Name of the entry function.
+        dump_obj: Whether to write out the compiled object file.
+            Pass a string to choose the file name, or True to use a
+            temporary file.
 
     Returns:
         A torch.compile backend object.
@@ -371,6 +395,7 @@ def cpu_backend(
         ir_context=ir_context,
         shared_libs=shared_libs,
         entry_func=entry_func,
+        dump_obj=dump_obj,
     )
 
 
@@ -381,6 +406,7 @@ def gpu_backend(
     ir_context: ir.Context | None = None,
     shared_libs: Sequence[str] = [],
     entry_func: str = "main",
+    dump_obj: str | bool = False,
 ) -> Callable[[torch.fx.GraphModule, list[torch.Tensor]], Callable]:
     """
     GPU backend for JIT-compiling a PyTorch model using MLIR.
@@ -395,6 +421,9 @@ def gpu_backend(
         shared_libs: Paths to external runtime libraries used to execute
             compiled MLIR function.
         entry_func: Name of the entry function.
+        dump_obj: Whether to write out the compiled object file.
+            Pass a string to choose the file name, or True to use a
+            temporary file.
 
     Returns:
         A torch.compile backend object.
@@ -408,4 +437,5 @@ def gpu_backend(
         ir_context=ir_context,
         shared_libs=shared_libs,
         entry_func=entry_func,
+        dump_obj=dump_obj,
     )
diff --git a/tools/kernel-bench b/tools/kernel-bench
@@ -8,6 +8,7 @@ import ml_dtypes
 import numpy as np
 from pathlib import Path
 import torch
+import tempfile
 
 from mlir import ir
 from lighthouse.execution.runner import Runner
@@ -201,6 +202,11 @@ def torch_compile(args, model: torch.nn.Module, sample_tensors: list):
             print(module)
         return module
 
+    obj_file = ""
+    if args.dump_assembly:
+        with tempfile.NamedTemporaryFile(suffix=".o", delete=False) as tmp:
+            obj_file = tmp.name
+
     # TODO: Implement benchmarking
     if args.benchmark:
         raise NotImplementedError(
@@ -210,10 +216,15 @@ def torch_compile(args, model: torch.nn.Module, sample_tensors: list):
         # Reconfigure the model to be compiled using torch.compile, take the compiled output.
         model.compile(
             dynamic=False,
-            backend=cpu_backend(compiler_pipeline, shared_libs=[c_runner_lib]),
+            backend=cpu_backend(
+                compiler_pipeline, shared_libs=[c_runner_lib], dump_obj=obj_file
+            ),
         )
         out = model(*sample_tensors, **sample_kwargs)
 
+        if args.dump_assembly and obj_file:
+            dump_assembly(args, obj_file)
+
     return out