1//===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a base class for a pass to serialize a gpu module
10// into a binary blob that can be executed on a GPU. The binary blob is added
11// as a string attribute to the gpu module.
12//
13//===----------------------------------------------------------------------===//
14
15#include "mlir/Dialect/GPU/IR/GPUDialect.h"
16#include "mlir/Dialect/GPU/Transforms/Passes.h"
17#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
18#include "mlir/ExecutionEngine/OptUtils.h"
19#include "mlir/Pass/Pass.h"
20#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
21#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
22#include "mlir/Target/LLVMIR/Export.h"
23#include "llvm/IR/LegacyPassManager.h"
24#include "llvm/MC/TargetRegistry.h"
25#include "llvm/Support/TargetSelect.h"
26#include "llvm/Target/TargetMachine.h"
27
28#include <optional>
29#include <string>
30
31#define DEBUG_TYPE "serialize-to-blob"
32
33using namespace mlir;
34
35std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }
36
37gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID)
38 : OperationPass<gpu::GPUModuleOp>(passID) {}
39
40gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other)
41 : OperationPass<gpu::GPUModuleOp>(other) {}
42
43std::optional<std::string>
44gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule,
45 llvm::TargetMachine &targetMachine) {
46 llvmModule.setDataLayout(targetMachine.createDataLayout());
47
48 if (failed(result: optimizeLlvm(llvmModule, targetMachine)))
49 return std::nullopt;
50
51 std::string targetISA;
52 llvm::raw_string_ostream stream(targetISA);
53
54 { // Drop pstream after this to prevent the ISA from being stuck buffering
55 llvm::buffer_ostream pstream(stream);
56 llvm::legacy::PassManager codegenPasses;
57
58 if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
59 llvm::CodeGenFileType::AssemblyFile))
60 return std::nullopt;
61
62 codegenPasses.run(M&: llvmModule);
63 }
64 return stream.str();
65}
66
67void gpu::SerializeToBlobPass::runOnOperation() {
68 // Lower the module to an LLVM IR module using a separate context to enable
69 // multi-threaded processing.
70 llvm::LLVMContext llvmContext;
71 std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext);
72 if (!llvmModule)
73 return signalPassFailure();
74
75 // Lower the LLVM IR module to target ISA.
76 std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine();
77 if (!targetMachine)
78 return signalPassFailure();
79
80 std::optional<std::string> maybeTargetISA =
81 translateToISA(llvmModule&: *llvmModule, targetMachine&: *targetMachine);
82
83 if (!maybeTargetISA.has_value())
84 return signalPassFailure();
85
86 std::string targetISA = std::move(*maybeTargetISA);
87
88 LLVM_DEBUG({
89 llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n";
90 llvm::dbgs() << targetISA << "\n";
91 llvm::dbgs().flush();
92 });
93
94 // Serialize the target ISA.
95 std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA);
96 if (!blob)
97 return signalPassFailure();
98
99 // Add the blob as module attribute.
100 auto attr =
101 StringAttr::get(&getContext(), StringRef(blob->data(), blob->size()));
102 getOperation()->setAttr(gpuBinaryAnnotation, attr);
103}
104
105LogicalResult
106gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule,
107 llvm::TargetMachine &targetMachine) {
108 int optLevel = this->optLevel.getValue();
109 if (optLevel < 0 || optLevel > 3)
110 return getOperation().emitError()
111 << "invalid optimization level " << optLevel;
112
113 targetMachine.setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel));
114
115 auto transformer =
116 makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, targetMachine: &targetMachine);
117 auto error = transformer(&llvmModule);
118 if (error) {
119 InFlightDiagnostic mlirError = getOperation()->emitError();
120 llvm::handleAllErrors(
121 std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) {
122 mlirError << "could not optimize LLVM IR: " << ei.message();
123 });
124 return mlirError;
125 }
126 return success();
127}
128
129std::unique_ptr<llvm::TargetMachine>
130gpu::SerializeToBlobPass::createTargetMachine() {
131 Location loc = getOperation().getLoc();
132 std::string error;
133 const llvm::Target *target =
134 llvm::TargetRegistry::lookupTarget(triple, error);
135 if (!target) {
136 emitError(loc, message: Twine("failed to lookup target: ") + error);
137 return {};
138 }
139 llvm::TargetMachine *machine =
140 target->createTargetMachine(triple, chip, features, {}, {});
141 if (!machine) {
142 emitError(loc, message: "failed to create target machine");
143 return {};
144 }
145
146 return std::unique_ptr<llvm::TargetMachine>{machine};
147}
148
149std::unique_ptr<llvm::Module>
150gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
151 return translateModuleToLLVMIR(getOperation(), llvmContext,
152 "LLVMDialectModule");
153}
154

// Source: mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp