1 | //===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements a base class for a pass to serialize a gpu module |
10 | // into a binary blob that can be executed on a GPU. The binary blob is added |
11 | // as a string attribute to the gpu module. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
16 | #include "mlir/Dialect/GPU/Transforms/Passes.h" |
17 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
18 | #include "mlir/ExecutionEngine/OptUtils.h" |
19 | #include "mlir/Pass/Pass.h" |
20 | #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" |
21 | #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" |
22 | #include "mlir/Target/LLVMIR/Export.h" |
23 | #include "llvm/IR/LegacyPassManager.h" |
24 | #include "llvm/MC/TargetRegistry.h" |
25 | #include "llvm/Support/TargetSelect.h" |
26 | #include "llvm/Target/TargetMachine.h" |
27 | |
28 | #include <optional> |
29 | #include <string> |
30 | |
31 | #define DEBUG_TYPE "serialize-to-blob" |
32 | |
33 | using namespace mlir; |
34 | |
35 | std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary" ; } |
36 | |
37 | gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID) |
38 | : OperationPass<gpu::GPUModuleOp>(passID) {} |
39 | |
40 | gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other) |
41 | : OperationPass<gpu::GPUModuleOp>(other) {} |
42 | |
43 | std::optional<std::string> |
44 | gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule, |
45 | llvm::TargetMachine &targetMachine) { |
46 | llvmModule.setDataLayout(targetMachine.createDataLayout()); |
47 | |
48 | if (failed(result: optimizeLlvm(llvmModule, targetMachine))) |
49 | return std::nullopt; |
50 | |
51 | std::string targetISA; |
52 | llvm::raw_string_ostream stream(targetISA); |
53 | |
54 | { // Drop pstream after this to prevent the ISA from being stuck buffering |
55 | llvm::buffer_ostream pstream(stream); |
56 | llvm::legacy::PassManager codegenPasses; |
57 | |
58 | if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, |
59 | llvm::CodeGenFileType::AssemblyFile)) |
60 | return std::nullopt; |
61 | |
62 | codegenPasses.run(M&: llvmModule); |
63 | } |
64 | return stream.str(); |
65 | } |
66 | |
67 | void gpu::SerializeToBlobPass::runOnOperation() { |
68 | // Lower the module to an LLVM IR module using a separate context to enable |
69 | // multi-threaded processing. |
70 | llvm::LLVMContext llvmContext; |
71 | std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext); |
72 | if (!llvmModule) |
73 | return signalPassFailure(); |
74 | |
75 | // Lower the LLVM IR module to target ISA. |
76 | std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine(); |
77 | if (!targetMachine) |
78 | return signalPassFailure(); |
79 | |
80 | std::optional<std::string> maybeTargetISA = |
81 | translateToISA(llvmModule&: *llvmModule, targetMachine&: *targetMachine); |
82 | |
83 | if (!maybeTargetISA.has_value()) |
84 | return signalPassFailure(); |
85 | |
86 | std::string targetISA = std::move(*maybeTargetISA); |
87 | |
88 | LLVM_DEBUG({ |
89 | llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n" ; |
90 | llvm::dbgs() << targetISA << "\n" ; |
91 | llvm::dbgs().flush(); |
92 | }); |
93 | |
94 | // Serialize the target ISA. |
95 | std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA); |
96 | if (!blob) |
97 | return signalPassFailure(); |
98 | |
99 | // Add the blob as module attribute. |
100 | auto attr = |
101 | StringAttr::get(&getContext(), StringRef(blob->data(), blob->size())); |
102 | getOperation()->setAttr(gpuBinaryAnnotation, attr); |
103 | } |
104 | |
105 | LogicalResult |
106 | gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule, |
107 | llvm::TargetMachine &targetMachine) { |
108 | int optLevel = this->optLevel.getValue(); |
109 | if (optLevel < 0 || optLevel > 3) |
110 | return getOperation().emitError() |
111 | << "invalid optimization level " << optLevel; |
112 | |
113 | targetMachine.setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel)); |
114 | |
115 | auto transformer = |
116 | makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, targetMachine: &targetMachine); |
117 | auto error = transformer(&llvmModule); |
118 | if (error) { |
119 | InFlightDiagnostic mlirError = getOperation()->emitError(); |
120 | llvm::handleAllErrors( |
121 | std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { |
122 | mlirError << "could not optimize LLVM IR: " << ei.message(); |
123 | }); |
124 | return mlirError; |
125 | } |
126 | return success(); |
127 | } |
128 | |
129 | std::unique_ptr<llvm::TargetMachine> |
130 | gpu::SerializeToBlobPass::createTargetMachine() { |
131 | Location loc = getOperation().getLoc(); |
132 | std::string error; |
133 | const llvm::Target *target = |
134 | llvm::TargetRegistry::lookupTarget(triple, error); |
135 | if (!target) { |
136 | emitError(loc, message: Twine("failed to lookup target: " ) + error); |
137 | return {}; |
138 | } |
139 | llvm::TargetMachine *machine = |
140 | target->createTargetMachine(triple, chip, features, {}, {}); |
141 | if (!machine) { |
142 | emitError(loc, message: "failed to create target machine" ); |
143 | return {}; |
144 | } |
145 | |
146 | return std::unique_ptr<llvm::TargetMachine>{machine}; |
147 | } |
148 | |
149 | std::unique_ptr<llvm::Module> |
150 | gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { |
151 | return translateModuleToLLVMIR(getOperation(), llvmContext, |
152 | "LLVMDialectModule" ); |
153 | } |
154 | |