1 | //===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines ROCDL target related functions including registration |
10 | // calls for the `#rocdl.target` compilation attribute. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "mlir/Target/LLVM/ROCDL/Target.h" |
15 | |
16 | #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
17 | #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" |
18 | #include "mlir/Support/FileUtilities.h" |
19 | #include "mlir/Target/LLVM/ROCDL/Utils.h" |
20 | #include "mlir/Target/LLVMIR/Export.h" |
21 | |
22 | #include "llvm/IR/Constants.h" |
23 | #include "llvm/MC/MCAsmBackend.h" |
24 | #include "llvm/MC/MCAsmInfo.h" |
25 | #include "llvm/MC/MCCodeEmitter.h" |
26 | #include "llvm/MC/MCContext.h" |
27 | #include "llvm/MC/MCInstrInfo.h" |
28 | #include "llvm/MC/MCObjectFileInfo.h" |
29 | #include "llvm/MC/MCObjectWriter.h" |
30 | #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
31 | #include "llvm/MC/MCRegisterInfo.h" |
32 | #include "llvm/MC/MCStreamer.h" |
33 | #include "llvm/MC/MCSubtargetInfo.h" |
34 | #include "llvm/MC/TargetRegistry.h" |
35 | #include "llvm/Support/FileSystem.h" |
36 | #include "llvm/Support/FileUtilities.h" |
37 | #include "llvm/Support/Path.h" |
38 | #include "llvm/Support/Program.h" |
39 | #include "llvm/Support/SourceMgr.h" |
40 | #include "llvm/Support/TargetSelect.h" |
41 | #include "llvm/TargetParser/TargetParser.h" |
42 | |
43 | #include <cstdlib> |
44 | #include <optional> |
45 | |
46 | using namespace mlir; |
47 | using namespace mlir::ROCDL; |
48 | |
49 | #ifndef __DEFAULT_ROCM_PATH__ |
50 | #define __DEFAULT_ROCM_PATH__ "" |
51 | #endif |
52 | |
53 | namespace { |
54 | // Implementation of the `TargetAttrInterface` model. |
55 | class ROCDLTargetAttrImpl |
56 | : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> { |
57 | public: |
58 | std::optional<SmallVector<char, 0>> |
59 | serializeToObject(Attribute attribute, Operation *module, |
60 | const gpu::TargetOptions &options) const; |
61 | |
62 | Attribute createObject(Attribute attribute, Operation *module, |
63 | const SmallVector<char, 0> &object, |
64 | const gpu::TargetOptions &options) const; |
65 | }; |
66 | } // namespace |
67 | |
68 | // Register the ROCDL dialect, the ROCDL translation and the target interface. |
69 | void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels( |
70 | DialectRegistry ®istry) { |
71 | registry.addExtension(extensionFn: +[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) { |
72 | ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx); |
73 | }); |
74 | } |
75 | |
76 | void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels( |
77 | MLIRContext &context) { |
78 | DialectRegistry registry; |
79 | registerROCDLTargetInterfaceExternalModels(registry); |
80 | context.appendDialectRegistry(registry); |
81 | } |
82 | |
83 | // Search for the ROCM path. |
84 | StringRef mlir::ROCDL::getROCMPath() { |
85 | if (const char *var = std::getenv(name: "ROCM_PATH")) |
86 | return var; |
87 | if (const char *var = std::getenv(name: "ROCM_ROOT")) |
88 | return var; |
89 | if (const char *var = std::getenv(name: "ROCM_HOME")) |
90 | return var; |
91 | return __DEFAULT_ROCM_PATH__; |
92 | } |
93 | |
94 | SerializeGPUModuleBase::SerializeGPUModuleBase( |
95 | Operation &module, ROCDLTargetAttr target, |
96 | const gpu::TargetOptions &targetOptions) |
97 | : ModuleToObject(module, target.getTriple(), target.getChip(), |
98 | target.getFeatures(), target.getO()), |
99 | target(target), toolkitPath(targetOptions.getToolkitPath()), |
100 | librariesToLink(targetOptions.getLibrariesToLink()) { |
101 | |
102 | // If `targetOptions` has an empty toolkitPath use `getROCMPath` |
103 | if (toolkitPath.empty()) |
104 | toolkitPath = getROCMPath(); |
105 | |
106 | // Append the files in the target attribute. |
107 | if (target.getLink()) |
108 | librariesToLink.append(target.getLink().begin(), target.getLink().end()); |
109 | } |
110 | |
111 | void SerializeGPUModuleBase::init() { |
112 | static llvm::once_flag initializeBackendOnce; |
113 | llvm::call_once(flag&: initializeBackendOnce, F: []() { |
114 | // If the `AMDGPU` LLVM target was built, initialize it. |
115 | #if MLIR_ENABLE_ROCM_CONVERSIONS |
116 | LLVMInitializeAMDGPUTarget(); |
117 | LLVMInitializeAMDGPUTargetInfo(); |
118 | LLVMInitializeAMDGPUTargetMC(); |
119 | LLVMInitializeAMDGPUAsmParser(); |
120 | LLVMInitializeAMDGPUAsmPrinter(); |
121 | #endif |
122 | }); |
123 | } |
124 | |
125 | ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; } |
126 | |
127 | StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; } |
128 | |
129 | ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const { |
130 | return librariesToLink; |
131 | } |
132 | |
133 | LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) { |
134 | if (libs == AMDGCNLibraries::None) |
135 | return success(); |
136 | StringRef pathRef = getToolkitPath(); |
137 | |
138 | // Get the path for the device libraries |
139 | SmallString<256> path; |
140 | path.insert(I: path.begin(), From: pathRef.begin(), To: pathRef.end()); |
141 | llvm::sys::path::append(path, a: "amdgcn", b: "bitcode"); |
142 | pathRef = StringRef(path.data(), path.size()); |
143 | |
144 | // Fail if the path is invalid. |
145 | if (!llvm::sys::fs::is_directory(Path: pathRef)) { |
146 | getOperation().emitError() << "ROCm amdgcn bitcode path: "<< pathRef |
147 | << " does not exist or is not a directory"; |
148 | return failure(); |
149 | } |
150 | |
151 | // Helper function for adding a library. |
152 | auto addLib = [&](const Twine &lib) -> bool { |
153 | auto baseSize = path.size(); |
154 | llvm::sys::path::append(path, a: lib); |
155 | StringRef pathRef(path.data(), path.size()); |
156 | if (!llvm::sys::fs::is_regular_file(Path: pathRef)) { |
157 | getOperation().emitRemark() << "bitcode library path: "<< pathRef |
158 | << " does not exist or is not a file"; |
159 | return true; |
160 | } |
161 | librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef)); |
162 | path.truncate(N: baseSize); |
163 | return false; |
164 | }; |
165 | |
166 | // Add ROCm device libraries. Fail if any of the libraries is not found, ie. |
167 | // if any of the `addLib` failed. |
168 | if ((any(Val: libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) || |
169 | (any(Val: libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) || |
170 | (any(Val: libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) || |
171 | (any(Val: libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc"))) |
172 | return failure(); |
173 | return success(); |
174 | } |
175 | |
176 | std::optional<SmallVector<std::unique_ptr<llvm::Module>>> |
177 | SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { |
178 | // Return if there are no libs to load. |
179 | if (deviceLibs == AMDGCNLibraries::None && librariesToLink.empty()) |
180 | return SmallVector<std::unique_ptr<llvm::Module>>(); |
181 | if (failed(Result: appendStandardLibs(libs: deviceLibs))) |
182 | return std::nullopt; |
183 | SmallVector<std::unique_ptr<llvm::Module>> bcFiles; |
184 | if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink, |
185 | bcFiles, true))) |
186 | return std::nullopt; |
187 | return std::move(bcFiles); |
188 | } |
189 | |
190 | LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { |
191 | // Some ROCM builds don't strip this like they should |
192 | if (auto *openclVersion = module.getNamedMetadata(Name: "opencl.ocl.version")) |
193 | module.eraseNamedMetadata(NMD: openclVersion); |
194 | // Stop spamming us with clang version numbers |
195 | if (auto *ident = module.getNamedMetadata(Name: "llvm.ident")) |
196 | module.eraseNamedMetadata(NMD: ident); |
197 | // Override the libModules datalayout and target triple with the compiler's |
198 | // data layout should there be a discrepency. |
199 | setDataLayoutAndTriple(module); |
200 | return success(); |
201 | } |
202 | |
203 | void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { |
204 | // If all libraries are not set, traverse the module to determine which |
205 | // libraries are required. |
206 | if (deviceLibs != AMDGCNLibraries::All) { |
207 | for (llvm::Function &f : module.functions()) { |
208 | if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { |
209 | StringRef funcName = f.getName(); |
210 | if ("printf"== funcName) |
211 | deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl | |
212 | AMDGCNLibraries::Ocml; |
213 | if (funcName.starts_with(Prefix: "__ockl_")) |
214 | deviceLibs |= AMDGCNLibraries::Ockl; |
215 | if (funcName.starts_with(Prefix: "__ocml_")) |
216 | deviceLibs |= AMDGCNLibraries::Ocml; |
217 | if (funcName == "__atomic_work_item_fence") |
218 | deviceLibs |= AMDGCNLibraries::Hip; |
219 | } |
220 | } |
221 | } |
222 | addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(), |
223 | target.hasFiniteOnly(), target.hasUnsafeMath(), |
224 | target.hasFastMath(), target.hasCorrectSqrt(), |
225 | target.getAbi()); |
226 | } |
227 | |
228 | void SerializeGPUModuleBase::addControlVariables( |
229 | llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, |
230 | bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, |
231 | StringRef abiVer) { |
232 | // Helper function for adding control variables. |
233 | auto addControlVariable = [&module](StringRef name, uint32_t value, |
234 | uint32_t bitwidth) { |
235 | if (module.getNamedGlobal(Name: name)) |
236 | return; |
237 | llvm::IntegerType *type = |
238 | llvm::IntegerType::getIntNTy(C&: module.getContext(), N: bitwidth); |
239 | llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( |
240 | module, /*isConstant=*/type, true, |
241 | llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, |
242 | llvm::ConstantInt::get(Ty: type, V: value), name, /*before=*/nullptr, |
243 | /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, |
244 | /*addressSpace=*/4); |
245 | controlVariable->setVisibility( |
246 | llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); |
247 | controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8)); |
248 | controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); |
249 | }; |
250 | |
251 | // Note that COV6 requires ROCm 6.3+. |
252 | int abi = 600; |
253 | abiVer.getAsInteger(Radix: 0, Result&: abi); |
254 | module.addModuleFlag(Behavior: llvm::Module::Error, Key: "amdhsa_code_object_version", Val: abi); |
255 | // Return if no device libraries are required. |
256 | if (libs == AMDGCNLibraries::None) |
257 | return; |
258 | // Add ocml related control variables. |
259 | if (any(Val: libs & AMDGCNLibraries::Ocml)) { |
260 | addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8); |
261 | addControlVariable("__oclc_daz_opt", daz || fastMath, 8); |
262 | addControlVariable("__oclc_correctly_rounded_sqrt32", |
263 | correctSqrt && !fastMath, 8); |
264 | addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8); |
265 | } |
266 | // Add ocml or ockl related control variables. |
267 | if (any(Val: libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) { |
268 | addControlVariable("__oclc_wavefrontsize64", wave64, 8); |
269 | // Get the ISA version. |
270 | llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(GPU: chip); |
271 | // Add the ISA control variable. |
272 | addControlVariable("__oclc_ISA_version", |
273 | isaVersion.Minor + 100 * isaVersion.Stepping + |
274 | 1000 * isaVersion.Major, |
275 | 32); |
276 | addControlVariable("__oclc_ABI_version", abi, 32); |
277 | } |
278 | } |
279 | |
280 | std::optional<SmallVector<char, 0>> |
281 | SerializeGPUModuleBase::assembleIsa(StringRef isa) { |
282 | auto loc = getOperation().getLoc(); |
283 | |
284 | StringRef targetTriple = this->triple; |
285 | |
286 | SmallVector<char, 0> result; |
287 | llvm::raw_svector_ostream os(result); |
288 | |
289 | llvm::Triple triple(llvm::Triple::normalize(Str: targetTriple)); |
290 | std::string error; |
291 | const llvm::Target *target = |
292 | llvm::TargetRegistry::lookupTarget(TripleStr: triple.normalize(), Error&: error); |
293 | if (!target) { |
294 | emitError(loc, Twine("failed to lookup target: ") + error); |
295 | return std::nullopt; |
296 | } |
297 | |
298 | llvm::SourceMgr srcMgr; |
299 | srcMgr.AddNewSourceBuffer(F: llvm::MemoryBuffer::getMemBuffer(InputData: isa), IncludeLoc: SMLoc()); |
300 | |
301 | const llvm::MCTargetOptions mcOptions; |
302 | std::unique_ptr<llvm::MCRegisterInfo> mri( |
303 | target->createMCRegInfo(TT: targetTriple)); |
304 | std::unique_ptr<llvm::MCAsmInfo> mai( |
305 | target->createMCAsmInfo(MRI: *mri, TheTriple: targetTriple, Options: mcOptions)); |
306 | std::unique_ptr<llvm::MCSubtargetInfo> sti( |
307 | target->createMCSubtargetInfo(TheTriple: targetTriple, CPU: chip, Features: features)); |
308 | |
309 | llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, |
310 | &mcOptions); |
311 | std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo( |
312 | Ctx&: ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); |
313 | ctx.setObjectFileInfo(mofi.get()); |
314 | |
315 | SmallString<128> cwd; |
316 | if (!llvm::sys::fs::current_path(result&: cwd)) |
317 | ctx.setCompilationDir(cwd); |
318 | |
319 | std::unique_ptr<llvm::MCStreamer> mcStreamer; |
320 | std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo()); |
321 | |
322 | llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(II: *mcii, Ctx&: ctx); |
323 | llvm::MCAsmBackend *mab = target->createMCAsmBackend(STI: *sti, MRI: *mri, Options: mcOptions); |
324 | mcStreamer.reset(p: target->createMCObjectStreamer( |
325 | T: triple, Ctx&: ctx, TAB: std::unique_ptr<llvm::MCAsmBackend>(mab), |
326 | OW: mab->createObjectWriter(OS&: os), Emitter: std::unique_ptr<llvm::MCCodeEmitter>(ce), |
327 | STI: *sti)); |
328 | |
329 | std::unique_ptr<llvm::MCAsmParser> parser( |
330 | createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); |
331 | std::unique_ptr<llvm::MCTargetAsmParser> tap( |
332 | target->createMCAsmParser(STI: *sti, Parser&: *parser, MII: *mcii, Options: mcOptions)); |
333 | |
334 | if (!tap) { |
335 | emitError(loc, "assembler initialization error"); |
336 | return std::nullopt; |
337 | } |
338 | |
339 | parser->setTargetParser(*tap); |
340 | parser->Run(NoInitialTextSection: false); |
341 | return std::move(result); |
342 | } |
343 | |
344 | std::optional<SmallVector<char, 0>> |
345 | SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { |
346 | // Assemble the ISA. |
347 | std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(isa: serializedISA); |
348 | |
349 | if (!isaBinary) { |
350 | getOperation().emitError() << "failed during ISA assembling"; |
351 | return std::nullopt; |
352 | } |
353 | |
354 | // Save the ISA binary to a temp file. |
355 | int tempIsaBinaryFd = -1; |
356 | SmallString<128> tempIsaBinaryFilename; |
357 | if (llvm::sys::fs::createTemporaryFile(Prefix: "kernel%%", Suffix: "o", ResultFD&: tempIsaBinaryFd, |
358 | ResultPath&: tempIsaBinaryFilename)) { |
359 | getOperation().emitError() |
360 | << "failed to create a temporary file for dumping the ISA binary"; |
361 | return std::nullopt; |
362 | } |
363 | llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); |
364 | { |
365 | llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); |
366 | tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size()); |
367 | tempIsaBinaryOs.flush(); |
368 | } |
369 | |
370 | // Create a temp file for HSA code object. |
371 | SmallString<128> tempHsacoFilename; |
372 | if (llvm::sys::fs::createTemporaryFile(Prefix: "kernel", Suffix: "hsaco", |
373 | ResultPath&: tempHsacoFilename)) { |
374 | getOperation().emitError() |
375 | << "failed to create a temporary file for the HSA code object"; |
376 | return std::nullopt; |
377 | } |
378 | llvm::FileRemover cleanupHsaco(tempHsacoFilename); |
379 | |
380 | llvm::SmallString<128> lldPath(toolkitPath); |
381 | llvm::sys::path::append(path&: lldPath, a: "llvm", b: "bin", c: "ld.lld"); |
382 | int lldResult = llvm::sys::ExecuteAndWait( |
383 | Program: lldPath, |
384 | Args: {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); |
385 | if (lldResult != 0) { |
386 | getOperation().emitError() << "lld invocation failed"; |
387 | return std::nullopt; |
388 | } |
389 | |
390 | // Load the HSA code object. |
391 | auto hsacoFile = |
392 | llvm::MemoryBuffer::getFile(Filename: tempHsacoFilename, /*IsText=*/false); |
393 | if (!hsacoFile) { |
394 | getOperation().emitError() |
395 | << "failed to read the HSA code object from the temp file"; |
396 | return std::nullopt; |
397 | } |
398 | |
399 | StringRef buffer = (*hsacoFile)->getBuffer(); |
400 | |
401 | return SmallVector<char, 0>(buffer.begin(), buffer.end()); |
402 | } |
403 | |
404 | std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl( |
405 | const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) { |
406 | // Return LLVM IR if the compilation target is offload. |
407 | #define DEBUG_TYPE "serialize-to-llvm" |
408 | LLVM_DEBUG({ |
409 | llvm::dbgs() << "LLVM IR for module: " |
410 | << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n" |
411 | << llvmModule << "\n"; |
412 | }); |
413 | #undef DEBUG_TYPE |
414 | if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload) |
415 | return SerializeGPUModuleBase::moduleToObject(llvmModule); |
416 | |
417 | std::optional<llvm::TargetMachine *> targetMachine = |
418 | getOrCreateTargetMachine(); |
419 | if (!targetMachine) { |
420 | getOperation().emitError() << "target Machine unavailable for triple " |
421 | << triple << ", can't compile with LLVM"; |
422 | return std::nullopt; |
423 | } |
424 | |
425 | // Translate the Module to ISA. |
426 | std::optional<std::string> serializedISA = |
427 | translateToISA(llvmModule, targetMachine&: **targetMachine); |
428 | if (!serializedISA) { |
429 | getOperation().emitError() << "failed translating the module to ISA"; |
430 | return std::nullopt; |
431 | } |
432 | #define DEBUG_TYPE "serialize-to-isa" |
433 | LLVM_DEBUG({ |
434 | llvm::dbgs() << "ISA for module: " |
435 | << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n" |
436 | << *serializedISA << "\n"; |
437 | }); |
438 | #undef DEBUG_TYPE |
439 | // Return ISA assembly code if the compilation target is assembly. |
440 | if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly) |
441 | return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end()); |
442 | |
443 | // Compiling to binary requires a valid ROCm path, fail if it's not found. |
444 | if (getToolkitPath().empty()) { |
445 | getOperation().emitError() << "invalid ROCm path, please set a valid path"; |
446 | return std::nullopt; |
447 | } |
448 | |
449 | // Compile to binary. |
450 | return compileToBinary(serializedISA: *serializedISA); |
451 | } |
452 | |
453 | #if MLIR_ENABLE_ROCM_CONVERSIONS |
454 | namespace { |
455 | class AMDGPUSerializer : public SerializeGPUModuleBase { |
456 | public: |
457 | AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, |
458 | const gpu::TargetOptions &targetOptions); |
459 | |
460 | std::optional<SmallVector<char, 0>> |
461 | moduleToObject(llvm::Module &llvmModule) override; |
462 | |
463 | private: |
464 | // Target options. |
465 | gpu::TargetOptions targetOptions; |
466 | }; |
467 | } // namespace |
468 | |
469 | AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, |
470 | const gpu::TargetOptions &targetOptions) |
471 | : SerializeGPUModuleBase(module, target, targetOptions), |
472 | targetOptions(targetOptions) {} |
473 | |
474 | std::optional<SmallVector<char, 0>> |
475 | AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { |
476 | return moduleToObjectImpl(targetOptions, llvmModule); |
477 | } |
478 | #endif // MLIR_ENABLE_ROCM_CONVERSIONS |
479 | |
480 | std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject( |
481 | Attribute attribute, Operation *module, |
482 | const gpu::TargetOptions &options) const { |
483 | assert(module && "The module must be non null."); |
484 | if (!module) |
485 | return std::nullopt; |
486 | if (!mlir::isa<gpu::GPUModuleOp>(module)) { |
487 | module->emitError(message: "module must be a GPU module"); |
488 | return std::nullopt; |
489 | } |
490 | #if MLIR_ENABLE_ROCM_CONVERSIONS |
491 | AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute), |
492 | options); |
493 | serializer.init(); |
494 | return serializer.run(); |
495 | #else |
496 | module->emitError("the `AMDGPU` target was not built. Please enable it when " |
497 | "building LLVM"); |
498 | return std::nullopt; |
499 | #endif // MLIR_ENABLE_ROCM_CONVERSIONS |
500 | } |
501 | |
502 | Attribute |
503 | ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module, |
504 | const SmallVector<char, 0> &object, |
505 | const gpu::TargetOptions &options) const { |
506 | gpu::CompilationTarget format = options.getCompilationTarget(); |
507 | // If format is `fatbin` transform it to binary as `fatbin` is not yet |
508 | // supported. |
509 | gpu::KernelTableAttr kernels; |
510 | if (format > gpu::CompilationTarget::Binary) { |
511 | format = gpu::CompilationTarget::Binary; |
512 | kernels = ROCDL::getKernelMetadata(module, object); |
513 | } |
514 | DictionaryAttr properties{}; |
515 | Builder builder(attribute.getContext()); |
516 | StringAttr objectStr = |
517 | builder.getStringAttr(StringRef(object.data(), object.size())); |
518 | return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr, |
519 | properties, kernels); |
520 | } |
521 |
Definitions
- ROCDLTargetAttrImpl
- registerROCDLTargetInterfaceExternalModels
- registerROCDLTargetInterfaceExternalModels
- getROCMPath
- SerializeGPUModuleBase
- init
- getTarget
- getToolkitPath
- getLibrariesToLink
- appendStandardLibs
- loadBitcodeFiles
- handleBitcodeFile
- handleModulePreLink
- addControlVariables
- assembleIsa
- compileToBinary
- moduleToObjectImpl
- AMDGPUSerializer
- AMDGPUSerializer
- moduleToObject
- serializeToObject
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more