//===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines ROCDL target related functions including registration
// calls for the `#rocdl.target` compilation attribute.
//
//===----------------------------------------------------------------------===//
13
14#include "mlir/Target/LLVM/ROCDL/Target.h"
15
16#include "mlir/Dialect/GPU/IR/GPUDialect.h"
17#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
18#include "mlir/Support/FileUtilities.h"
19#include "mlir/Target/LLVM/ROCDL/Utils.h"
20#include "mlir/Target/LLVMIR/Export.h"
21
22#include "llvm/IR/Constants.h"
23#include "llvm/MC/MCAsmBackend.h"
24#include "llvm/MC/MCAsmInfo.h"
25#include "llvm/MC/MCCodeEmitter.h"
26#include "llvm/MC/MCContext.h"
27#include "llvm/MC/MCInstrInfo.h"
28#include "llvm/MC/MCObjectFileInfo.h"
29#include "llvm/MC/MCObjectWriter.h"
30#include "llvm/MC/MCParser/MCTargetAsmParser.h"
31#include "llvm/MC/MCRegisterInfo.h"
32#include "llvm/MC/MCStreamer.h"
33#include "llvm/MC/MCSubtargetInfo.h"
34#include "llvm/MC/TargetRegistry.h"
35#include "llvm/Support/FileSystem.h"
36#include "llvm/Support/FileUtilities.h"
37#include "llvm/Support/Path.h"
38#include "llvm/Support/Program.h"
39#include "llvm/Support/SourceMgr.h"
40#include "llvm/Support/TargetSelect.h"
41#include "llvm/TargetParser/TargetParser.h"
42
43#include <cstdlib>
44#include <optional>
45
46using namespace mlir;
47using namespace mlir::ROCDL;
48
49#ifndef __DEFAULT_ROCM_PATH__
50#define __DEFAULT_ROCM_PATH__ ""
51#endif
52
53namespace {
54// Implementation of the `TargetAttrInterface` model.
55class ROCDLTargetAttrImpl
56 : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
57public:
58 std::optional<SmallVector<char, 0>>
59 serializeToObject(Attribute attribute, Operation *module,
60 const gpu::TargetOptions &options) const;
61
62 Attribute createObject(Attribute attribute, Operation *module,
63 const SmallVector<char, 0> &object,
64 const gpu::TargetOptions &options) const;
65};
66} // namespace
67
68// Register the ROCDL dialect, the ROCDL translation and the target interface.
69void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
70 DialectRegistry &registry) {
71 registry.addExtension(extensionFn: +[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) {
72 ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
73 });
74}
75
76void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
77 MLIRContext &context) {
78 DialectRegistry registry;
79 registerROCDLTargetInterfaceExternalModels(registry);
80 context.appendDialectRegistry(registry);
81}
82
83// Search for the ROCM path.
84StringRef mlir::ROCDL::getROCMPath() {
85 if (const char *var = std::getenv(name: "ROCM_PATH"))
86 return var;
87 if (const char *var = std::getenv(name: "ROCM_ROOT"))
88 return var;
89 if (const char *var = std::getenv(name: "ROCM_HOME"))
90 return var;
91 return __DEFAULT_ROCM_PATH__;
92}
93
94SerializeGPUModuleBase::SerializeGPUModuleBase(
95 Operation &module, ROCDLTargetAttr target,
96 const gpu::TargetOptions &targetOptions)
97 : ModuleToObject(module, target.getTriple(), target.getChip(),
98 target.getFeatures(), target.getO()),
99 target(target), toolkitPath(targetOptions.getToolkitPath()),
100 librariesToLink(targetOptions.getLibrariesToLink()) {
101
102 // If `targetOptions` has an empty toolkitPath use `getROCMPath`
103 if (toolkitPath.empty())
104 toolkitPath = getROCMPath();
105
106 // Append the files in the target attribute.
107 if (target.getLink())
108 librariesToLink.append(target.getLink().begin(), target.getLink().end());
109}
110
111void SerializeGPUModuleBase::init() {
112 static llvm::once_flag initializeBackendOnce;
113 llvm::call_once(flag&: initializeBackendOnce, F: []() {
114 // If the `AMDGPU` LLVM target was built, initialize it.
115#if MLIR_ENABLE_ROCM_CONVERSIONS
116 LLVMInitializeAMDGPUTarget();
117 LLVMInitializeAMDGPUTargetInfo();
118 LLVMInitializeAMDGPUTargetMC();
119 LLVMInitializeAMDGPUAsmParser();
120 LLVMInitializeAMDGPUAsmPrinter();
121#endif
122 });
123}
124
125ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
126
127StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
128
129ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const {
130 return librariesToLink;
131}
132
133LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) {
134 if (libs == AMDGCNLibraries::None)
135 return success();
136 StringRef pathRef = getToolkitPath();
137
138 // Get the path for the device libraries
139 SmallString<256> path;
140 path.insert(I: path.begin(), From: pathRef.begin(), To: pathRef.end());
141 llvm::sys::path::append(path, a: "amdgcn", b: "bitcode");
142 pathRef = StringRef(path.data(), path.size());
143
144 // Fail if the path is invalid.
145 if (!llvm::sys::fs::is_directory(Path: pathRef)) {
146 getOperation().emitError() << "ROCm amdgcn bitcode path: " << pathRef
147 << " does not exist or is not a directory";
148 return failure();
149 }
150
151 // Helper function for adding a library.
152 auto addLib = [&](const Twine &lib) -> bool {
153 auto baseSize = path.size();
154 llvm::sys::path::append(path, a: lib);
155 StringRef pathRef(path.data(), path.size());
156 if (!llvm::sys::fs::is_regular_file(Path: pathRef)) {
157 getOperation().emitRemark() << "bitcode library path: " << pathRef
158 << " does not exist or is not a file";
159 return true;
160 }
161 librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
162 path.truncate(N: baseSize);
163 return false;
164 };
165
166 // Add ROCm device libraries. Fail if any of the libraries is not found, ie.
167 // if any of the `addLib` failed.
168 if ((any(Val: libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) ||
169 (any(Val: libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) ||
170 (any(Val: libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) ||
171 (any(Val: libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc")))
172 return failure();
173 return success();
174}
175
176std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
177SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
178 // Return if there are no libs to load.
179 if (deviceLibs == AMDGCNLibraries::None && librariesToLink.empty())
180 return SmallVector<std::unique_ptr<llvm::Module>>();
181 if (failed(Result: appendStandardLibs(libs: deviceLibs)))
182 return std::nullopt;
183 SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
184 if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink,
185 bcFiles, true)))
186 return std::nullopt;
187 return std::move(bcFiles);
188}
189
190LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) {
191 // Some ROCM builds don't strip this like they should
192 if (auto *openclVersion = module.getNamedMetadata(Name: "opencl.ocl.version"))
193 module.eraseNamedMetadata(NMD: openclVersion);
194 // Stop spamming us with clang version numbers
195 if (auto *ident = module.getNamedMetadata(Name: "llvm.ident"))
196 module.eraseNamedMetadata(NMD: ident);
197 // Override the libModules datalayout and target triple with the compiler's
198 // data layout should there be a discrepency.
199 setDataLayoutAndTriple(module);
200 return success();
201}
202
203void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) {
204 // If all libraries are not set, traverse the module to determine which
205 // libraries are required.
206 if (deviceLibs != AMDGCNLibraries::All) {
207 for (llvm::Function &f : module.functions()) {
208 if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
209 StringRef funcName = f.getName();
210 if ("printf" == funcName)
211 deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl |
212 AMDGCNLibraries::Ocml;
213 if (funcName.starts_with(Prefix: "__ockl_"))
214 deviceLibs |= AMDGCNLibraries::Ockl;
215 if (funcName.starts_with(Prefix: "__ocml_"))
216 deviceLibs |= AMDGCNLibraries::Ocml;
217 if (funcName == "__atomic_work_item_fence")
218 deviceLibs |= AMDGCNLibraries::Hip;
219 }
220 }
221 }
222 addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(),
223 target.hasFiniteOnly(), target.hasUnsafeMath(),
224 target.hasFastMath(), target.hasCorrectSqrt(),
225 target.getAbi());
226}
227
228void SerializeGPUModuleBase::addControlVariables(
229 llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz,
230 bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt,
231 StringRef abiVer) {
232 // Helper function for adding control variables.
233 auto addControlVariable = [&module](StringRef name, uint32_t value,
234 uint32_t bitwidth) {
235 if (module.getNamedGlobal(Name: name))
236 return;
237 llvm::IntegerType *type =
238 llvm::IntegerType::getIntNTy(C&: module.getContext(), N: bitwidth);
239 llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable(
240 module, /*isConstant=*/type, true,
241 llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
242 llvm::ConstantInt::get(Ty: type, V: value), name, /*before=*/nullptr,
243 /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
244 /*addressSpace=*/4);
245 controlVariable->setVisibility(
246 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
247 controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
248 controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
249 };
250
251 // Note that COV6 requires ROCm 6.3+.
252 int abi = 600;
253 abiVer.getAsInteger(Radix: 0, Result&: abi);
254 module.addModuleFlag(Behavior: llvm::Module::Error, Key: "amdhsa_code_object_version", Val: abi);
255 // Return if no device libraries are required.
256 if (libs == AMDGCNLibraries::None)
257 return;
258 // Add ocml related control variables.
259 if (any(Val: libs & AMDGCNLibraries::Ocml)) {
260 addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8);
261 addControlVariable("__oclc_daz_opt", daz || fastMath, 8);
262 addControlVariable("__oclc_correctly_rounded_sqrt32",
263 correctSqrt && !fastMath, 8);
264 addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8);
265 }
266 // Add ocml or ockl related control variables.
267 if (any(Val: libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) {
268 addControlVariable("__oclc_wavefrontsize64", wave64, 8);
269 // Get the ISA version.
270 llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(GPU: chip);
271 // Add the ISA control variable.
272 addControlVariable("__oclc_ISA_version",
273 isaVersion.Minor + 100 * isaVersion.Stepping +
274 1000 * isaVersion.Major,
275 32);
276 addControlVariable("__oclc_ABI_version", abi, 32);
277 }
278}
279
280std::optional<SmallVector<char, 0>>
281SerializeGPUModuleBase::assembleIsa(StringRef isa) {
282 auto loc = getOperation().getLoc();
283
284 StringRef targetTriple = this->triple;
285
286 SmallVector<char, 0> result;
287 llvm::raw_svector_ostream os(result);
288
289 llvm::Triple triple(llvm::Triple::normalize(Str: targetTriple));
290 std::string error;
291 const llvm::Target *target =
292 llvm::TargetRegistry::lookupTarget(TripleStr: triple.normalize(), Error&: error);
293 if (!target) {
294 emitError(loc, Twine("failed to lookup target: ") + error);
295 return std::nullopt;
296 }
297
298 llvm::SourceMgr srcMgr;
299 srcMgr.AddNewSourceBuffer(F: llvm::MemoryBuffer::getMemBuffer(InputData: isa), IncludeLoc: SMLoc());
300
301 const llvm::MCTargetOptions mcOptions;
302 std::unique_ptr<llvm::MCRegisterInfo> mri(
303 target->createMCRegInfo(TT: targetTriple));
304 std::unique_ptr<llvm::MCAsmInfo> mai(
305 target->createMCAsmInfo(MRI: *mri, TheTriple: targetTriple, Options: mcOptions));
306 std::unique_ptr<llvm::MCSubtargetInfo> sti(
307 target->createMCSubtargetInfo(TheTriple: targetTriple, CPU: chip, Features: features));
308
309 llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
310 &mcOptions);
311 std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
312 Ctx&: ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
313 ctx.setObjectFileInfo(mofi.get());
314
315 SmallString<128> cwd;
316 if (!llvm::sys::fs::current_path(result&: cwd))
317 ctx.setCompilationDir(cwd);
318
319 std::unique_ptr<llvm::MCStreamer> mcStreamer;
320 std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
321
322 llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(II: *mcii, Ctx&: ctx);
323 llvm::MCAsmBackend *mab = target->createMCAsmBackend(STI: *sti, MRI: *mri, Options: mcOptions);
324 mcStreamer.reset(p: target->createMCObjectStreamer(
325 T: triple, Ctx&: ctx, TAB: std::unique_ptr<llvm::MCAsmBackend>(mab),
326 OW: mab->createObjectWriter(OS&: os), Emitter: std::unique_ptr<llvm::MCCodeEmitter>(ce),
327 STI: *sti));
328
329 std::unique_ptr<llvm::MCAsmParser> parser(
330 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
331 std::unique_ptr<llvm::MCTargetAsmParser> tap(
332 target->createMCAsmParser(STI: *sti, Parser&: *parser, MII: *mcii, Options: mcOptions));
333
334 if (!tap) {
335 emitError(loc, "assembler initialization error");
336 return std::nullopt;
337 }
338
339 parser->setTargetParser(*tap);
340 parser->Run(NoInitialTextSection: false);
341 return std::move(result);
342}
343
344std::optional<SmallVector<char, 0>>
345SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
346 // Assemble the ISA.
347 std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(isa: serializedISA);
348
349 if (!isaBinary) {
350 getOperation().emitError() << "failed during ISA assembling";
351 return std::nullopt;
352 }
353
354 // Save the ISA binary to a temp file.
355 int tempIsaBinaryFd = -1;
356 SmallString<128> tempIsaBinaryFilename;
357 if (llvm::sys::fs::createTemporaryFile(Prefix: "kernel%%", Suffix: "o", ResultFD&: tempIsaBinaryFd,
358 ResultPath&: tempIsaBinaryFilename)) {
359 getOperation().emitError()
360 << "failed to create a temporary file for dumping the ISA binary";
361 return std::nullopt;
362 }
363 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
364 {
365 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
366 tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size());
367 tempIsaBinaryOs.flush();
368 }
369
370 // Create a temp file for HSA code object.
371 SmallString<128> tempHsacoFilename;
372 if (llvm::sys::fs::createTemporaryFile(Prefix: "kernel", Suffix: "hsaco",
373 ResultPath&: tempHsacoFilename)) {
374 getOperation().emitError()
375 << "failed to create a temporary file for the HSA code object";
376 return std::nullopt;
377 }
378 llvm::FileRemover cleanupHsaco(tempHsacoFilename);
379
380 llvm::SmallString<128> lldPath(toolkitPath);
381 llvm::sys::path::append(path&: lldPath, a: "llvm", b: "bin", c: "ld.lld");
382 int lldResult = llvm::sys::ExecuteAndWait(
383 Program: lldPath,
384 Args: {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename});
385 if (lldResult != 0) {
386 getOperation().emitError() << "lld invocation failed";
387 return std::nullopt;
388 }
389
390 // Load the HSA code object.
391 auto hsacoFile =
392 llvm::MemoryBuffer::getFile(Filename: tempHsacoFilename, /*IsText=*/false);
393 if (!hsacoFile) {
394 getOperation().emitError()
395 << "failed to read the HSA code object from the temp file";
396 return std::nullopt;
397 }
398
399 StringRef buffer = (*hsacoFile)->getBuffer();
400
401 return SmallVector<char, 0>(buffer.begin(), buffer.end());
402}
403
404std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
405 const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) {
406 // Return LLVM IR if the compilation target is offload.
407#define DEBUG_TYPE "serialize-to-llvm"
408 LLVM_DEBUG({
409 llvm::dbgs() << "LLVM IR for module: "
410 << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
411 << llvmModule << "\n";
412 });
413#undef DEBUG_TYPE
414 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
415 return SerializeGPUModuleBase::moduleToObject(llvmModule);
416
417 std::optional<llvm::TargetMachine *> targetMachine =
418 getOrCreateTargetMachine();
419 if (!targetMachine) {
420 getOperation().emitError() << "target Machine unavailable for triple "
421 << triple << ", can't compile with LLVM";
422 return std::nullopt;
423 }
424
425 // Translate the Module to ISA.
426 std::optional<std::string> serializedISA =
427 translateToISA(llvmModule, targetMachine&: **targetMachine);
428 if (!serializedISA) {
429 getOperation().emitError() << "failed translating the module to ISA";
430 return std::nullopt;
431 }
432#define DEBUG_TYPE "serialize-to-isa"
433 LLVM_DEBUG({
434 llvm::dbgs() << "ISA for module: "
435 << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
436 << *serializedISA << "\n";
437 });
438#undef DEBUG_TYPE
439 // Return ISA assembly code if the compilation target is assembly.
440 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
441 return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());
442
443 // Compiling to binary requires a valid ROCm path, fail if it's not found.
444 if (getToolkitPath().empty()) {
445 getOperation().emitError() << "invalid ROCm path, please set a valid path";
446 return std::nullopt;
447 }
448
449 // Compile to binary.
450 return compileToBinary(serializedISA: *serializedISA);
451}
452
#if MLIR_ENABLE_ROCM_CONVERSIONS
namespace {
/// Concrete serializer used by `ROCDLTargetAttrImpl::serializeToObject`. It
/// keeps its own copy of the target options and forwards serialization to
/// `SerializeGPUModuleBase::moduleToObjectImpl`.
class AMDGPUSerializer : public SerializeGPUModuleBase {
public:
  AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
                   const gpu::TargetOptions &targetOptions);

  /// Serializes `llvmModule` using the stored target options.
  std::optional<SmallVector<char, 0>>
  moduleToObject(llvm::Module &llvmModule) override;

private:
  // Target options.
  gpu::TargetOptions targetOptions;
};
} // namespace

AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
                                   const gpu::TargetOptions &targetOptions)
    : SerializeGPUModuleBase(module, target, targetOptions),
      targetOptions(targetOptions) {}

std::optional<SmallVector<char, 0>>
AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
  return moduleToObjectImpl(targetOptions, llvmModule);
}
#endif // MLIR_ENABLE_ROCM_CONVERSIONS
479
480std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
481 Attribute attribute, Operation *module,
482 const gpu::TargetOptions &options) const {
483 assert(module && "The module must be non null.");
484 if (!module)
485 return std::nullopt;
486 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
487 module->emitError(message: "module must be a GPU module");
488 return std::nullopt;
489 }
490#if MLIR_ENABLE_ROCM_CONVERSIONS
491 AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
492 options);
493 serializer.init();
494 return serializer.run();
495#else
496 module->emitError("the `AMDGPU` target was not built. Please enable it when "
497 "building LLVM");
498 return std::nullopt;
499#endif // MLIR_ENABLE_ROCM_CONVERSIONS
500}
501
502Attribute
503ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
504 const SmallVector<char, 0> &object,
505 const gpu::TargetOptions &options) const {
506 gpu::CompilationTarget format = options.getCompilationTarget();
507 // If format is `fatbin` transform it to binary as `fatbin` is not yet
508 // supported.
509 gpu::KernelTableAttr kernels;
510 if (format > gpu::CompilationTarget::Binary) {
511 format = gpu::CompilationTarget::Binary;
512 kernels = ROCDL::getKernelMetadata(module, object);
513 }
514 DictionaryAttr properties{};
515 Builder builder(attribute.getContext());
516 StringAttr objectStr =
517 builder.getStringAttr(StringRef(object.data(), object.size()));
518 return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
519 properties, kernels);
520}
521

// Source listing of mlir/lib/Target/LLVM/ROCDL/Target.cpp, extracted from a
// code browser provided by KDAB.