1//===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file defines ROCDL target related functions including registration
10// calls for the `#rocdl.target` compilation attribute.
11//
12//===----------------------------------------------------------------------===//
13
14#include "mlir/Target/LLVM/ROCDL/Target.h"
15
16#include "mlir/Dialect/GPU/IR/GPUDialect.h"
17#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
18#include "mlir/Support/FileUtilities.h"
19#include "mlir/Target/LLVM/ROCDL/Utils.h"
20#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
21#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
22#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
23#include "mlir/Target/LLVMIR/Export.h"
24
25#include "llvm/IR/Constants.h"
26#include "llvm/MC/MCAsmBackend.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCCodeEmitter.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCInstrInfo.h"
31#include "llvm/MC/MCObjectFileInfo.h"
32#include "llvm/MC/MCObjectWriter.h"
33#include "llvm/MC/MCParser/MCTargetAsmParser.h"
34#include "llvm/MC/MCRegisterInfo.h"
35#include "llvm/MC/MCStreamer.h"
36#include "llvm/MC/MCSubtargetInfo.h"
37#include "llvm/MC/TargetRegistry.h"
38#include "llvm/Support/FileSystem.h"
39#include "llvm/Support/FileUtilities.h"
40#include "llvm/Support/Path.h"
41#include "llvm/Support/Program.h"
42#include "llvm/Support/SourceMgr.h"
43#include "llvm/Support/TargetSelect.h"
44#include "llvm/TargetParser/TargetParser.h"
45
46#include <cstdlib>
47#include <optional>
48
49using namespace mlir;
50using namespace mlir::ROCDL;
51
52#ifndef __DEFAULT_ROCM_PATH__
53#define __DEFAULT_ROCM_PATH__ ""
54#endif
55
56namespace {
57// Implementation of the `TargetAttrInterface` model.
58class ROCDLTargetAttrImpl
59 : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
60public:
61 std::optional<SmallVector<char, 0>>
62 serializeToObject(Attribute attribute, Operation *module,
63 const gpu::TargetOptions &options) const;
64
65 Attribute createObject(Attribute attribute,
66 const SmallVector<char, 0> &object,
67 const gpu::TargetOptions &options) const;
68};
69} // namespace
70
71// Register the ROCDL dialect, the ROCDL translation and the target interface.
72void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
73 DialectRegistry &registry) {
74 registry.addExtension(extensionFn: +[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) {
75 ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
76 });
77}
78
79void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
80 MLIRContext &context) {
81 DialectRegistry registry;
82 registerROCDLTargetInterfaceExternalModels(registry);
83 context.appendDialectRegistry(registry);
84}
85
86// Search for the ROCM path.
87StringRef mlir::ROCDL::getROCMPath() {
88 if (const char *var = std::getenv(name: "ROCM_PATH"))
89 return var;
90 if (const char *var = std::getenv(name: "ROCM_ROOT"))
91 return var;
92 if (const char *var = std::getenv(name: "ROCM_HOME"))
93 return var;
94 return __DEFAULT_ROCM_PATH__;
95}
96
97SerializeGPUModuleBase::SerializeGPUModuleBase(
98 Operation &module, ROCDLTargetAttr target,
99 const gpu::TargetOptions &targetOptions)
100 : ModuleToObject(module, target.getTriple(), target.getChip(),
101 target.getFeatures(), target.getO()),
102 target(target), toolkitPath(targetOptions.getToolkitPath()),
103 fileList(targetOptions.getLinkFiles()) {
104
105 // If `targetOptions` has an empty toolkitPath use `getROCMPath`
106 if (toolkitPath.empty())
107 toolkitPath = getROCMPath();
108
109 // Append the files in the target attribute.
110 if (ArrayAttr files = target.getLink())
111 for (Attribute attr : files.getValue())
112 if (auto file = dyn_cast<StringAttr>(attr))
113 fileList.push_back(file.str());
114
115 // Append standard ROCm device bitcode libraries to the files to be loaded.
116 (void)appendStandardLibs();
117}
118
119void SerializeGPUModuleBase::init() {
120 static llvm::once_flag initializeBackendOnce;
121 llvm::call_once(flag&: initializeBackendOnce, F: []() {
122 // If the `AMDGPU` LLVM target was built, initialize it.
123#if MLIR_ENABLE_ROCM_CONVERSIONS
124 LLVMInitializeAMDGPUTarget();
125 LLVMInitializeAMDGPUTargetInfo();
126 LLVMInitializeAMDGPUTargetMC();
127 LLVMInitializeAMDGPUAsmParser();
128 LLVMInitializeAMDGPUAsmPrinter();
129#endif
130 });
131}
132
133ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
134
135StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
136
137ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
138 return fileList;
139}
140
141LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
142 StringRef pathRef = getToolkitPath();
143 if (!pathRef.empty()) {
144 SmallVector<char, 256> path;
145 path.insert(I: path.begin(), From: pathRef.begin(), To: pathRef.end());
146 llvm::sys::path::append(path, a: "amdgcn", b: "bitcode");
147 pathRef = StringRef(path.data(), path.size());
148 if (!llvm::sys::fs::is_directory(Path: pathRef)) {
149 getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef
150 << " does not exist or is not a directory.";
151 return failure();
152 }
153 StringRef isaVersion =
154 llvm::AMDGPU::getArchNameAMDGCN(AK: llvm::AMDGPU::parseArchAMDGCN(CPU: chip));
155 isaVersion.consume_front(Prefix: "gfx");
156 return getCommonBitcodeLibs(libs&: fileList, libPath&: path, isaVersion);
157 }
158 return success();
159}
160
161std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
162SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
163 SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
164 if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles,
165 true)))
166 return std::nullopt;
167 return std::move(bcFiles);
168}
169
170LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) {
171 // Some ROCM builds don't strip this like they should
172 if (auto *openclVersion = module.getNamedMetadata(Name: "opencl.ocl.version"))
173 module.eraseNamedMetadata(NMD: openclVersion);
174 // Stop spamming us with clang version numbers
175 if (auto *ident = module.getNamedMetadata(Name: "llvm.ident"))
176 module.eraseNamedMetadata(NMD: ident);
177 return success();
178}
179
180void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) {
181 [[maybe_unused]] std::optional<llvm::TargetMachine *> targetMachine =
182 getOrCreateTargetMachine();
183 assert(targetMachine && "expect a TargetMachine");
184 addControlVariables(module, target.hasWave64(), target.hasDaz(),
185 target.hasFiniteOnly(), target.hasUnsafeMath(),
186 target.hasFastMath(), target.hasCorrectSqrt(),
187 target.getAbi());
188}
189
190// Get the paths of ROCm device libraries.
191LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs(
192 llvm::SmallVector<std::string> &libs, SmallVector<char, 256> &libPath,
193 StringRef isaVersion) {
194 auto addLib = [&](StringRef path) -> bool {
195 if (!llvm::sys::fs::is_regular_file(Path: path)) {
196 getOperation().emitRemark() << "Bitcode library path: " << path
197 << " does not exist or is not a file.\n";
198 return true;
199 }
200 libs.push_back(Elt: path.str());
201 return false;
202 };
203 auto getLibPath = [&libPath](Twine lib) {
204 auto baseSize = libPath.size();
205 llvm::sys::path::append(path&: libPath, a: lib + ".bc");
206 std::string path(StringRef(libPath.data(), libPath.size()).str());
207 libPath.truncate(N: baseSize);
208 return path;
209 };
210
211 // Add ROCm device libraries. Fail if any of the libraries is not found.
212 if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) ||
213 addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) ||
214 addLib(getLibPath("oclc_isa_version_" + isaVersion)))
215 return failure();
216 return success();
217}
218
219void SerializeGPUModuleBase::addControlVariables(
220 llvm::Module &module, bool wave64, bool daz, bool finiteOnly,
221 bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) {
222 llvm::Type *i8Ty = llvm::Type::getInt8Ty(C&: module.getContext());
223 auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) {
224 llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable(
225 module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
226 llvm::ConstantInt::get(Ty: i8Ty, V: enable), name, nullptr,
227 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
228 controlVariable->setVisibility(
229 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
230 controlVariable->setAlignment(llvm::MaybeAlign(1));
231 controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
232 };
233 addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath);
234 addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath);
235 addControlVariable("__oclc_daz_opt", daz || fastMath);
236 addControlVariable("__oclc_correctly_rounded_sqrt32",
237 correctSqrt && !fastMath);
238 addControlVariable("__oclc_wavefrontsize64", wave64);
239
240 llvm::Type *i32Ty = llvm::Type::getInt32Ty(C&: module.getContext());
241 int abi = 500;
242 abiVer.getAsInteger(Radix: 0, Result&: abi);
243 llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable(
244 module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
245 llvm::ConstantInt::get(Ty: i32Ty, V: abi), "__oclc_ABI_version", nullptr,
246 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
247 abiVersion->setVisibility(
248 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
249 abiVersion->setAlignment(llvm::MaybeAlign(4));
250 abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
251}
252
253std::optional<SmallVector<char, 0>>
254SerializeGPUModuleBase::assembleIsa(StringRef isa) {
255 auto loc = getOperation().getLoc();
256
257 StringRef targetTriple = this->triple;
258
259 SmallVector<char, 0> result;
260 llvm::raw_svector_ostream os(result);
261
262 llvm::Triple triple(llvm::Triple::normalize(Str: targetTriple));
263 std::string error;
264 const llvm::Target *target =
265 llvm::TargetRegistry::lookupTarget(Triple: triple.normalize(), Error&: error);
266 if (!target) {
267 emitError(loc, Twine("failed to lookup target: ") + error);
268 return std::nullopt;
269 }
270
271 llvm::SourceMgr srcMgr;
272 srcMgr.AddNewSourceBuffer(F: llvm::MemoryBuffer::getMemBuffer(InputData: isa), IncludeLoc: SMLoc());
273
274 const llvm::MCTargetOptions mcOptions;
275 std::unique_ptr<llvm::MCRegisterInfo> mri(
276 target->createMCRegInfo(TT: targetTriple));
277 std::unique_ptr<llvm::MCAsmInfo> mai(
278 target->createMCAsmInfo(MRI: *mri, TheTriple: targetTriple, Options: mcOptions));
279 std::unique_ptr<llvm::MCSubtargetInfo> sti(
280 target->createMCSubtargetInfo(TheTriple: targetTriple, CPU: chip, Features: features));
281
282 llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
283 &mcOptions);
284 std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
285 Ctx&: ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
286 ctx.setObjectFileInfo(mofi.get());
287
288 SmallString<128> cwd;
289 if (!llvm::sys::fs::current_path(result&: cwd))
290 ctx.setCompilationDir(cwd);
291
292 std::unique_ptr<llvm::MCStreamer> mcStreamer;
293 std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
294
295 llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(II: *mcii, Ctx&: ctx);
296 llvm::MCAsmBackend *mab = target->createMCAsmBackend(STI: *sti, MRI: *mri, Options: mcOptions);
297 mcStreamer.reset(p: target->createMCObjectStreamer(
298 T: triple, Ctx&: ctx, TAB: std::unique_ptr<llvm::MCAsmBackend>(mab),
299 OW: mab->createObjectWriter(OS&: os), Emitter: std::unique_ptr<llvm::MCCodeEmitter>(ce),
300 STI: *sti, RelaxAll: mcOptions.MCRelaxAll, IncrementalLinkerCompatible: mcOptions.MCIncrementalLinkerCompatible,
301 /*DWARFMustBeAtTheEnd*/ false));
302 mcStreamer->setUseAssemblerInfoForParsing(true);
303
304 std::unique_ptr<llvm::MCAsmParser> parser(
305 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
306 std::unique_ptr<llvm::MCTargetAsmParser> tap(
307 target->createMCAsmParser(STI: *sti, Parser&: *parser, MII: *mcii, Options: mcOptions));
308
309 if (!tap) {
310 emitError(loc, "assembler initialization error");
311 return {};
312 }
313
314 parser->setTargetParser(*tap);
315 parser->Run(NoInitialTextSection: false);
316
317 return result;
318}
319
320#if MLIR_ENABLE_ROCM_CONVERSIONS
321namespace {
322class AMDGPUSerializer : public SerializeGPUModuleBase {
323public:
324 AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
325 const gpu::TargetOptions &targetOptions);
326
327 gpu::GPUModuleOp getOperation();
328
329 // Compile to HSA.
330 std::optional<SmallVector<char, 0>>
331 compileToBinary(const std::string &serializedISA);
332
333 std::optional<SmallVector<char, 0>>
334 moduleToObject(llvm::Module &llvmModule) override;
335
336private:
337 // Target options.
338 gpu::TargetOptions targetOptions;
339};
340} // namespace
341
342AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
343 const gpu::TargetOptions &targetOptions)
344 : SerializeGPUModuleBase(module, target, targetOptions),
345 targetOptions(targetOptions) {}
346
347gpu::GPUModuleOp AMDGPUSerializer::getOperation() {
348 return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
349}
350
351std::optional<SmallVector<char, 0>>
352AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
353 // Assemble the ISA.
354 std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA);
355
356 if (!isaBinary) {
357 getOperation().emitError() << "Failed during ISA assembling.";
358 return std::nullopt;
359 }
360
361 // Save the ISA binary to a temp file.
362 int tempIsaBinaryFd = -1;
363 SmallString<128> tempIsaBinaryFilename;
364 if (llvm::sys::fs::createTemporaryFile(Prefix: "kernel%%", Suffix: "o", ResultFD&: tempIsaBinaryFd,
365 ResultPath&: tempIsaBinaryFilename)) {
366 getOperation().emitError()
367 << "Failed to create a temporary file for dumping the ISA binary.";
368 return std::nullopt;
369 }
370 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
371 {
372 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
373 tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size());
374 tempIsaBinaryOs.flush();
375 }
376
377 // Create a temp file for HSA code object.
378 SmallString<128> tempHsacoFilename;
379 if (llvm::sys::fs::createTemporaryFile(Prefix: "kernel", Suffix: "hsaco",
380 ResultPath&: tempHsacoFilename)) {
381 getOperation().emitError()
382 << "Failed to create a temporary file for the HSA code object.";
383 return std::nullopt;
384 }
385 llvm::FileRemover cleanupHsaco(tempHsacoFilename);
386
387 llvm::SmallString<128> lldPath(toolkitPath);
388 llvm::sys::path::append(path&: lldPath, a: "llvm", b: "bin", c: "ld.lld");
389 int lldResult = llvm::sys::ExecuteAndWait(
390 Program: lldPath,
391 Args: {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename});
392 if (lldResult != 0) {
393 getOperation().emitError() << "lld invocation failed.";
394 return std::nullopt;
395 }
396
397 // Load the HSA code object.
398 auto hsacoFile =
399 llvm::MemoryBuffer::getFile(Filename: tempHsacoFilename, /*IsText=*/false);
400 if (!hsacoFile) {
401 getOperation().emitError()
402 << "Failed to read the HSA code object from the temp file.";
403 return std::nullopt;
404 }
405
406 StringRef buffer = (*hsacoFile)->getBuffer();
407
408 return SmallVector<char, 0>(buffer.begin(), buffer.end());
409}
410
411std::optional<SmallVector<char, 0>>
412AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
413 // Return LLVM IR if the compilation target is offload.
414#define DEBUG_TYPE "serialize-to-llvm"
415 LLVM_DEBUG({
416 llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
417 << "\n"
418 << llvmModule << "\n";
419 });
420#undef DEBUG_TYPE
421 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
422 return SerializeGPUModuleBase::moduleToObject(llvmModule);
423
424 std::optional<llvm::TargetMachine *> targetMachine =
425 getOrCreateTargetMachine();
426 if (!targetMachine) {
427 getOperation().emitError() << "Target Machine unavailable for triple "
428 << triple << ", can't compile with LLVM\n";
429 return std::nullopt;
430 }
431
432 // Translate the Module to ISA.
433 std::optional<std::string> serializedISA =
434 translateToISA(llvmModule, targetMachine&: **targetMachine);
435 if (!serializedISA) {
436 getOperation().emitError() << "Failed translating the module to ISA.";
437 return std::nullopt;
438 }
439#define DEBUG_TYPE "serialize-to-isa"
440 LLVM_DEBUG({
441 llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n"
442 << *serializedISA << "\n";
443 });
444#undef DEBUG_TYPE
445 // Return ISA assembly code if the compilation target is assembly.
446 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
447 return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());
448
449 // Compile to binary.
450 return compileToBinary(serializedISA: *serializedISA);
451}
452#endif // MLIR_ENABLE_ROCM_CONVERSIONS
453
454std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
455 Attribute attribute, Operation *module,
456 const gpu::TargetOptions &options) const {
457 assert(module && "The module must be non null.");
458 if (!module)
459 return std::nullopt;
460 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
461 module->emitError(message: "Module must be a GPU module.");
462 return std::nullopt;
463 }
464#if MLIR_ENABLE_ROCM_CONVERSIONS
465 AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
466 options);
467 serializer.init();
468 return serializer.run();
469#else
470 module->emitError("The `AMDGPU` target was not built. Please enable it when "
471 "building LLVM.");
472 return std::nullopt;
473#endif // MLIR_ENABLE_ROCM_CONVERSIONS
474}
475
476Attribute
477ROCDLTargetAttrImpl::createObject(Attribute attribute,
478 const SmallVector<char, 0> &object,
479 const gpu::TargetOptions &options) const {
480 gpu::CompilationTarget format = options.getCompilationTarget();
481 Builder builder(attribute.getContext());
482 return builder.getAttr<gpu::ObjectAttr>(
483 attribute,
484 format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary
485 : format,
486 builder.getStringAttr(StringRef(object.data(), object.size())), nullptr);
487}
488

source code of mlir/lib/Target/LLVM/ROCDL/Target.cpp