1//===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file defines NVVM target related functions including registration
10// calls for the `#nvvm.target` compilation attribute.
11//
12//===----------------------------------------------------------------------===//
13
14#include "mlir/Target/LLVM/NVVM/Target.h"
15
16#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
17#include "mlir/Dialect/GPU/IR/GPUDialect.h"
18#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
19#include "mlir/IR/BuiltinDialect.h"
20#include "mlir/IR/BuiltinTypes.h"
21#include "mlir/IR/DialectResourceBlobManager.h"
22#include "mlir/Target/LLVM/NVVM/Utils.h"
23#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
24#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
25#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
26#include "mlir/Target/LLVMIR/Export.h"
27
28#include "llvm/ADT/ScopeExit.h"
29#include "llvm/Config/Targets.h"
30#include "llvm/Support/FileSystem.h"
31#include "llvm/Support/FileUtilities.h"
32#include "llvm/Support/FormatVariadic.h"
33#include "llvm/Support/MemoryBuffer.h"
34#include "llvm/Support/Path.h"
35#include "llvm/Support/Process.h"
36#include "llvm/Support/Program.h"
37#include "llvm/Support/TargetSelect.h"
38#include "llvm/Support/Timer.h"
39#include "llvm/Support/raw_ostream.h"
40
41#include <cstdint>
42#include <cstdlib>
43#include <optional>
44
45using namespace mlir;
46using namespace mlir::NVVM;
47
48#ifndef __DEFAULT_CUDATOOLKIT_PATH__
49#define __DEFAULT_CUDATOOLKIT_PATH__ ""
50#endif
51
52extern "C" const unsigned char _mlir_embedded_libdevice[];
53extern "C" const unsigned _mlir_embedded_libdevice_size;
54
namespace {
// Implementation of the `TargetAttrInterface` model.
// Attached as a fallback model to `#nvvm.target` attributes so the GPU
// dialect's compilation pipeline can serialize GPU modules through the
// NVPTX backend and the CUDA toolchain tools.
class NVVMTargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
public:
  // Serializes `module` to an object blob (LLVM IR, PTX, cubin, or fatbin
  // depending on `options`); returns std::nullopt on failure.
  std::optional<SmallVector<char, 0>>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;

  // Wraps a serialized `object` into a `#gpu.object` attribute, attaching
  // properties such as the optimization level and perf timings.
  Attribute createObject(Attribute attribute, Operation *module,
                         const SmallVector<char, 0> &object,
                         const gpu::TargetOptions &options) const;
};
} // namespace
69
70// Register the NVVM dialect, the NVVM translation & the target interface.
71void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
72 DialectRegistry &registry) {
73 registry.addExtension(extensionFn: +[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
74 NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(context&: *ctx);
75 });
76}
77
78void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
79 MLIRContext &context) {
80 DialectRegistry registry;
81 registerNVVMTargetInterfaceExternalModels(registry);
82 context.appendDialectRegistry(registry);
83}
84
85// Search for the CUDA toolkit path.
86StringRef mlir::NVVM::getCUDAToolkitPath() {
87 if (const char *var = std::getenv(name: "CUDA_ROOT"))
88 return var;
89 if (const char *var = std::getenv(name: "CUDA_HOME"))
90 return var;
91 if (const char *var = std::getenv(name: "CUDA_PATH"))
92 return var;
93 return __DEFAULT_CUDATOOLKIT_PATH__;
94}
95
// Constructor: forwards the target's triple/chip/features/opt-level and the
// option callbacks to the generic `ModuleToObject` driver, then resolves the
// toolkit path and collects the bitcode libraries to link.
SerializeGPUModuleBase::SerializeGPUModuleBase(
    Operation &module, NVVMTargetAttr target,
    const gpu::TargetOptions &targetOptions)
    : ModuleToObject(module, target.getTriple(), target.getChip(),
                     target.getFeatures(), target.getO(),
                     targetOptions.getInitialLlvmIRCallback(),
                     targetOptions.getLinkedLlvmIRCallback(),
                     targetOptions.getOptimizedLlvmIRCallback(),
                     targetOptions.getISACallback()),
      target(target), toolkitPath(targetOptions.getToolkitPath()),
      librariesToLink(targetOptions.getLibrariesToLink()) {

  // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
  if (toolkitPath.empty())
    toolkitPath = getCUDAToolkitPath();

  // Append the files in the target attribute.
  if (target.getLink())
    librariesToLink.append(in_start: target.getLink().begin(), in_end: target.getLink().end());

  // Append libdevice to the files to be loaded. Diagnostics are emitted by
  // the callee; the failure status is intentionally discarded here.
  (void)appendStandardLibs();
}
119
120void SerializeGPUModuleBase::init() {
121 static llvm::once_flag initializeBackendOnce;
122 llvm::call_once(flag&: initializeBackendOnce, F: []() {
123 // If the `NVPTX` LLVM target was built, initialize it.
124#if LLVM_HAS_NVPTX_TARGET
125 LLVMInitializeNVPTXTarget();
126 LLVMInitializeNVPTXTargetInfo();
127 LLVMInitializeNVPTXTargetMC();
128 LLVMInitializeNVPTXAsmPrinter();
129#endif
130 });
131}
132
133NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
134
135StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
136
137ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const {
138 return librariesToLink;
139}
140
141// Try to append `libdevice` from a CUDA toolkit installation.
// Try to append `libdevice` from a CUDA toolkit installation.
LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
#if MLIR_NVVM_EMBED_LIBDEVICE
  // If libdevice is embedded in the binary, we don't look it up on the
  // filesystem.
  MLIRContext *ctx = target.getContext();
  // Model the embedded blob as a 1-D i8 tensor of the embedded byte size.
  auto type =
      RankedTensorType::get(ArrayRef<int64_t>{_mlir_embedded_libdevice_size},
                            IntegerType::get(ctx, 8));
  auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);

  // Lookup if we already loaded the resource, otherwise create it.
  DialectResourceBlobManager::BlobEntry *blob =
      resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
  if (blob) {
    // Reuse the existing blob entry instead of registering a duplicate.
    librariesToLink.push_back(DenseResourceElementsAttr::get(
        type, DenseResourceElementsHandle(
                  blob, ctx->getLoadedDialect<BuiltinDialect>())));
    return success();
  }

  // Allocate a resource using one of the UnmanagedAsmResourceBlob methods to
  // wrap the embedded data without copying it.
  auto unmanagedBlob = UnmanagedAsmResourceBlob::allocateInferAlign(
      ArrayRef<char>{(const char *)_mlir_embedded_libdevice,
                     _mlir_embedded_libdevice_size});
  librariesToLink.push_back(DenseResourceElementsAttr::get(
      type, resourceManager.insert("_mlir_embedded_libdevice",
                                   std::move(unmanagedBlob))));
#else
  // Otherwise, locate `<toolkit>/nvvm/libdevice/libdevice.10.bc` on disk and
  // record its path; an empty toolkit path silently skips libdevice.
  StringRef pathRef = getToolkitPath();
  if (!pathRef.empty()) {
    SmallVector<char, 256> path;
    path.insert(I: path.begin(), From: pathRef.begin(), To: pathRef.end());
    pathRef = StringRef(path.data(), path.size());
    if (!llvm::sys::fs::is_directory(Path: pathRef)) {
      getOperation().emitError() << "CUDA path: " << pathRef
                                 << " does not exist or is not a directory.\n";
      return failure();
    }
    llvm::sys::path::append(path, a: "nvvm", b: "libdevice", c: "libdevice.10.bc");
    pathRef = StringRef(path.data(), path.size());
    if (!llvm::sys::fs::is_regular_file(Path: pathRef)) {
      getOperation().emitError() << "LibDevice path: " << pathRef
                                 << " does not exist or is not a file.\n";
      return failure();
    }
    librariesToLink.push_back(Elt: StringAttr::get(context: target.getContext(), bytes: pathRef));
  }
#endif
  return success();
}
193
194std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
195SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
196 SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
197 if (failed(Result: loadBitcodeFilesFromList(context&: module.getContext(), librariesToLink,
198 llvmModules&: bcFiles, failureOnError: true)))
199 return std::nullopt;
200 return std::move(bcFiles);
201}
202
namespace {
// Serializer that lowers a GPU module to PTX via the NVPTX backend and then
// to cubin/fatbin via the CUDA toolchain (`ptxas`/`fatbinary`) or the
// `nvptxcompiler` library, depending on the build configuration.
class NVPTXSerializer : public SerializeGPUModuleBase {
public:
  // Builds a serializer for `module` targeting `target` with `targetOptions`.
  NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                  const gpu::TargetOptions &targetOptions);

  /// Returns the GPU module op being serialized.
  gpu::GPUModuleOp getOperation();

  /// Compiles PTX to cubin using `ptxas`.
  std::optional<SmallVector<char, 0>>
  compileToBinary(const std::string &ptxCode);

  /// Compiles PTX to cubin using the `nvptxcompiler` library.
  std::optional<SmallVector<char, 0>>
  compileToBinaryNVPTX(const std::string &ptxCode);

  /// Serializes the LLVM module to an object format, depending on the
  /// compilation target selected in target options.
  std::optional<SmallVector<char, 0>>
  moduleToObject(llvm::Module &llvmModule) override;

  /// Get LLVMIR->ISA performance result.
  /// Return nullopt if moduleToObject has not been called or the target format
  /// is LLVMIR.
  std::optional<int64_t> getLLVMIRToISATimeInMs();

  /// Get ISA->Binary performance result.
  /// Return nullopt if moduleToObject has not been called or the target format
  /// is LLVMIR or ISA.
  std::optional<int64_t> getISAToBinaryTimeInMs();

private:
  // A temporary file path paired with a remover that deletes it on scope exit.
  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;

  /// Creates a temp file.
  std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);

  /// Finds the `tool` path, where `tool` is the name of the binary to search,
  /// i.e. `ptxas` or `fatbinary`. The search order is:
  /// 1. The toolkit path in `targetOptions`.
  /// 2. In the system PATH.
  /// 3. The path from `getCUDAToolkitPath()`.
  std::optional<std::string> findTool(StringRef tool);

  /// Target options.
  gpu::TargetOptions targetOptions;

  /// LLVMIR->ISA perf result.
  std::optional<int64_t> llvmToISATimeInMs;

  /// ISA->Binary perf result.
  std::optional<int64_t> isaToBinaryTimeInMs;
};
} // namespace
258
// Constructor: delegates module/target setup to the base class and keeps a
// copy of the options. Both timing results stay unset (std::nullopt) until
// moduleToObject() populates them.
NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                                 const gpu::TargetOptions &targetOptions)
    : SerializeGPUModuleBase(module, target, targetOptions),
      targetOptions(targetOptions), llvmToISATimeInMs(std::nullopt),
      isaToBinaryTimeInMs(std::nullopt) {}
264
265std::optional<NVPTXSerializer::TmpFile>
266NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
267 llvm::SmallString<128> filename;
268 std::error_code ec =
269 llvm::sys::fs::createTemporaryFile(Prefix: name, Suffix: suffix, ResultPath&: filename);
270 if (ec) {
271 getOperation().emitError() << "Couldn't create the temp file: `" << filename
272 << "`, error message: " << ec.message();
273 return std::nullopt;
274 }
275 return TmpFile(filename, llvm::FileRemover(filename.c_str()));
276}
277
278std::optional<int64_t> NVPTXSerializer::getLLVMIRToISATimeInMs() {
279 return llvmToISATimeInMs;
280}
281
282std::optional<int64_t> NVPTXSerializer::getISAToBinaryTimeInMs() {
283 return isaToBinaryTimeInMs;
284}
285
286gpu::GPUModuleOp NVPTXSerializer::getOperation() {
287 return dyn_cast<gpu::GPUModuleOp>(Val: &SerializeGPUModuleBase::getOperation());
288}
289
290std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
291 // Find the `tool` path.
292 // 1. Check the toolkit path given in the command line.
293 StringRef pathRef = targetOptions.getToolkitPath();
294 SmallVector<char, 256> path;
295 if (!pathRef.empty()) {
296 path.insert(I: path.begin(), From: pathRef.begin(), To: pathRef.end());
297 llvm::sys::path::append(path, a: "bin", b: tool);
298 if (llvm::sys::fs::can_execute(Path: path))
299 return StringRef(path.data(), path.size()).str();
300 }
301
302 // 2. Check PATH.
303 if (std::optional<std::string> toolPath =
304 llvm::sys::Process::FindInEnvPath(EnvName: "PATH", FileName: tool))
305 return *toolPath;
306
307 // 3. Check `getCUDAToolkitPath()`.
308 pathRef = getCUDAToolkitPath();
309 path.clear();
310 if (!pathRef.empty()) {
311 path.insert(I: path.begin(), From: pathRef.begin(), To: pathRef.end());
312 llvm::sys::path::append(path, a: "bin", b: tool);
313 if (llvm::sys::fs::can_execute(Path: path))
314 return StringRef(path.data(), path.size()).str();
315 }
316 getOperation().emitError()
317 << "Couldn't find the `" << tool
318 << "` binary. Please specify the toolkit "
319 "path, add the compiler to $PATH, or set one of the environment "
320 "variables in `NVVM::getCUDAToolkitPath()`.";
321 return std::nullopt;
322}
323
324/// Adds optional command-line arguments to existing arguments.
325template <typename T>
326static void setOptionalCommandlineArguments(NVVMTargetAttr target,
327 SmallVectorImpl<T> &ptxasArgs) {
328 if (!target.hasCmdOptions())
329 return;
330
331 std::optional<mlir::NamedAttribute> cmdOptions = target.getCmdOptions();
332 for (Attribute attr : cast<ArrayAttr>(Val: cmdOptions->getValue())) {
333 if (auto strAttr = dyn_cast<StringAttr>(Val&: attr)) {
334 if constexpr (std::is_same_v<T, StringRef>) {
335 ptxasArgs.push_back(strAttr.getValue());
336 } else if constexpr (std::is_same_v<T, const char *>) {
337 ptxasArgs.push_back(strAttr.getValue().data());
338 }
339 }
340 }
341}
342
343// TODO: clean this method & have a generic tool driver or never emit binaries
344// with this mechanism and let another stage take care of it.
// TODO: clean this method & have a generic tool driver or never emit binaries
// with this mechanism and let another stage take care of it.
//
// Drives the external CUDA tools: writes the PTX to a temp file, runs
// `ptxas` to produce a cubin, optionally wraps cubin+PTX into a fatbinary
// with `fatbinary`, and returns the resulting bytes. All intermediate files
// are temp files removed on scope exit. Returns std::nullopt on any failure.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
  // Determine if the serializer should create a fatbinary with the PTX
  // embedded or a simple CUBIN binary.
  const bool createFatbin =
      targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;

  // Find the `ptxas` & `fatbinary` tools.
  std::optional<std::string> ptxasCompiler = findTool(tool: "ptxas");
  if (!ptxasCompiler)
    return std::nullopt;
  std::optional<std::string> fatbinaryTool;
  if (createFatbin) {
    fatbinaryTool = findTool(tool: "fatbinary");
    if (!fatbinaryTool)
      return std::nullopt;
  }
  Location loc = getOperation().getLoc();

  // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
  std::string basename =
      llvm::formatv(Fmt: "mlir-{0}-{1}-{2}", Vals: getOperation().getNameAttr().getValue(),
                    Vals: getTarget().getTriple(), Vals: getTarget().getChip());

  // Create temp files:
  std::optional<TmpFile> ptxFile = createTemp(name: basename, suffix: "ptx");
  if (!ptxFile)
    return std::nullopt;
  std::optional<TmpFile> logFile = createTemp(name: basename, suffix: "log");
  if (!logFile)
    return std::nullopt;
  std::optional<TmpFile> binaryFile = createTemp(name: basename, suffix: "bin");
  if (!binaryFile)
    return std::nullopt;
  TmpFile cubinFile;
  if (createFatbin) {
    // The cubin is an intermediate when producing a fatbin; give it its own
    // remover so it is cleaned up independently.
    std::string cubinFilename = (ptxFile->first + ".cubin").str();
    cubinFile = TmpFile(cubinFilename, llvm::FileRemover(cubinFilename));
  } else {
    // The cubin IS the final output; reuse `binaryFile` (which owns the
    // remover) and leave this remover disengaged.
    cubinFile.first = binaryFile->first;
  }

  std::error_code ec;
  // Dump the PTX to a temp file.
  {
    llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
    if (ec) {
      emitError(loc) << "Couldn't open the file: `" << ptxFile->first
                     << "`, error message: " << ec.message();
      return std::nullopt;
    }
    ptxStream << ptxCode;
    if (ptxStream.has_error()) {
      emitError(loc) << "An error occurred while writing the PTX to: `"
                     << ptxFile->first << "`.";
      return std::nullopt;
    }
    ptxStream.flush();
  }

  // Command redirects: stdin untouched, stdout & stderr into the log file.
  std::optional<StringRef> redirects[] = {
      std::nullopt,
      logFile->first,
      logFile->first,
  };

  // Get any extra args passed in `targetOptions`.
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();

  // Create ptxas args.
  std::string optLevel = std::to_string(val: this->optLevel);
  SmallVector<StringRef, 12> ptxasArgs(
      {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
       StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
       "--opt-level", optLevel});

  bool useFatbin32 = false;
  for (const auto *cArg : cmdOpts.second) {
    // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to
    // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is
    // assumed.
    if (StringRef arg(cArg); arg != "-32")
      ptxasArgs.push_back(Elt: arg);
    else
      useFatbin32 = true;
  }

  // Set optional command line arguments
  setOptionalCommandlineArguments(target: getTarget(), ptxasArgs);

  // Create the `fatbinary` args.
  StringRef chip = getTarget().getChip();
  // Remove the arch prefix to obtain the compute capability.
  chip.consume_front(Prefix: "sm_"), chip.consume_front(Prefix: "compute_");
  // Embed the cubin object.
  std::string cubinArg =
      llvm::formatv(Fmt: "--image3=kind=elf,sm={0},file={1}", Vals&: chip, Vals&: cubinFile.first)
          .str();
  // Embed the PTX file so the driver can JIT if needed.
  std::string ptxArg =
      llvm::formatv(Fmt: "--image3=kind=ptx,sm={0},file={1}", Vals&: chip, Vals&: ptxFile->first)
          .str();
  SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),
                                        useFatbin32 ? "-32" : "-64", cubinArg,
                                        ptxArg, "--create", binaryFile->first});

  // Dump tool invocation commands.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::dbgs() << "Tool invocation for module: "
                 << getOperation().getNameAttr() << "\n";
    llvm::dbgs() << "ptxas executable:" << ptxasCompiler.value() << "\n";
    llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
    llvm::dbgs() << "\n";
    if (createFatbin) {
      llvm::interleave(fatbinArgs, llvm::dbgs(), " ");
      llvm::dbgs() << "\n";
    }
  });
#undef DEBUG_TYPE

  // Helper function for printing tool error logs. Prefers the captured
  // `ErrMsg` from ExecuteAndWait; falls back to the redirected log file.
  std::string message;
  auto emitLogError =
      [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
    if (message.empty()) {
      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
          llvm::MemoryBuffer::getFile(Filename: logFile->first);
      if (toolStderr)
        emitError(loc) << toolName << " invocation failed. Log:\n"
                       << toolStderr->get()->getBuffer();
      else
        emitError(loc) << toolName << " invocation failed.";
      return std::nullopt;
    }
    emitError(loc) << toolName
                   << " invocation failed, error message: " << message;
    return std::nullopt;
  };

  // Invoke PTXAS.
  if (llvm::sys::ExecuteAndWait(Program: ptxasCompiler.value(), Args: ptxasArgs,
                                /*Env=*/std::nullopt,
                                /*Redirects=*/redirects,
                                /*SecondsToWait=*/0,
                                /*MemoryLimit=*/0,
                                /*ErrMsg=*/&message))
    return emitLogError("`ptxas`");
#define DEBUG_TYPE "dump-sass"
  LLVM_DEBUG({
    // NOTE(review): `nvdisasm.value()` is dereferenced without checking that
    // findTool succeeded; if `nvdisasm` is missing, this debug-only path
    // aborts on an empty optional — confirm whether a guard is wanted.
    std::optional<std::string> nvdisasm = findTool("nvdisasm");
    SmallVector<StringRef> nvdisasmArgs(
        {StringRef("nvdisasm"), StringRef(cubinFile.first)});
    if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
                                  /*Env=*/std::nullopt,
                                  /*Redirects=*/redirects,
                                  /*SecondsToWait=*/0,
                                  /*MemoryLimit=*/0,
                                  /*ErrMsg=*/&message))
      return emitLogError("`nvdisasm`");
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Invoke `fatbin`.
  message.clear();
  if (createFatbin && llvm::sys::ExecuteAndWait(Program: *fatbinaryTool, Args: fatbinArgs,
                                                /*Env=*/std::nullopt,
                                                /*Redirects=*/redirects,
                                                /*SecondsToWait=*/0,
                                                /*MemoryLimit=*/0,
                                                /*ErrMsg=*/&message))
    return emitLogError("`fatbinary`");

// Dump the output of the tools, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Read the fatbin.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
      llvm::MemoryBuffer::getFile(Filename: binaryFile->first);
  if (!binaryBuffer) {
    emitError(loc) << "Couldn't open the file: `" << binaryFile->first
                   << "`, error message: " << binaryBuffer.getError().message();
    return std::nullopt;
  }
  StringRef fatbin = (*binaryBuffer)->getBuffer();
  return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
}
549
#if MLIR_ENABLE_NVPTXCOMPILER
#include "nvPTXCompiler.h"

// Evaluates `expr`; on a non-zero nvPTXCompiler status code, emits a
// diagnostic at `loc` (which must be in scope) and returns std::nullopt from
// the enclosing function.
#define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)

#include "nvFatbin.h"

// Same pattern for the nvFatbin API, which reports errors via
// nvFatbinResult instead of a plain status code.
#define RETURN_ON_NVFATBIN_ERROR(expr)                                         \
  do {                                                                         \
    auto result = (expr);                                                      \
    if (result != nvFatbinResult::NVFATBIN_SUCCESS) {                          \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ")      \
                     << nvFatbinGetErrorString(result);                        \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)

// In-process alternative to compileToBinary(): compiles PTX with the
// `nvptxcompiler` library and, when a fatbin is requested, wraps the result
// (plus the PTX) with the `nvFatbin` library. No external tools are spawned.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
  Location loc = getOperation().getLoc();
  nvPTXCompilerHandle compiler = nullptr;
  nvPTXCompileResult status;
  size_t logSize;

  // Create the options.
  std::string optLevel = std::to_string(this->optLevel);
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();
  cmdOpts.second.append(
      {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});

  // Set optional command line arguments
  setOptionalCommandlineArguments(getTarget(), cmdOpts.second);
  // Create the compiler handle.
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));

  // Try to compile the binary.
  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
                                cmdOpts.second.data());

  // Check if compilation failed.
  if (status != NVPTXCOMPILE_SUCCESS) {
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetErrorLogSize(compiler, &logSize));
    if (logSize != 0) {
      // +1 for the NUL terminator; the buffer is printed as a C string.
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetErrorLog(compiler, log.data()));
      emitError(loc) << "NVPTX compiler invocation failed, error log: "
                     << log.data();
    } else {
      emitError(loc) << "NVPTX compiler invocation failed with error code: "
                     << status;
    }
    return std::nullopt;
  }

  // Retrieve the binary.
  size_t elfSize;
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
  SmallVector<char, 0> binary(elfSize, 0);
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));

// Dump the log of the compiler, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetInfoLog(compiler, log.data()));
      llvm::dbgs() << "NVPTX compiler invocation for module: "
                   << getOperation().getNameAttr() << "\n";
      llvm::dbgs() << "Arguments: ";
      llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
      llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));

  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
    // `-32` in the tokenized options selects a 32-bit fatbin; default is 64.
    bool useFatbin32 = llvm::any_of(cmdOpts.second, [](const char *option) {
      return llvm::StringRef(option) == "-32";
    });

    const char *cubinOpts[1] = {useFatbin32 ? "-32" : "-64"};
    nvFatbinHandle handle;

    auto chip = getTarget().getChip();
    chip.consume_front("sm_");

    RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
        handle, binary.data(), binary.size(), chip.data(), nullptr));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
        handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));

    size_t fatbinSize;
    RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
    SmallVector<char, 0> fatbin(fatbinSize, 0);
    RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
    return fatbin;
  }

  return binary;
}
#endif // MLIR_ENABLE_NVPTXCOMPILER
671
// Lowers `llvmModule` to the configured compilation target: LLVM IR bytes
// (Offload), PTX text (Assembly), or a cubin/fatbin (Binary/Fatbin). Records
// the two wall-time measurements into the perf members along the way.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
  llvm::Timer moduleToObjectTimer(
      "moduleToObjectTimer",
      "Timer for perf llvm-ir -> isa and isa -> binary.");
  // Reset the timer on every exit path, including early returns.
  auto clear = llvm::make_scope_exit(F: [&]() { moduleToObjectTimer.clear(); });
  // Return LLVM IR if the compilation target is `offload`.
#define DEBUG_TYPE "serialize-to-llvm"
  LLVM_DEBUG({
    llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
                 << "\n";
    llvm::dbgs() << llvmModule << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE
  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
    return SerializeGPUModuleBase::moduleToObject(llvmModule);

  // Anything past the Offload target requires the NVPTX backend.
#if !LLVM_HAS_NVPTX_TARGET
  getOperation()->emitError(
      "The `NVPTX` target was not built. Please enable it when building LLVM.");
  return std::nullopt;
#endif // LLVM_HAS_NVPTX_TARGET

  // Emit PTX code.
  std::optional<llvm::TargetMachine *> targetMachine =
      getOrCreateTargetMachine();
  if (!targetMachine) {
    getOperation().emitError() << "Target Machine unavailable for triple "
                               << triple << ", can't optimize with LLVM\n";
    return std::nullopt;
  }
  // Time the LLVM-IR -> PTX translation.
  moduleToObjectTimer.startTimer();
  std::optional<std::string> serializedISA =
      translateToISA(llvmModule, targetMachine&: **targetMachine);
  moduleToObjectTimer.stopTimer();
  llvmToISATimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
  moduleToObjectTimer.clear();
  if (!serializedISA) {
    getOperation().emitError() << "Failed translating the module to ISA.";
    return std::nullopt;
  }

  // Hand the PTX to the user-provided callback, if one was registered.
  if (isaCallback)
    isaCallback(serializedISA.value());

#define DEBUG_TYPE "serialize-to-isa"
  LLVM_DEBUG({
    llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
    llvm::dbgs() << *serializedISA << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE

  // Return PTX if the compilation target is `assembly`.
  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
    return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());

  std::optional<SmallVector<char, 0>> result;
  // Time the PTX -> binary step; the backend used depends on the build.
  moduleToObjectTimer.startTimer();
  // Compile to binary.
#if MLIR_ENABLE_NVPTXCOMPILER
  result = compileToBinaryNVPTX(*serializedISA);
#else
  result = compileToBinary(ptxCode: *serializedISA);
#endif // MLIR_ENABLE_NVPTXCOMPILER

  moduleToObjectTimer.stopTimer();
  isaToBinaryTimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
  moduleToObjectTimer.clear();
  return result;
}
744
745std::optional<SmallVector<char, 0>>
746NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
747 const gpu::TargetOptions &options) const {
748 Builder builder(attribute.getContext());
749 assert(module && "The module must be non null.");
750 if (!module)
751 return std::nullopt;
752 if (!mlir::isa<gpu::GPUModuleOp>(Val: module)) {
753 module->emitError(message: "Module must be a GPU module.");
754 return std::nullopt;
755 }
756 NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(Val&: attribute), options);
757 serializer.init();
758 std::optional<SmallVector<char, 0>> result = serializer.run();
759 auto llvmToISATimeInMs = serializer.getLLVMIRToISATimeInMs();
760 if (llvmToISATimeInMs.has_value())
761 module->setAttr(name: "LLVMIRToISATimeInMs",
762 value: builder.getI64IntegerAttr(value: *llvmToISATimeInMs));
763 auto isaToBinaryTimeInMs = serializer.getISAToBinaryTimeInMs();
764 if (isaToBinaryTimeInMs.has_value())
765 module->setAttr(name: "ISAToBinaryTimeInMs",
766 value: builder.getI64IntegerAttr(value: *isaToBinaryTimeInMs));
767 return result;
768}
769
770Attribute
771NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
772 const SmallVector<char, 0> &object,
773 const gpu::TargetOptions &options) const {
774 auto target = cast<NVVMTargetAttr>(Val&: attribute);
775 gpu::CompilationTarget format = options.getCompilationTarget();
776 DictionaryAttr objectProps;
777 Builder builder(attribute.getContext());
778 SmallVector<NamedAttribute, 4> properties;
779 if (format == gpu::CompilationTarget::Assembly)
780 properties.push_back(
781 Elt: builder.getNamedAttr(name: "O", val: builder.getI32IntegerAttr(value: target.getO())));
782
783 if (StringRef section = options.getELFSection(); !section.empty())
784 properties.push_back(Elt: builder.getNamedAttr(name: gpu::elfSectionName,
785 val: builder.getStringAttr(bytes: section)));
786
787 for (const auto *perfName : {"LLVMIRToISATimeInMs", "ISAToBinaryTimeInMs"}) {
788 if (module->hasAttr(name: perfName)) {
789 IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(Val: module->getAttr(name: perfName));
790 properties.push_back(Elt: builder.getNamedAttr(
791 name: perfName, val: builder.getI64IntegerAttr(value: attr.getInt())));
792 }
793 }
794
795 if (!properties.empty())
796 objectProps = builder.getDictionaryAttr(value: properties);
797
798 return builder.getAttr<gpu::ObjectAttr>(
799 args&: attribute, args&: format,
800 args: builder.getStringAttr(bytes: StringRef(object.data(), object.size())),
801 args&: objectProps, /*kernels=*/args: nullptr);
802}
803

// source code of mlir/lib/Target/LLVM/NVVM/Target.cpp