1 | //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This file defines NVVM target related functions including registration
10 | // calls for the `#nvvm.target` compilation attribute. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "mlir/Target/LLVM/NVVM/Target.h" |
15 | |
16 | #include "mlir/Config/mlir-config.h" |
17 | #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
18 | #include "mlir/Dialect/LLVMIR/NVVMDialect.h" |
19 | #include "mlir/Target/LLVM/NVVM/Utils.h" |
20 | #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" |
21 | #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" |
22 | #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" |
23 | #include "mlir/Target/LLVMIR/Export.h" |
24 | |
25 | #include "llvm/Config/llvm-config.h" |
26 | #include "llvm/Support/FileSystem.h" |
27 | #include "llvm/Support/FileUtilities.h" |
28 | #include "llvm/Support/FormatVariadic.h" |
29 | #include "llvm/Support/MemoryBuffer.h" |
30 | #include "llvm/Support/Path.h" |
31 | #include "llvm/Support/Process.h" |
32 | #include "llvm/Support/Program.h" |
33 | #include "llvm/Support/TargetSelect.h" |
34 | |
35 | #include <cstdlib> |
36 | |
37 | using namespace mlir; |
38 | using namespace mlir::NVVM; |
39 | |
// Fallback CUDA toolkit location returned by `getCUDAToolkitPath()` when none
// of the environment variables it checks is set. It is the empty string unless
// the macro was already defined before this point.
#if !defined(__DEFAULT_CUDATOOLKIT_PATH__)
#define __DEFAULT_CUDATOOLKIT_PATH__ ""
#endif
43 | |
44 | namespace { |
45 | // Implementation of the `TargetAttrInterface` model. |
46 | class NVVMTargetAttrImpl |
47 | : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> { |
48 | public: |
49 | std::optional<SmallVector<char, 0>> |
50 | serializeToObject(Attribute attribute, Operation *module, |
51 | const gpu::TargetOptions &options) const; |
52 | |
53 | Attribute createObject(Attribute attribute, |
54 | const SmallVector<char, 0> &object, |
55 | const gpu::TargetOptions &options) const; |
56 | }; |
57 | } // namespace |
58 | |
59 | // Register the NVVM dialect, the NVVM translation & the target interface. |
60 | void mlir::NVVM::registerNVVMTargetInterfaceExternalModels( |
61 | DialectRegistry ®istry) { |
62 | registry.addExtension(extensionFn: +[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) { |
63 | NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx); |
64 | }); |
65 | } |
66 | |
67 | void mlir::NVVM::registerNVVMTargetInterfaceExternalModels( |
68 | MLIRContext &context) { |
69 | DialectRegistry registry; |
70 | registerNVVMTargetInterfaceExternalModels(registry); |
71 | context.appendDialectRegistry(registry); |
72 | } |
73 | |
74 | // Search for the CUDA toolkit path. |
75 | StringRef mlir::NVVM::getCUDAToolkitPath() { |
76 | if (const char *var = std::getenv(name: "CUDA_ROOT" )) |
77 | return var; |
78 | if (const char *var = std::getenv(name: "CUDA_HOME" )) |
79 | return var; |
80 | if (const char *var = std::getenv(name: "CUDA_PATH" )) |
81 | return var; |
82 | return __DEFAULT_CUDATOOLKIT_PATH__; |
83 | } |
84 | |
85 | SerializeGPUModuleBase::SerializeGPUModuleBase( |
86 | Operation &module, NVVMTargetAttr target, |
87 | const gpu::TargetOptions &targetOptions) |
88 | : ModuleToObject(module, target.getTriple(), target.getChip(), |
89 | target.getFeatures(), target.getO()), |
90 | target(target), toolkitPath(targetOptions.getToolkitPath()), |
91 | fileList(targetOptions.getLinkFiles()) { |
92 | |
93 | // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath` |
94 | if (toolkitPath.empty()) |
95 | toolkitPath = getCUDAToolkitPath(); |
96 | |
97 | // Append the files in the target attribute. |
98 | if (ArrayAttr files = target.getLink()) |
99 | for (Attribute attr : files.getValue()) |
100 | if (auto file = dyn_cast<StringAttr>(attr)) |
101 | fileList.push_back(file.str()); |
102 | |
103 | // Append libdevice to the files to be loaded. |
104 | (void)appendStandardLibs(); |
105 | } |
106 | |
107 | void SerializeGPUModuleBase::init() { |
108 | static llvm::once_flag initializeBackendOnce; |
109 | llvm::call_once(flag&: initializeBackendOnce, F: []() { |
110 | // If the `NVPTX` LLVM target was built, initialize it. |
111 | #if LLVM_HAS_NVPTX_TARGET |
112 | LLVMInitializeNVPTXTarget(); |
113 | LLVMInitializeNVPTXTargetInfo(); |
114 | LLVMInitializeNVPTXTargetMC(); |
115 | LLVMInitializeNVPTXAsmPrinter(); |
116 | #endif |
117 | }); |
118 | } |
119 | |
120 | NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; } |
121 | |
122 | StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; } |
123 | |
124 | ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const { |
125 | return fileList; |
126 | } |
127 | |
128 | // Try to append `libdevice` from a CUDA toolkit installation. |
129 | LogicalResult SerializeGPUModuleBase::appendStandardLibs() { |
130 | StringRef pathRef = getToolkitPath(); |
131 | if (!pathRef.empty()) { |
132 | SmallVector<char, 256> path; |
133 | path.insert(I: path.begin(), From: pathRef.begin(), To: pathRef.end()); |
134 | pathRef = StringRef(path.data(), path.size()); |
135 | if (!llvm::sys::fs::is_directory(Path: pathRef)) { |
136 | getOperation().emitError() << "CUDA path: " << pathRef |
137 | << " does not exist or is not a directory.\n" ; |
138 | return failure(); |
139 | } |
140 | llvm::sys::path::append(path, a: "nvvm" , b: "libdevice" , c: "libdevice.10.bc" ); |
141 | pathRef = StringRef(path.data(), path.size()); |
142 | if (!llvm::sys::fs::is_regular_file(Path: pathRef)) { |
143 | getOperation().emitError() << "LibDevice path: " << pathRef |
144 | << " does not exist or is not a file.\n" ; |
145 | return failure(); |
146 | } |
147 | fileList.push_back(Elt: pathRef.str()); |
148 | } |
149 | return success(); |
150 | } |
151 | |
152 | std::optional<SmallVector<std::unique_ptr<llvm::Module>>> |
153 | SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { |
154 | SmallVector<std::unique_ptr<llvm::Module>> bcFiles; |
155 | if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles, |
156 | true))) |
157 | return std::nullopt; |
158 | return std::move(bcFiles); |
159 | } |
160 | |
161 | #if MLIR_ENABLE_CUDA_CONVERSIONS |
162 | namespace { |
163 | class NVPTXSerializer : public SerializeGPUModuleBase { |
164 | public: |
165 | NVPTXSerializer(Operation &module, NVVMTargetAttr target, |
166 | const gpu::TargetOptions &targetOptions); |
167 | |
168 | gpu::GPUModuleOp getOperation(); |
169 | |
170 | // Compile PTX to cubin using `ptxas`. |
171 | std::optional<SmallVector<char, 0>> |
172 | compileToBinary(const std::string &ptxCode); |
173 | |
174 | // Compile PTX to cubin using the `nvptxcompiler` library. |
175 | std::optional<SmallVector<char, 0>> |
176 | compileToBinaryNVPTX(const std::string &ptxCode); |
177 | |
178 | std::optional<SmallVector<char, 0>> |
179 | moduleToObject(llvm::Module &llvmModule) override; |
180 | |
181 | private: |
182 | using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>; |
183 | |
184 | // Create a temp file. |
185 | std::optional<TmpFile> createTemp(StringRef name, StringRef suffix); |
186 | |
187 | // Find the `tool` path, where `tool` is the name of the binary to search, |
188 | // i.e. `ptxas` or `fatbinary`. The search order is: |
189 | // 1. The toolkit path in `targetOptions`. |
190 | // 2. In the system PATH. |
191 | // 3. The path from `getCUDAToolkitPath()`. |
192 | std::optional<std::string> findTool(StringRef tool); |
193 | |
194 | // Target options. |
195 | gpu::TargetOptions targetOptions; |
196 | }; |
197 | } // namespace |
198 | |
199 | NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target, |
200 | const gpu::TargetOptions &targetOptions) |
201 | : SerializeGPUModuleBase(module, target, targetOptions), |
202 | targetOptions(targetOptions) {} |
203 | |
204 | std::optional<NVPTXSerializer::TmpFile> |
205 | NVPTXSerializer::createTemp(StringRef name, StringRef suffix) { |
206 | llvm::SmallString<128> filename; |
207 | std::error_code ec = |
208 | llvm::sys::fs::createTemporaryFile(Prefix: name, Suffix: suffix, ResultPath&: filename); |
209 | if (ec) { |
210 | getOperation().emitError() << "Couldn't create the temp file: `" << filename |
211 | << "`, error message: " << ec.message(); |
212 | return std::nullopt; |
213 | } |
214 | return TmpFile(filename, llvm::FileRemover(filename.c_str())); |
215 | } |
216 | |
217 | gpu::GPUModuleOp NVPTXSerializer::getOperation() { |
218 | return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation()); |
219 | } |
220 | |
221 | std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) { |
222 | // Find the `tool` path. |
223 | // 1. Check the toolkit path given in the command line. |
224 | StringRef pathRef = targetOptions.getToolkitPath(); |
225 | SmallVector<char, 256> path; |
226 | if (!pathRef.empty()) { |
227 | path.insert(I: path.begin(), From: pathRef.begin(), To: pathRef.end()); |
228 | llvm::sys::path::append(path, a: "bin" , b: tool); |
229 | if (llvm::sys::fs::can_execute(Path: path)) |
230 | return StringRef(path.data(), path.size()).str(); |
231 | } |
232 | |
233 | // 2. Check PATH. |
234 | if (std::optional<std::string> toolPath = |
235 | llvm::sys::Process::FindInEnvPath(EnvName: "PATH" , FileName: tool)) |
236 | return *toolPath; |
237 | |
238 | // 3. Check `getCUDAToolkitPath()`. |
239 | pathRef = getCUDAToolkitPath(); |
240 | path.clear(); |
241 | if (!pathRef.empty()) { |
242 | path.insert(I: path.begin(), From: pathRef.begin(), To: pathRef.end()); |
243 | llvm::sys::path::append(path, a: "bin" , b: tool); |
244 | if (llvm::sys::fs::can_execute(Path: path)) |
245 | return StringRef(path.data(), path.size()).str(); |
246 | } |
247 | getOperation().emitError() |
248 | << "Couldn't find the `" << tool |
249 | << "` binary. Please specify the toolkit " |
250 | "path, add the compiler to $PATH, or set one of the environment " |
251 | "variables in `NVVM::getCUDAToolkitPath()`." ; |
252 | return std::nullopt; |
253 | } |
254 | |
255 | // TODO: clean this method & have a generic tool driver or never emit binaries |
256 | // with this mechanism and let another stage take care of it. |
257 | std::optional<SmallVector<char, 0>> |
258 | NVPTXSerializer::compileToBinary(const std::string &ptxCode) { |
259 | // Determine if the serializer should create a fatbinary with the PTX embeded |
260 | // or a simple CUBIN binary. |
261 | const bool createFatbin = |
262 | targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin; |
263 | |
264 | // Find the `ptxas` & `fatbinary` tools. |
265 | std::optional<std::string> ptxasCompiler = findTool(tool: "ptxas" ); |
266 | if (!ptxasCompiler) |
267 | return std::nullopt; |
268 | std::optional<std::string> fatbinaryTool = findTool(tool: "fatbinary" ); |
269 | if (createFatbin && !fatbinaryTool) |
270 | return std::nullopt; |
271 | Location loc = getOperation().getLoc(); |
272 | |
273 | // Base name for all temp files: mlir-<module name>-<target triple>-<chip>. |
274 | std::string basename = |
275 | llvm::formatv("mlir-{0}-{1}-{2}" , getOperation().getNameAttr().getValue(), |
276 | getTarget().getTriple(), getTarget().getChip()); |
277 | |
278 | // Create temp files: |
279 | std::optional<TmpFile> ptxFile = createTemp(name: basename, suffix: "ptx" ); |
280 | if (!ptxFile) |
281 | return std::nullopt; |
282 | std::optional<TmpFile> logFile = createTemp(name: basename, suffix: "log" ); |
283 | if (!logFile) |
284 | return std::nullopt; |
285 | std::optional<TmpFile> binaryFile = createTemp(name: basename, suffix: "bin" ); |
286 | if (!binaryFile) |
287 | return std::nullopt; |
288 | TmpFile cubinFile; |
289 | if (createFatbin) { |
290 | Twine cubinFilename = ptxFile->first + ".cubin" ; |
291 | cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename)); |
292 | } else { |
293 | cubinFile.first = binaryFile->first; |
294 | } |
295 | |
296 | std::error_code ec; |
297 | // Dump the PTX to a temp file. |
298 | { |
299 | llvm::raw_fd_ostream ptxStream(ptxFile->first, ec); |
300 | if (ec) { |
301 | emitError(loc) << "Couldn't open the file: `" << ptxFile->first |
302 | << "`, error message: " << ec.message(); |
303 | return std::nullopt; |
304 | } |
305 | ptxStream << ptxCode; |
306 | if (ptxStream.has_error()) { |
307 | emitError(loc) << "An error occurred while writing the PTX to: `" |
308 | << ptxFile->first << "`." ; |
309 | return std::nullopt; |
310 | } |
311 | ptxStream.flush(); |
312 | } |
313 | |
314 | // Command redirects. |
315 | std::optional<StringRef> redirects[] = { |
316 | std::nullopt, |
317 | logFile->first, |
318 | logFile->first, |
319 | }; |
320 | |
321 | // Get any extra args passed in `targetOptions`. |
322 | std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts = |
323 | targetOptions.tokenizeCmdOptions(); |
324 | |
325 | // Create ptxas args. |
326 | std::string optLevel = std::to_string(val: this->optLevel); |
327 | SmallVector<StringRef, 12> ptxasArgs( |
328 | {StringRef("ptxas" ), StringRef("-arch" ), getTarget().getChip(), |
329 | StringRef(ptxFile->first), StringRef("-o" ), StringRef(cubinFile.first), |
330 | "--opt-level" , optLevel}); |
331 | |
332 | bool useFatbin32 = false; |
333 | for (const auto *cArg : cmdOpts.second) { |
334 | // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to |
335 | // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is |
336 | // assumed. |
337 | if (StringRef arg(cArg); arg != "-32" ) |
338 | ptxasArgs.push_back(Elt: arg); |
339 | else |
340 | useFatbin32 = true; |
341 | } |
342 | |
343 | // Create the `fatbinary` args. |
344 | StringRef chip = getTarget().getChip(); |
345 | // Remove the arch prefix to obtain the compute capability. |
346 | chip.consume_front(Prefix: "sm_" ), chip.consume_front(Prefix: "compute_" ); |
347 | // Embed the cubin object. |
348 | std::string cubinArg = |
349 | llvm::formatv(Fmt: "--image3=kind=elf,sm={0},file={1}" , Vals&: chip, Vals&: cubinFile.first) |
350 | .str(); |
351 | // Embed the PTX file so the driver can JIT if needed. |
352 | std::string ptxArg = |
353 | llvm::formatv(Fmt: "--image3=kind=ptx,sm={0},file={1}" , Vals&: chip, Vals&: ptxFile->first) |
354 | .str(); |
355 | SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary" ), |
356 | useFatbin32 ? "-32" : "-64" , cubinArg, |
357 | ptxArg, "--create" , binaryFile->first}); |
358 | |
359 | // Dump tool invocation commands. |
360 | #define DEBUG_TYPE "serialize-to-binary" |
361 | LLVM_DEBUG({ |
362 | llvm::dbgs() << "Tool invocation for module: " |
363 | << getOperation().getNameAttr() << "\n" ; |
364 | llvm::interleave(ptxasArgs, llvm::dbgs(), " " ); |
365 | llvm::dbgs() << "\n" ; |
366 | if (createFatbin) { |
367 | llvm::interleave(fatbinArgs, llvm::dbgs(), " " ); |
368 | llvm::dbgs() << "\n" ; |
369 | } |
370 | }); |
371 | #undef DEBUG_TYPE |
372 | |
373 | // Helper function for printing tool error logs. |
374 | std::string message; |
375 | auto emitLogError = |
376 | [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> { |
377 | if (message.empty()) { |
378 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr = |
379 | llvm::MemoryBuffer::getFile(Filename: logFile->first); |
380 | if (toolStderr) |
381 | emitError(loc) << toolName << " invocation failed. Log:\n" |
382 | << toolStderr->get()->getBuffer(); |
383 | else |
384 | emitError(loc) << toolName << " invocation failed." ; |
385 | return std::nullopt; |
386 | } |
387 | emitError(loc) << toolName |
388 | << " invocation failed, error message: " << message; |
389 | return std::nullopt; |
390 | }; |
391 | |
392 | // Invoke PTXAS. |
393 | if (llvm::sys::ExecuteAndWait(Program: ptxasCompiler.value(), Args: ptxasArgs, |
394 | /*Env=*/std::nullopt, |
395 | /*Redirects=*/redirects, |
396 | /*SecondsToWait=*/0, |
397 | /*MemoryLimit=*/0, |
398 | /*ErrMsg=*/&message)) |
399 | return emitLogError("`ptxas`" ); |
400 | |
401 | // Invoke `fatbin`. |
402 | message.clear(); |
403 | if (createFatbin && llvm::sys::ExecuteAndWait(Program: *fatbinaryTool, Args: fatbinArgs, |
404 | /*Env=*/std::nullopt, |
405 | /*Redirects=*/redirects, |
406 | /*SecondsToWait=*/0, |
407 | /*MemoryLimit=*/0, |
408 | /*ErrMsg=*/&message)) |
409 | return emitLogError("`fatbinary`" ); |
410 | |
411 | // Dump the output of the tools, helpful if the verbose flag was passed. |
412 | #define DEBUG_TYPE "serialize-to-binary" |
413 | LLVM_DEBUG({ |
414 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer = |
415 | llvm::MemoryBuffer::getFile(logFile->first); |
416 | if (logBuffer && !(*logBuffer)->getBuffer().empty()) { |
417 | llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n" ; |
418 | llvm::dbgs().flush(); |
419 | } |
420 | }); |
421 | #undef DEBUG_TYPE |
422 | |
423 | // Read the fatbin. |
424 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer = |
425 | llvm::MemoryBuffer::getFile(Filename: binaryFile->first); |
426 | if (!binaryBuffer) { |
427 | emitError(loc) << "Couldn't open the file: `" << binaryFile->first |
428 | << "`, error message: " << binaryBuffer.getError().message(); |
429 | return std::nullopt; |
430 | } |
431 | StringRef fatbin = (*binaryBuffer)->getBuffer(); |
432 | return SmallVector<char, 0>(fatbin.begin(), fatbin.end()); |
433 | } |
434 | |
#if MLIR_ENABLE_NVPTXCOMPILER
#include "nvPTXCompiler.h"

// Evaluates `expr`; if it yields a non-zero status, emits an error at `loc`
// naming the failed expression and returns `std::nullopt` from the enclosing
// function. A `Location loc` must be in scope at the point of use.
#define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)

// Compiles `ptxCode` to a binary in-process with the `nvptxcompiler` library.
// The options are the tokenized options from the target options followed by
// `-arch <chip>` and `--opt-level <level>`. Returns the compiled program, or
// `std::nullopt` after emitting an error.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
  Location loc = getOperation().getLoc();
  nvPTXCompilerHandle compiler = nullptr;
  nvPTXCompileResult status;
  size_t logSize;

  // Destroys the compiler handle on every early return so it is not leaked.
  // It is disarmed right before the explicit destroy at the end, whose status
  // is checked.
  struct CompilerGuard {
    explicit CompilerGuard(nvPTXCompilerHandle &handle) : handle(handle) {}
    ~CompilerGuard() {
      if (armed && handle)
        (void)nvPTXCompilerDestroy(&handle);
    }
    nvPTXCompilerHandle &handle;
    bool armed = true;
  };
  CompilerGuard compilerGuard(compiler);

  // Create the options. The chip is copied into a `std::string` because the
  // library takes C strings and `StringRef::data()` carries no null-terminator
  // guarantee.
  std::string optLevel = std::to_string(this->optLevel);
  std::string chip = getTarget().getChip().str();
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();
  cmdOpts.second.append(
      {"-arch", chip.c_str(), "--opt-level", optLevel.c_str()});

  // Create the compiler handle.
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));

  // Try to compile the binary.
  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
                                cmdOpts.second.data());

  // Check if compilation failed.
  if (status != NVPTXCOMPILE_SUCCESS) {
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetErrorLogSize(compiler, &logSize));
    if (logSize != 0) {
      // One extra zeroed byte keeps the log null-terminated for printing.
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetErrorLog(compiler, log.data()));
      emitError(loc) << "NVPTX compiler invocation failed, error log: "
                     << log.data();
    } else
      emitError(loc) << "NVPTX compiler invocation failed with error code: "
                     << status;
    return std::nullopt;
  }

  // Retrieve the binary.
  size_t elfSize;
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
  SmallVector<char, 0> binary(elfSize, 0);
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerGetCompiledProgram(
      compiler, static_cast<void *>(binary.data())));

  // Dump the log of the compiler, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetInfoLog(compiler, log.data()));
      llvm::dbgs() << "NVPTX compiler invocation for module: "
                   << getOperation().getNameAttr() << "\n";
      llvm::dbgs() << "Arguments: ";
      llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
      llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE
  // Hand the destroy over to the checked call below.
  compilerGuard.armed = false;
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
  return binary;
}
#endif // MLIR_ENABLE_NVPTXCOMPILER
515 | |
516 | std::optional<SmallVector<char, 0>> |
517 | NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) { |
518 | // Return LLVM IR if the compilation target is offload. |
519 | #define DEBUG_TYPE "serialize-to-llvm" |
520 | LLVM_DEBUG({ |
521 | llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr() |
522 | << "\n" ; |
523 | llvm::dbgs() << llvmModule << "\n" ; |
524 | llvm::dbgs().flush(); |
525 | }); |
526 | #undef DEBUG_TYPE |
527 | if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload) |
528 | return SerializeGPUModuleBase::moduleToObject(llvmModule); |
529 | |
530 | // Emit PTX code. |
531 | std::optional<llvm::TargetMachine *> targetMachine = |
532 | getOrCreateTargetMachine(); |
533 | if (!targetMachine) { |
534 | getOperation().emitError() << "Target Machine unavailable for triple " |
535 | << triple << ", can't optimize with LLVM\n" ; |
536 | return std::nullopt; |
537 | } |
538 | std::optional<std::string> serializedISA = |
539 | translateToISA(llvmModule, targetMachine&: **targetMachine); |
540 | if (!serializedISA) { |
541 | getOperation().emitError() << "Failed translating the module to ISA." ; |
542 | return std::nullopt; |
543 | } |
544 | #define DEBUG_TYPE "serialize-to-isa" |
545 | LLVM_DEBUG({ |
546 | llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n" ; |
547 | llvm::dbgs() << *serializedISA << "\n" ; |
548 | llvm::dbgs().flush(); |
549 | }); |
550 | #undef DEBUG_TYPE |
551 | |
552 | // Return PTX if the compilation target is assembly. |
553 | if (targetOptions.getCompilationTarget() == |
554 | gpu::CompilationTarget::Assembly) { |
555 | // Make sure to include the null terminator. |
556 | StringRef bin(serializedISA->c_str(), serializedISA->size() + 1); |
557 | return SmallVector<char, 0>(bin.begin(), bin.end()); |
558 | } |
559 | |
560 | // Compile to binary. |
561 | #if MLIR_ENABLE_NVPTXCOMPILER |
562 | return compileToBinaryNVPTX(*serializedISA); |
563 | #else |
564 | return compileToBinary(ptxCode: *serializedISA); |
565 | #endif // MLIR_ENABLE_NVPTXCOMPILER |
566 | } |
567 | #endif // MLIR_ENABLE_CUDA_CONVERSIONS |
568 | |
569 | std::optional<SmallVector<char, 0>> |
570 | NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module, |
571 | const gpu::TargetOptions &options) const { |
572 | assert(module && "The module must be non null." ); |
573 | if (!module) |
574 | return std::nullopt; |
575 | if (!mlir::isa<gpu::GPUModuleOp>(module)) { |
576 | module->emitError(message: "Module must be a GPU module." ); |
577 | return std::nullopt; |
578 | } |
579 | #if MLIR_ENABLE_CUDA_CONVERSIONS |
580 | NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options); |
581 | serializer.init(); |
582 | return serializer.run(); |
583 | #else |
584 | module->emitError( |
585 | "The `NVPTX` target was not built. Please enable it when building LLVM." ); |
586 | return std::nullopt; |
587 | #endif // MLIR_ENABLE_CUDA_CONVERSIONS |
588 | } |
589 | |
590 | Attribute |
591 | NVVMTargetAttrImpl::createObject(Attribute attribute, |
592 | const SmallVector<char, 0> &object, |
593 | const gpu::TargetOptions &options) const { |
594 | auto target = cast<NVVMTargetAttr>(attribute); |
595 | gpu::CompilationTarget format = options.getCompilationTarget(); |
596 | DictionaryAttr objectProps; |
597 | Builder builder(attribute.getContext()); |
598 | if (format == gpu::CompilationTarget::Assembly) |
599 | objectProps = builder.getDictionaryAttr( |
600 | value: {builder.getNamedAttr(name: "O" , val: builder.getI32IntegerAttr(value: target.getO()))}); |
601 | return builder.getAttr<gpu::ObjectAttr>( |
602 | attribute, format, |
603 | builder.getStringAttr(StringRef(object.data(), object.size())), |
604 | objectProps); |
605 | } |
606 | |