| 1 | //===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines ROCDL target related functions including registration |
| 10 | // calls for the `#rocdl.target` compilation attribute. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "mlir/Target/LLVM/ROCDL/Target.h" |
| 15 | |
| 16 | #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
| 17 | #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" |
| 18 | #include "mlir/Support/FileUtilities.h" |
| 19 | #include "mlir/Target/LLVM/ROCDL/Utils.h" |
| 20 | #include "mlir/Target/LLVMIR/Export.h" |
| 21 | |
| 22 | #include "llvm/IR/Constants.h" |
| 23 | #include "llvm/MC/MCAsmBackend.h" |
| 24 | #include "llvm/MC/MCAsmInfo.h" |
| 25 | #include "llvm/MC/MCCodeEmitter.h" |
| 26 | #include "llvm/MC/MCContext.h" |
| 27 | #include "llvm/MC/MCInstrInfo.h" |
| 28 | #include "llvm/MC/MCObjectFileInfo.h" |
| 29 | #include "llvm/MC/MCObjectWriter.h" |
| 30 | #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
| 31 | #include "llvm/MC/MCRegisterInfo.h" |
| 32 | #include "llvm/MC/MCStreamer.h" |
| 33 | #include "llvm/MC/MCSubtargetInfo.h" |
| 34 | #include "llvm/MC/TargetRegistry.h" |
| 35 | #include "llvm/Support/FileSystem.h" |
| 36 | #include "llvm/Support/FileUtilities.h" |
| 37 | #include "llvm/Support/Path.h" |
| 38 | #include "llvm/Support/Program.h" |
| 39 | #include "llvm/Support/SourceMgr.h" |
| 40 | #include "llvm/Support/TargetSelect.h" |
| 41 | #include "llvm/TargetParser/TargetParser.h" |
| 42 | |
| 43 | #include <cstdlib> |
| 44 | #include <optional> |
| 45 | |
| 46 | using namespace mlir; |
| 47 | using namespace mlir::ROCDL; |
| 48 | |
// Default ROCm path returned by `getROCMPath` when none of the ROCm
// environment variables is set. Defaults to the empty string unless the macro
// has already been defined (e.g. on the compiler command line).
#if !defined(__DEFAULT_ROCM_PATH__)
#define __DEFAULT_ROCM_PATH__ ""
#endif
| 52 | |
| 53 | namespace { |
| 54 | // Implementation of the `TargetAttrInterface` model. |
| 55 | class ROCDLTargetAttrImpl |
| 56 | : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> { |
| 57 | public: |
| 58 | std::optional<SmallVector<char, 0>> |
| 59 | serializeToObject(Attribute attribute, Operation *module, |
| 60 | const gpu::TargetOptions &options) const; |
| 61 | |
| 62 | Attribute createObject(Attribute attribute, Operation *module, |
| 63 | const SmallVector<char, 0> &object, |
| 64 | const gpu::TargetOptions &options) const; |
| 65 | }; |
| 66 | } // namespace |
| 67 | |
| 68 | // Register the ROCDL dialect, the ROCDL translation and the target interface. |
| 69 | void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels( |
| 70 | DialectRegistry ®istry) { |
| 71 | registry.addExtension(extensionFn: +[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) { |
| 72 | ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx); |
| 73 | }); |
| 74 | } |
| 75 | |
| 76 | void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels( |
| 77 | MLIRContext &context) { |
| 78 | DialectRegistry registry; |
| 79 | registerROCDLTargetInterfaceExternalModels(registry); |
| 80 | context.appendDialectRegistry(registry); |
| 81 | } |
| 82 | |
| 83 | // Search for the ROCM path. |
| 84 | StringRef mlir::ROCDL::getROCMPath() { |
| 85 | if (const char *var = std::getenv(name: "ROCM_PATH" )) |
| 86 | return var; |
| 87 | if (const char *var = std::getenv(name: "ROCM_ROOT" )) |
| 88 | return var; |
| 89 | if (const char *var = std::getenv(name: "ROCM_HOME" )) |
| 90 | return var; |
| 91 | return __DEFAULT_ROCM_PATH__; |
| 92 | } |
| 93 | |
| 94 | SerializeGPUModuleBase::SerializeGPUModuleBase( |
| 95 | Operation &module, ROCDLTargetAttr target, |
| 96 | const gpu::TargetOptions &targetOptions) |
| 97 | : ModuleToObject(module, target.getTriple(), target.getChip(), |
| 98 | target.getFeatures(), target.getO()), |
| 99 | target(target), toolkitPath(targetOptions.getToolkitPath()), |
| 100 | librariesToLink(targetOptions.getLibrariesToLink()) { |
| 101 | |
| 102 | // If `targetOptions` has an empty toolkitPath use `getROCMPath` |
| 103 | if (toolkitPath.empty()) |
| 104 | toolkitPath = getROCMPath(); |
| 105 | |
| 106 | // Append the files in the target attribute. |
| 107 | if (target.getLink()) |
| 108 | librariesToLink.append(target.getLink().begin(), target.getLink().end()); |
| 109 | } |
| 110 | |
| 111 | void SerializeGPUModuleBase::init() { |
| 112 | static llvm::once_flag initializeBackendOnce; |
| 113 | llvm::call_once(flag&: initializeBackendOnce, F: []() { |
| 114 | // If the `AMDGPU` LLVM target was built, initialize it. |
| 115 | #if MLIR_ENABLE_ROCM_CONVERSIONS |
| 116 | LLVMInitializeAMDGPUTarget(); |
| 117 | LLVMInitializeAMDGPUTargetInfo(); |
| 118 | LLVMInitializeAMDGPUTargetMC(); |
| 119 | LLVMInitializeAMDGPUAsmParser(); |
| 120 | LLVMInitializeAMDGPUAsmPrinter(); |
| 121 | #endif |
| 122 | }); |
| 123 | } |
| 124 | |
| 125 | ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; } |
| 126 | |
| 127 | StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; } |
| 128 | |
| 129 | ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const { |
| 130 | return librariesToLink; |
| 131 | } |
| 132 | |
| 133 | LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) { |
| 134 | if (libs == AMDGCNLibraries::None) |
| 135 | return success(); |
| 136 | StringRef pathRef = getToolkitPath(); |
| 137 | |
| 138 | // Get the path for the device libraries |
| 139 | SmallString<256> path; |
| 140 | path.insert(I: path.begin(), From: pathRef.begin(), To: pathRef.end()); |
| 141 | llvm::sys::path::append(path, a: "amdgcn" , b: "bitcode" ); |
| 142 | pathRef = StringRef(path.data(), path.size()); |
| 143 | |
| 144 | // Fail if the path is invalid. |
| 145 | if (!llvm::sys::fs::is_directory(Path: pathRef)) { |
| 146 | getOperation().emitError() << "ROCm amdgcn bitcode path: " << pathRef |
| 147 | << " does not exist or is not a directory" ; |
| 148 | return failure(); |
| 149 | } |
| 150 | |
| 151 | // Helper function for adding a library. |
| 152 | auto addLib = [&](const Twine &lib) -> bool { |
| 153 | auto baseSize = path.size(); |
| 154 | llvm::sys::path::append(path, a: lib); |
| 155 | StringRef pathRef(path.data(), path.size()); |
| 156 | if (!llvm::sys::fs::is_regular_file(Path: pathRef)) { |
| 157 | getOperation().emitRemark() << "bitcode library path: " << pathRef |
| 158 | << " does not exist or is not a file" ; |
| 159 | return true; |
| 160 | } |
| 161 | librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef)); |
| 162 | path.truncate(N: baseSize); |
| 163 | return false; |
| 164 | }; |
| 165 | |
| 166 | // Add ROCm device libraries. Fail if any of the libraries is not found, ie. |
| 167 | // if any of the `addLib` failed. |
| 168 | if ((any(Val: libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc" )) || |
| 169 | (any(Val: libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc" )) || |
| 170 | (any(Val: libs & AMDGCNLibraries::Hip) && addLib("hip.bc" )) || |
| 171 | (any(Val: libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc" ))) |
| 172 | return failure(); |
| 173 | return success(); |
| 174 | } |
| 175 | |
| 176 | std::optional<SmallVector<std::unique_ptr<llvm::Module>>> |
| 177 | SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { |
| 178 | // Return if there are no libs to load. |
| 179 | if (deviceLibs == AMDGCNLibraries::None && librariesToLink.empty()) |
| 180 | return SmallVector<std::unique_ptr<llvm::Module>>(); |
| 181 | if (failed(Result: appendStandardLibs(libs: deviceLibs))) |
| 182 | return std::nullopt; |
| 183 | SmallVector<std::unique_ptr<llvm::Module>> bcFiles; |
| 184 | if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink, |
| 185 | bcFiles, true))) |
| 186 | return std::nullopt; |
| 187 | return std::move(bcFiles); |
| 188 | } |
| 189 | |
| 190 | LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { |
| 191 | // Some ROCM builds don't strip this like they should |
| 192 | if (auto *openclVersion = module.getNamedMetadata(Name: "opencl.ocl.version" )) |
| 193 | module.eraseNamedMetadata(NMD: openclVersion); |
| 194 | // Stop spamming us with clang version numbers |
| 195 | if (auto *ident = module.getNamedMetadata(Name: "llvm.ident" )) |
| 196 | module.eraseNamedMetadata(NMD: ident); |
| 197 | // Override the libModules datalayout and target triple with the compiler's |
| 198 | // data layout should there be a discrepency. |
| 199 | setDataLayoutAndTriple(module); |
| 200 | return success(); |
| 201 | } |
| 202 | |
| 203 | void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { |
| 204 | // If all libraries are not set, traverse the module to determine which |
| 205 | // libraries are required. |
| 206 | if (deviceLibs != AMDGCNLibraries::All) { |
| 207 | for (llvm::Function &f : module.functions()) { |
| 208 | if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { |
| 209 | StringRef funcName = f.getName(); |
| 210 | if ("printf" == funcName) |
| 211 | deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl | |
| 212 | AMDGCNLibraries::Ocml; |
| 213 | if (funcName.starts_with(Prefix: "__ockl_" )) |
| 214 | deviceLibs |= AMDGCNLibraries::Ockl; |
| 215 | if (funcName.starts_with(Prefix: "__ocml_" )) |
| 216 | deviceLibs |= AMDGCNLibraries::Ocml; |
| 217 | if (funcName == "__atomic_work_item_fence" ) |
| 218 | deviceLibs |= AMDGCNLibraries::Hip; |
| 219 | } |
| 220 | } |
| 221 | } |
| 222 | addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(), |
| 223 | target.hasFiniteOnly(), target.hasUnsafeMath(), |
| 224 | target.hasFastMath(), target.hasCorrectSqrt(), |
| 225 | target.getAbi()); |
| 226 | } |
| 227 | |
| 228 | void SerializeGPUModuleBase::addControlVariables( |
| 229 | llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, |
| 230 | bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, |
| 231 | StringRef abiVer) { |
| 232 | // Helper function for adding control variables. |
| 233 | auto addControlVariable = [&module](StringRef name, uint32_t value, |
| 234 | uint32_t bitwidth) { |
| 235 | if (module.getNamedGlobal(Name: name)) |
| 236 | return; |
| 237 | llvm::IntegerType *type = |
| 238 | llvm::IntegerType::getIntNTy(C&: module.getContext(), N: bitwidth); |
| 239 | llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( |
| 240 | module, /*isConstant=*/type, true, |
| 241 | llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, |
| 242 | llvm::ConstantInt::get(Ty: type, V: value), name, /*before=*/nullptr, |
| 243 | /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, |
| 244 | /*addressSpace=*/4); |
| 245 | controlVariable->setVisibility( |
| 246 | llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); |
| 247 | controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8)); |
| 248 | controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); |
| 249 | }; |
| 250 | |
| 251 | // Note that COV6 requires ROCm 6.3+. |
| 252 | int abi = 600; |
| 253 | abiVer.getAsInteger(Radix: 0, Result&: abi); |
| 254 | module.addModuleFlag(Behavior: llvm::Module::Error, Key: "amdhsa_code_object_version" , Val: abi); |
| 255 | // Return if no device libraries are required. |
| 256 | if (libs == AMDGCNLibraries::None) |
| 257 | return; |
| 258 | // Add ocml related control variables. |
| 259 | if (any(Val: libs & AMDGCNLibraries::Ocml)) { |
| 260 | addControlVariable("__oclc_finite_only_opt" , finiteOnly || fastMath, 8); |
| 261 | addControlVariable("__oclc_daz_opt" , daz || fastMath, 8); |
| 262 | addControlVariable("__oclc_correctly_rounded_sqrt32" , |
| 263 | correctSqrt && !fastMath, 8); |
| 264 | addControlVariable("__oclc_unsafe_math_opt" , unsafeMath || fastMath, 8); |
| 265 | } |
| 266 | // Add ocml or ockl related control variables. |
| 267 | if (any(Val: libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) { |
| 268 | addControlVariable("__oclc_wavefrontsize64" , wave64, 8); |
| 269 | // Get the ISA version. |
| 270 | llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(GPU: chip); |
| 271 | // Add the ISA control variable. |
| 272 | addControlVariable("__oclc_ISA_version" , |
| 273 | isaVersion.Minor + 100 * isaVersion.Stepping + |
| 274 | 1000 * isaVersion.Major, |
| 275 | 32); |
| 276 | addControlVariable("__oclc_ABI_version" , abi, 32); |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | std::optional<SmallVector<char, 0>> |
| 281 | SerializeGPUModuleBase::assembleIsa(StringRef isa) { |
| 282 | auto loc = getOperation().getLoc(); |
| 283 | |
| 284 | StringRef targetTriple = this->triple; |
| 285 | |
| 286 | SmallVector<char, 0> result; |
| 287 | llvm::raw_svector_ostream os(result); |
| 288 | |
| 289 | llvm::Triple triple(llvm::Triple::normalize(Str: targetTriple)); |
| 290 | std::string error; |
| 291 | const llvm::Target *target = |
| 292 | llvm::TargetRegistry::lookupTarget(TripleStr: triple.normalize(), Error&: error); |
| 293 | if (!target) { |
| 294 | emitError(loc, Twine("failed to lookup target: " ) + error); |
| 295 | return std::nullopt; |
| 296 | } |
| 297 | |
| 298 | llvm::SourceMgr srcMgr; |
| 299 | srcMgr.AddNewSourceBuffer(F: llvm::MemoryBuffer::getMemBuffer(InputData: isa), IncludeLoc: SMLoc()); |
| 300 | |
| 301 | const llvm::MCTargetOptions mcOptions; |
| 302 | std::unique_ptr<llvm::MCRegisterInfo> mri( |
| 303 | target->createMCRegInfo(TT: targetTriple)); |
| 304 | std::unique_ptr<llvm::MCAsmInfo> mai( |
| 305 | target->createMCAsmInfo(MRI: *mri, TheTriple: targetTriple, Options: mcOptions)); |
| 306 | std::unique_ptr<llvm::MCSubtargetInfo> sti( |
| 307 | target->createMCSubtargetInfo(TheTriple: targetTriple, CPU: chip, Features: features)); |
| 308 | |
| 309 | llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, |
| 310 | &mcOptions); |
| 311 | std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo( |
| 312 | Ctx&: ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); |
| 313 | ctx.setObjectFileInfo(mofi.get()); |
| 314 | |
| 315 | SmallString<128> cwd; |
| 316 | if (!llvm::sys::fs::current_path(result&: cwd)) |
| 317 | ctx.setCompilationDir(cwd); |
| 318 | |
| 319 | std::unique_ptr<llvm::MCStreamer> mcStreamer; |
| 320 | std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo()); |
| 321 | |
| 322 | llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(II: *mcii, Ctx&: ctx); |
| 323 | llvm::MCAsmBackend *mab = target->createMCAsmBackend(STI: *sti, MRI: *mri, Options: mcOptions); |
| 324 | mcStreamer.reset(p: target->createMCObjectStreamer( |
| 325 | T: triple, Ctx&: ctx, TAB: std::unique_ptr<llvm::MCAsmBackend>(mab), |
| 326 | OW: mab->createObjectWriter(OS&: os), Emitter: std::unique_ptr<llvm::MCCodeEmitter>(ce), |
| 327 | STI: *sti)); |
| 328 | |
| 329 | std::unique_ptr<llvm::MCAsmParser> parser( |
| 330 | createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); |
| 331 | std::unique_ptr<llvm::MCTargetAsmParser> tap( |
| 332 | target->createMCAsmParser(STI: *sti, Parser&: *parser, MII: *mcii, Options: mcOptions)); |
| 333 | |
| 334 | if (!tap) { |
| 335 | emitError(loc, "assembler initialization error" ); |
| 336 | return std::nullopt; |
| 337 | } |
| 338 | |
| 339 | parser->setTargetParser(*tap); |
| 340 | parser->Run(NoInitialTextSection: false); |
| 341 | return std::move(result); |
| 342 | } |
| 343 | |
| 344 | std::optional<SmallVector<char, 0>> |
| 345 | SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { |
| 346 | // Assemble the ISA. |
| 347 | std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(isa: serializedISA); |
| 348 | |
| 349 | if (!isaBinary) { |
| 350 | getOperation().emitError() << "failed during ISA assembling" ; |
| 351 | return std::nullopt; |
| 352 | } |
| 353 | |
| 354 | // Save the ISA binary to a temp file. |
| 355 | int tempIsaBinaryFd = -1; |
| 356 | SmallString<128> tempIsaBinaryFilename; |
| 357 | if (llvm::sys::fs::createTemporaryFile(Prefix: "kernel%%" , Suffix: "o" , ResultFD&: tempIsaBinaryFd, |
| 358 | ResultPath&: tempIsaBinaryFilename)) { |
| 359 | getOperation().emitError() |
| 360 | << "failed to create a temporary file for dumping the ISA binary" ; |
| 361 | return std::nullopt; |
| 362 | } |
| 363 | llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); |
| 364 | { |
| 365 | llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); |
| 366 | tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size()); |
| 367 | tempIsaBinaryOs.flush(); |
| 368 | } |
| 369 | |
| 370 | // Create a temp file for HSA code object. |
| 371 | SmallString<128> tempHsacoFilename; |
| 372 | if (llvm::sys::fs::createTemporaryFile(Prefix: "kernel" , Suffix: "hsaco" , |
| 373 | ResultPath&: tempHsacoFilename)) { |
| 374 | getOperation().emitError() |
| 375 | << "failed to create a temporary file for the HSA code object" ; |
| 376 | return std::nullopt; |
| 377 | } |
| 378 | llvm::FileRemover cleanupHsaco(tempHsacoFilename); |
| 379 | |
| 380 | llvm::SmallString<128> lldPath(toolkitPath); |
| 381 | llvm::sys::path::append(path&: lldPath, a: "llvm" , b: "bin" , c: "ld.lld" ); |
| 382 | int lldResult = llvm::sys::ExecuteAndWait( |
| 383 | Program: lldPath, |
| 384 | Args: {"ld.lld" , "-shared" , tempIsaBinaryFilename, "-o" , tempHsacoFilename}); |
| 385 | if (lldResult != 0) { |
| 386 | getOperation().emitError() << "lld invocation failed" ; |
| 387 | return std::nullopt; |
| 388 | } |
| 389 | |
| 390 | // Load the HSA code object. |
| 391 | auto hsacoFile = |
| 392 | llvm::MemoryBuffer::getFile(Filename: tempHsacoFilename, /*IsText=*/false); |
| 393 | if (!hsacoFile) { |
| 394 | getOperation().emitError() |
| 395 | << "failed to read the HSA code object from the temp file" ; |
| 396 | return std::nullopt; |
| 397 | } |
| 398 | |
| 399 | StringRef buffer = (*hsacoFile)->getBuffer(); |
| 400 | |
| 401 | return SmallVector<char, 0>(buffer.begin(), buffer.end()); |
| 402 | } |
| 403 | |
| 404 | std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl( |
| 405 | const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) { |
| 406 | // Return LLVM IR if the compilation target is offload. |
| 407 | #define DEBUG_TYPE "serialize-to-llvm" |
| 408 | LLVM_DEBUG({ |
| 409 | llvm::dbgs() << "LLVM IR for module: " |
| 410 | << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n" |
| 411 | << llvmModule << "\n" ; |
| 412 | }); |
| 413 | #undef DEBUG_TYPE |
| 414 | if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload) |
| 415 | return SerializeGPUModuleBase::moduleToObject(llvmModule); |
| 416 | |
| 417 | std::optional<llvm::TargetMachine *> targetMachine = |
| 418 | getOrCreateTargetMachine(); |
| 419 | if (!targetMachine) { |
| 420 | getOperation().emitError() << "target Machine unavailable for triple " |
| 421 | << triple << ", can't compile with LLVM" ; |
| 422 | return std::nullopt; |
| 423 | } |
| 424 | |
| 425 | // Translate the Module to ISA. |
| 426 | std::optional<std::string> serializedISA = |
| 427 | translateToISA(llvmModule, targetMachine&: **targetMachine); |
| 428 | if (!serializedISA) { |
| 429 | getOperation().emitError() << "failed translating the module to ISA" ; |
| 430 | return std::nullopt; |
| 431 | } |
| 432 | #define DEBUG_TYPE "serialize-to-isa" |
| 433 | LLVM_DEBUG({ |
| 434 | llvm::dbgs() << "ISA for module: " |
| 435 | << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n" |
| 436 | << *serializedISA << "\n" ; |
| 437 | }); |
| 438 | #undef DEBUG_TYPE |
| 439 | // Return ISA assembly code if the compilation target is assembly. |
| 440 | if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly) |
| 441 | return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end()); |
| 442 | |
| 443 | // Compiling to binary requires a valid ROCm path, fail if it's not found. |
| 444 | if (getToolkitPath().empty()) { |
| 445 | getOperation().emitError() << "invalid ROCm path, please set a valid path" ; |
| 446 | return std::nullopt; |
| 447 | } |
| 448 | |
| 449 | // Compile to binary. |
| 450 | return compileToBinary(serializedISA: *serializedISA); |
| 451 | } |
| 452 | |
| 453 | #if MLIR_ENABLE_ROCM_CONVERSIONS |
| 454 | namespace { |
| 455 | class AMDGPUSerializer : public SerializeGPUModuleBase { |
| 456 | public: |
| 457 | AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, |
| 458 | const gpu::TargetOptions &targetOptions); |
| 459 | |
| 460 | std::optional<SmallVector<char, 0>> |
| 461 | moduleToObject(llvm::Module &llvmModule) override; |
| 462 | |
| 463 | private: |
| 464 | // Target options. |
| 465 | gpu::TargetOptions targetOptions; |
| 466 | }; |
| 467 | } // namespace |
| 468 | |
| 469 | AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, |
| 470 | const gpu::TargetOptions &targetOptions) |
| 471 | : SerializeGPUModuleBase(module, target, targetOptions), |
| 472 | targetOptions(targetOptions) {} |
| 473 | |
| 474 | std::optional<SmallVector<char, 0>> |
| 475 | AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { |
| 476 | return moduleToObjectImpl(targetOptions, llvmModule); |
| 477 | } |
| 478 | #endif // MLIR_ENABLE_ROCM_CONVERSIONS |
| 479 | |
| 480 | std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject( |
| 481 | Attribute attribute, Operation *module, |
| 482 | const gpu::TargetOptions &options) const { |
| 483 | assert(module && "The module must be non null." ); |
| 484 | if (!module) |
| 485 | return std::nullopt; |
| 486 | if (!mlir::isa<gpu::GPUModuleOp>(module)) { |
| 487 | module->emitError(message: "module must be a GPU module" ); |
| 488 | return std::nullopt; |
| 489 | } |
| 490 | #if MLIR_ENABLE_ROCM_CONVERSIONS |
| 491 | AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute), |
| 492 | options); |
| 493 | serializer.init(); |
| 494 | return serializer.run(); |
| 495 | #else |
| 496 | module->emitError("the `AMDGPU` target was not built. Please enable it when " |
| 497 | "building LLVM" ); |
| 498 | return std::nullopt; |
| 499 | #endif // MLIR_ENABLE_ROCM_CONVERSIONS |
| 500 | } |
| 501 | |
| 502 | Attribute |
| 503 | ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module, |
| 504 | const SmallVector<char, 0> &object, |
| 505 | const gpu::TargetOptions &options) const { |
| 506 | gpu::CompilationTarget format = options.getCompilationTarget(); |
| 507 | // If format is `fatbin` transform it to binary as `fatbin` is not yet |
| 508 | // supported. |
| 509 | gpu::KernelTableAttr kernels; |
| 510 | if (format > gpu::CompilationTarget::Binary) { |
| 511 | format = gpu::CompilationTarget::Binary; |
| 512 | kernels = ROCDL::getKernelMetadata(module, object); |
| 513 | } |
| 514 | DictionaryAttr properties{}; |
| 515 | Builder builder(attribute.getContext()); |
| 516 | StringAttr objectStr = |
| 517 | builder.getStringAttr(StringRef(object.data(), object.size())); |
| 518 | return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr, |
| 519 | properties, kernels); |
| 520 | } |
| 521 | |