| 1 | //===- Utils.cpp - MLIR ROCDL target utils ----------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This files defines ROCDL target related utility classes and functions. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "mlir/Target/LLVM/ROCDL/Utils.h" |
| 14 | #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
| 15 | |
| 16 | #include "llvm/ADT/StringMap.h" |
| 17 | #include "llvm/Frontend/Offloading/Utility.h" |
| 18 | |
| 19 | using namespace mlir; |
| 20 | using namespace mlir::ROCDL; |
| 21 | |
| 22 | std::optional<DenseMap<StringAttr, NamedAttrList>> |
| 23 | mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder, |
| 24 | ArrayRef<char> elfData) { |
| 25 | uint16_t elfABIVersion; |
| 26 | llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> kernels; |
| 27 | llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), |
| 28 | "buffer" ); |
| 29 | // Get the metadata. |
| 30 | llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( |
| 31 | MemBuffer: buffer, KernelInfoMap&: kernels, ELFABIVersion&: elfABIVersion); |
| 32 | // Return `nullopt` if the metadata couldn't be retrieved. |
| 33 | if (error) { |
| 34 | llvm::consumeError(Err: std::move(error)); |
| 35 | return std::nullopt; |
| 36 | } |
| 37 | // Helper lambda for converting values. |
| 38 | auto getI32Array = [&builder](const uint32_t *array) { |
| 39 | return builder.getDenseI32ArrayAttr(values: {static_cast<int32_t>(array[0]), |
| 40 | static_cast<int32_t>(array[1]), |
| 41 | static_cast<int32_t>(array[2])}); |
| 42 | }; |
| 43 | DenseMap<StringAttr, NamedAttrList> kernelMD; |
| 44 | for (const auto &[name, kernel] : kernels) { |
| 45 | NamedAttrList attrs; |
| 46 | // Add kernel metadata. |
| 47 | attrs.append(name: "agpr_count" , attr: builder.getI64IntegerAttr(value: kernel.AGPRCount)); |
| 48 | attrs.append(name: "sgpr_count" , attr: builder.getI64IntegerAttr(value: kernel.SGPRCount)); |
| 49 | attrs.append(name: "vgpr_count" , attr: builder.getI64IntegerAttr(value: kernel.VGPRCount)); |
| 50 | attrs.append(name: "sgpr_spill_count" , |
| 51 | attr: builder.getI64IntegerAttr(value: kernel.SGPRSpillCount)); |
| 52 | attrs.append(name: "vgpr_spill_count" , |
| 53 | attr: builder.getI64IntegerAttr(value: kernel.VGPRSpillCount)); |
| 54 | attrs.append(name: "wavefront_size" , |
| 55 | attr: builder.getI64IntegerAttr(value: kernel.WavefrontSize)); |
| 56 | attrs.append(name: "max_flat_workgroup_size" , |
| 57 | attr: builder.getI64IntegerAttr(value: kernel.MaxFlatWorkgroupSize)); |
| 58 | attrs.append(name: "group_segment_fixed_size" , |
| 59 | attr: builder.getI64IntegerAttr(value: kernel.GroupSegmentList)); |
| 60 | attrs.append(name: "private_segment_fixed_size" , |
| 61 | attr: builder.getI64IntegerAttr(value: kernel.PrivateSegmentSize)); |
| 62 | attrs.append(name: "reqd_workgroup_size" , |
| 63 | attr: getI32Array(kernel.RequestedWorkgroupSize)); |
| 64 | attrs.append(name: "workgroup_size_hint" , attr: getI32Array(kernel.WorkgroupSizeHint)); |
| 65 | kernelMD[builder.getStringAttr(bytes: name)] = std::move(attrs); |
| 66 | } |
| 67 | return std::move(kernelMD); |
| 68 | } |
| 69 | |
| 70 | gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule, |
| 71 | ArrayRef<char> elfData) { |
| 72 | auto module = cast<gpu::GPUModuleOp>(Val: gpuModule); |
| 73 | Builder builder(module.getContext()); |
| 74 | SmallVector<gpu::KernelMetadataAttr> kernels; |
| 75 | std::optional<DenseMap<StringAttr, NamedAttrList>> mdMapOrNull = |
| 76 | getAMDHSAKernelsELFMetadata(builder, elfData); |
| 77 | for (auto funcOp : module.getBody()->getOps<LLVM::LLVMFuncOp>()) { |
| 78 | if (!funcOp->getDiscardableAttr(name: "rocdl.kernel" )) |
| 79 | continue; |
| 80 | kernels.push_back(Elt: gpu::KernelMetadataAttr::get( |
| 81 | kernel: funcOp, metadata: mdMapOrNull ? builder.getDictionaryAttr( |
| 82 | value: mdMapOrNull->lookup(Val: funcOp.getNameAttr())) |
| 83 | : nullptr)); |
| 84 | } |
| 85 | return gpu::KernelTableAttr::get(context: gpuModule->getContext(), kernels); |
| 86 | } |
| 87 | |