| 1 | //===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | #ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ |
| 9 | #define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ |
| 10 | |
| 11 | #include "mlir/Conversion/LLVMCommon/Pattern.h" |
| 12 | #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
| 13 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
| 14 | |
| 15 | namespace mlir { |
| 16 | |
| 17 | //===----------------------------------------------------------------------===// |
| 18 | // Helper Functions |
| 19 | //===----------------------------------------------------------------------===// |
| 20 | |
/// Find or create an external function declaration in the given module.
/// If `moduleOp` already contains a function named `name`, that function is
/// returned; otherwise a new declaration with signature `type` is created at
/// `loc` using builder `b`.
LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc,
                                     OpBuilder &b, StringRef name,
                                     LLVM::LLVMFunctionType type);
| 25 | |
/// Create a global that contains the given string `str`. If a global with the
/// same string already exists in the module, return that global instead of
/// creating a duplicate. The global's symbol name is derived from
/// `namePrefix`; `llvmI8` is the converted i8 element type, and `alignment`
/// and `addrSpace` configure the generated global (0 meaning the defaults).
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc,
                                         gpu::GPUModuleOp moduleOp, Type llvmI8,
                                         StringRef namePrefix, StringRef str,
                                         uint64_t alignment = 0,
                                         unsigned addrSpace = 0);
| 33 | |
| 34 | //===----------------------------------------------------------------------===// |
| 35 | // Lowering Patterns |
| 36 | //===----------------------------------------------------------------------===// |
| 37 | |
/// Lowering for gpu.dynamic.shared.memory to the LLVM dialect. The pattern
/// first creates a 0-sized global array symbol, as LLVM expects, then
/// constructs a memref descriptor from it and returns that descriptor.
struct GPUDynamicSharedMemoryOpLowering
    : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
  using ConvertOpToLLVMPattern<
      gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
  /// `alignmentBit` is forwarded to the generated global; 0 keeps the default.
  GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter,
                                   unsigned alignmentBit = 0,
                                   PatternBenefit benefit = 1)
      : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter, benefit),
        alignmentBit(alignmentBit) {}

  LogicalResult
  matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  // Alignment, expressed in bits, applied to the underlying global.
  unsigned alignmentBit;
};
| 59 | |
/// Configuration options for lowering gpu.func to an LLVM function.
struct GPUFuncOpLoweringOptions {
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name to use to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments = false;
};
| 82 | |
/// Lowering of gpu.func to an LLVM function, configured by
/// GPUFuncOpLoweringOptions (address spaces, kernel attributes, calling
/// conventions, and attribution encoding).
struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
  GPUFuncOpLowering(const LLVMTypeConverter &converter,
                    const GPUFuncOpLoweringOptions &options,
                    PatternBenefit benefit = 1)
      : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter, benefit),
        allocaAddrSpace(options.allocaAddrSpace),
        workgroupAddrSpace(options.workgroupAddrSpace),
        kernelAttributeName(options.kernelAttributeName),
        kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
        kernelCallingConvention(options.kernelCallingConvention),
        nonKernelCallingConvention(options.nonKernelCallingConvention),
        encodeWorkgroupAttributionsAsArguments(
            options.encodeWorkgroupAttributionsAsArguments) {}

  LogicalResult
  matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name to use to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments;
};
| 123 | |
/// The lowering of gpu.printf to a call to HIP hostcalls.
///
/// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
/// to deal with %s (even if there were first-class strings in MLIR, they're
/// not legal input to gpu.printf) or non-constant format strings.
struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
| 136 | |
| 137 | /// The lowering of gpu.printf to a call to an external printf() function |
| 138 | /// |
| 139 | /// This pass will add a declaration of printf() to the GPUModule if needed |
| 140 | /// and separate out the format strings into global constants. For some |
| 141 | /// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler |
| 142 | /// will lower printf calls to appropriate device-side code |
| 143 | struct GPUPrintfOpToLLVMCallLowering |
| 144 | : public ConvertOpToLLVMPattern<gpu::PrintfOp> { |
| 145 | GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter, |
| 146 | int addressSpace = 0) |
| 147 | : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter), |
| 148 | addressSpace(addressSpace) {} |
| 149 | |
| 150 | LogicalResult |
| 151 | matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, |
| 152 | ConversionPatternRewriter &rewriter) const override; |
| 153 | |
| 154 | private: |
| 155 | int addressSpace; |
| 156 | }; |
| 157 | |
/// Lowering of gpu.printf to a call to the vprintf standard library function.
struct GPUPrintfOpToVPrintfLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
| 167 | |
/// Lowering of gpu.return to the LLVM dialect.
struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
  using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
| 175 | |
namespace impl {
/// Unrolls `op` to array/vector elements, using `converter` for type
/// conversion and `rewriter` to emit the replacement. Shared implementation
/// used by the ScalarizeVectorOpLowering template below.
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
                                ConversionPatternRewriter &rewriter,
                                const LLVMTypeConverter &converter);
} // namespace impl
| 182 | |
| 183 | /// Unrolls SourceOp to array/vector elements. |
| 184 | template <typename SourceOp> |
| 185 | struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> { |
| 186 | public: |
| 187 | using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern; |
| 188 | |
| 189 | LogicalResult |
| 190 | matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor, |
| 191 | ConversionPatternRewriter &rewriter) const override { |
| 192 | return impl::scalarizeVectorOp(op, operands: adaptor.getOperands(), rewriter, |
| 193 | converter: *this->getTypeConverter()); |
| 194 | } |
| 195 | }; |
| 196 | |
| 197 | } // namespace mlir |
| 198 | |
| 199 | #endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ |
| 200 | |