1 | //===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ |
9 | #define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ |
10 | |
11 | #include "mlir/Conversion/LLVMCommon/Pattern.h" |
12 | #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
13 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
14 | |
15 | namespace mlir { |
16 | |
17 | /// Lowering for gpu.dynamic.shared.memory to LLVM dialect. The pattern first |
18 | /// create a 0-sized global array symbol similar as LLVM expects. It constructs |
19 | /// a memref descriptor with these values and return it. |
20 | struct GPUDynamicSharedMemoryOpLowering |
21 | : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> { |
22 | using ConvertOpToLLVMPattern< |
23 | gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern; |
24 | GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter, |
25 | unsigned alignmentBit = 0) |
26 | : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter), |
27 | alignmentBit(alignmentBit) {} |
28 | |
29 | LogicalResult |
30 | matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor, |
31 | ConversionPatternRewriter &rewriter) const override; |
32 | |
33 | private: |
34 | // Alignment bit |
35 | unsigned alignmentBit; |
36 | }; |
37 | |
38 | struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> { |
39 | GPUFuncOpLowering( |
40 | const LLVMTypeConverter &converter, unsigned allocaAddrSpace, |
41 | unsigned workgroupAddrSpace, StringAttr kernelAttributeName, |
42 | std::optional<StringAttr> kernelBlockSizeAttributeName = std::nullopt) |
43 | : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter), |
44 | allocaAddrSpace(allocaAddrSpace), |
45 | workgroupAddrSpace(workgroupAddrSpace), |
46 | kernelAttributeName(kernelAttributeName), |
47 | kernelBlockSizeAttributeName(kernelBlockSizeAttributeName) {} |
48 | |
49 | LogicalResult |
50 | matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, |
51 | ConversionPatternRewriter &rewriter) const override; |
52 | |
53 | private: |
54 | /// The address space to use for `alloca`s in private memory. |
55 | unsigned allocaAddrSpace; |
56 | /// The address space to use declaring workgroup memory. |
57 | unsigned workgroupAddrSpace; |
58 | |
59 | /// The attribute name to use instead of `gpu.kernel`. |
60 | StringAttr kernelAttributeName; |
61 | |
62 | /// The attribute name to to set block size |
63 | std::optional<StringAttr> kernelBlockSizeAttributeName; |
64 | }; |
65 | |
66 | /// The lowering of gpu.printf to a call to HIP hostcalls |
67 | /// |
68 | /// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have |
69 | /// to deal with %s (even if there were first-class strings in MLIR, they're not |
70 | /// legal input to gpu.printf) or non-constant format strings |
71 | struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> { |
72 | using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern; |
73 | |
74 | LogicalResult |
75 | matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, |
76 | ConversionPatternRewriter &rewriter) const override; |
77 | }; |
78 | |
79 | /// The lowering of gpu.printf to a call to an external printf() function |
80 | /// |
81 | /// This pass will add a declaration of printf() to the GPUModule if needed |
82 | /// and seperate out the format strings into global constants. For some |
83 | /// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler |
84 | /// will lower printf calls to appropriate device-side code |
85 | struct GPUPrintfOpToLLVMCallLowering |
86 | : public ConvertOpToLLVMPattern<gpu::PrintfOp> { |
87 | GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter, |
88 | int addressSpace = 0) |
89 | : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter), |
90 | addressSpace(addressSpace) {} |
91 | |
92 | LogicalResult |
93 | matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, |
94 | ConversionPatternRewriter &rewriter) const override; |
95 | |
96 | private: |
97 | int addressSpace; |
98 | }; |
99 | |
100 | /// Lowering of gpu.printf to a vprintf standard library. |
101 | struct GPUPrintfOpToVPrintfLowering |
102 | : public ConvertOpToLLVMPattern<gpu::PrintfOp> { |
103 | using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern; |
104 | |
105 | LogicalResult |
106 | matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, |
107 | ConversionPatternRewriter &rewriter) const override; |
108 | }; |
109 | |
110 | struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> { |
111 | using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern; |
112 | |
113 | LogicalResult |
114 | matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, |
115 | ConversionPatternRewriter &rewriter) const override { |
116 | rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, adaptor.getOperands()); |
117 | return success(); |
118 | } |
119 | }; |
120 | |
121 | namespace impl { |
122 | /// Unrolls op if it's operating on vectors. |
123 | LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, |
124 | ConversionPatternRewriter &rewriter, |
125 | const LLVMTypeConverter &converter); |
126 | } // namespace impl |
127 | |
128 | /// Rewriting that unrolls SourceOp to scalars if it's operating on vectors. |
129 | template <typename SourceOp> |
130 | struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> { |
131 | public: |
132 | using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern; |
133 | |
134 | LogicalResult |
135 | matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor, |
136 | ConversionPatternRewriter &rewriter) const override { |
137 | return impl::scalarizeVectorOp(op, operands: adaptor.getOperands(), rewriter, |
138 | converter: *this->getTypeConverter()); |
139 | } |
140 | }; |
141 | } // namespace mlir |
142 | |
143 | #endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ |
144 | |