//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_

#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"

namespace mlir {

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

/// Find or create an external function declaration in the given module.
LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc,
                                     OpBuilder &b, StringRef name,
                                     LLVM::LLVMFunctionType type);

/// Create a global that contains the given string. If a global with the same
/// string already exists in the module, return that global.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc,
                                         gpu::GPUModuleOp moduleOp, Type llvmI8,
                                         StringRef namePrefix, StringRef str,
                                         uint64_t alignment = 0,
                                         unsigned addrSpace = 0);
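
// Example (an illustrative sketch only, not part of this header's API): a
// printf-style lowering could use both helpers to obtain a device-side
// function declaration and a format-string global. The names "printf" and
// "printfFormat_", the format string, and the surrounding variables
// (`rewriter`, `loc`, `moduleOp`) are assumptions made for the sketch:
//
//   MLIRContext *ctx = rewriter.getContext();
//   Type llvmI8 = IntegerType::get(ctx, 8);
//   Type llvmI32 = IntegerType::get(ctx, 32);
//   auto ptrTy = LLVM::LLVMPointerType::get(ctx);
//   auto printfTy =
//       LLVM::LLVMFunctionType::get(llvmI32, {ptrTy}, /*isVarArg=*/true);
//   LLVM::LLVMFuncOp printfDecl =
//       getOrDefineFunction(moduleOp, loc, rewriter, "printf", printfTy);
//   LLVM::GlobalOp formatGlobal = getOrCreateStringConstant(
//       rewriter, loc, moduleOp, llvmI8, "printfFormat_", "hello\n");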

//===----------------------------------------------------------------------===//
// Lowering Patterns
//===----------------------------------------------------------------------===//

/// Lowering for gpu.dynamic.shared.memory to the LLVM dialect. The pattern
/// first creates a 0-sized global array symbol, similar to what LLVM expects,
/// then constructs a memref descriptor from these values and returns it.
struct GPUDynamicSharedMemoryOpLowering
    : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
  using ConvertOpToLLVMPattern<
      gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
  GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter,
                                   unsigned alignmentBit = 0,
                                   PatternBenefit benefit = 1)
      : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter, benefit),
        alignmentBit(alignmentBit) {}

  LogicalResult
  matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  // Alignment (in bits) for the generated dynamic shared memory global.
  unsigned alignmentBit;
};
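
// A minimal registration sketch for this pattern. The helper name and the
// 64-bit alignment value are assumptions for illustration, not requirements:
//
//   void populateMySharedMemoryPattern(const LLVMTypeConverter &converter,
//                                      RewritePatternSet &patterns) {
//     patterns.add<GPUDynamicSharedMemoryOpLowering>(converter,
//                                                    /*alignmentBit=*/64);
//   }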

struct GPUFuncOpLoweringOptions {
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use when declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name used to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments = false;
};

struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
  GPUFuncOpLowering(const LLVMTypeConverter &converter,
                    const GPUFuncOpLoweringOptions &options,
                    PatternBenefit benefit = 1)
      : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter, benefit),
        allocaAddrSpace(options.allocaAddrSpace),
        workgroupAddrSpace(options.workgroupAddrSpace),
        kernelAttributeName(options.kernelAttributeName),
        kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
        kernelCallingConvention(options.kernelCallingConvention),
        nonKernelCallingConvention(options.nonKernelCallingConvention),
        encodeWorkgroupAttributionsAsArguments(
            options.encodeWorkgroupAttributionsAsArguments) {}

  LogicalResult
  matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use when declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name used to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments;
};
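
// A sketch of how a target-specific conversion might configure this pattern,
// assuming `ctx`, `converter`, and `patterns` come from the enclosing populate
// function. The concrete values and attribute names ("nvvm.kernel",
// "nvvm.maxntid") are illustrative assumptions; a real lowering should use
// whatever its target dialect expects:
//
//   GPUFuncOpLoweringOptions options;
//   options.allocaAddrSpace = 0;
//   options.workgroupAddrSpace = 3;
//   options.kernelAttributeName = StringAttr::get(ctx, "nvvm.kernel");
//   options.kernelBlockSizeAttributeName =
//       StringAttr::get(ctx, "nvvm.maxntid");
//   patterns.add<GPUFuncOpLowering>(converter, options);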

/// The lowering of gpu.printf to a call to HIP hostcalls.
///
/// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
/// to deal with %s (even if there were first-class strings in MLIR, they're
/// not legal input to gpu.printf) or non-constant format strings.
struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

/// The lowering of gpu.printf to a call to an external printf() function.
///
/// This pattern adds a declaration of printf() to the GPUModule if needed
/// and separates out the format strings into global constants. For some
/// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
/// will lower printf calls to appropriate device-side code.
struct GPUPrintfOpToLLVMCallLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter,
                                int addressSpace = 0)
      : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
        addressSpace(addressSpace) {}

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  int addressSpace;
};

/// Lowering of gpu.printf to a call to the vprintf standard library function.
struct GPUPrintfOpToVPrintfLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
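
// Typically only one of the gpu.printf lowerings above is registered for a
// given target. A sketch, where the pattern-to-target mapping and the address
// space value are assumptions for illustration:
//
//   // OpenCL-style runtimes: rely on an external printf() declaration.
//   patterns.add<GPUPrintfOpToLLVMCallLowering>(converter, /*addressSpace=*/0);
//   // Targets that expose vprintf could instead use:
//   // patterns.add<GPUPrintfOpToVPrintfLowering>(converter);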

struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
  using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

namespace impl {
/// Unrolls op to array/vector elements.
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
                                ConversionPatternRewriter &rewriter,
                                const LLVMTypeConverter &converter);
} // namespace impl

/// Unrolls SourceOp to array/vector elements.
template <typename SourceOp>
struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> {
public:
  using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
                                   *this->getTypeConverter());
  }
};
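
// A sketch of instantiating this template for a concrete op; the choice of
// math::ExpOp (and the corresponding Math dialect include) is an illustrative
// assumption:
//
//   patterns.add<ScalarizeVectorOpLowering<math::ExpOp>>(converter);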

} // namespace mlir

#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_