1 | //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | /// \file |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H |
11 | #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H |
12 | |
13 | #include "llvm/IR/PassManager.h" |
14 | #include "llvm/Pass.h" |
15 | #include "llvm/Support/AMDGPUAddrSpace.h" |
16 | #include "llvm/Support/CodeGen.h" |
17 | |
18 | namespace llvm { |
19 | |
// Forward declarations: this header only uses these types through pointers and
// references, so the full definitions are not needed here.
class TargetMachine;
class AMDGPUTargetMachine;
22 | |
23 | // GlobalISel passes |
24 | void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); |
25 | FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); |
26 | void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); |
27 | FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); |
28 | FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); |
29 | void initializeAMDGPURegBankCombinerPass(PassRegistry &); |
30 | |
31 | void initializeAMDGPURegBankSelectPass(PassRegistry &); |
32 | |
33 | // SI Passes |
34 | FunctionPass *createGCNDPPCombinePass(); |
35 | FunctionPass *createSIAnnotateControlFlowPass(); |
36 | FunctionPass *createSIFoldOperandsPass(); |
37 | FunctionPass *createSIPeepholeSDWAPass(); |
38 | FunctionPass *createSILowerI1CopiesPass(); |
39 | FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); |
40 | FunctionPass *createSIShrinkInstructionsPass(); |
41 | FunctionPass *createSILoadStoreOptimizerPass(); |
42 | FunctionPass *createSIWholeQuadModePass(); |
43 | FunctionPass *createSIFixControlFlowLiveIntervalsPass(); |
44 | FunctionPass *createSIOptimizeExecMaskingPreRAPass(); |
45 | FunctionPass *createSIOptimizeVGPRLiveRangePass(); |
46 | FunctionPass *createSIFixSGPRCopiesPass(); |
47 | FunctionPass *createLowerWWMCopiesPass(); |
48 | FunctionPass *createSIMemoryLegalizerPass(); |
49 | FunctionPass *createSIInsertWaitcntsPass(); |
50 | FunctionPass *createSIPreAllocateWWMRegsPass(); |
51 | FunctionPass *createSIFormMemoryClausesPass(); |
52 | |
53 | FunctionPass *createSIPostRABundlerPass(); |
54 | FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); |
55 | ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); |
56 | FunctionPass *createAMDGPUCodeGenPreparePass(); |
57 | FunctionPass *createAMDGPULateCodeGenPreparePass(); |
58 | FunctionPass *createAMDGPUMachineCFGStructurizerPass(); |
59 | FunctionPass *createAMDGPURewriteOutArgumentsPass(); |
60 | ModulePass * |
61 | createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr); |
62 | FunctionPass *createSIModeRegisterPass(); |
63 | FunctionPass *createGCNPreRAOptimizationsPass(); |
64 | |
65 | struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { |
66 | AMDGPUSimplifyLibCallsPass() {} |
67 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
68 | }; |
69 | |
70 | struct AMDGPUImageIntrinsicOptimizerPass |
71 | : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> { |
72 | AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {} |
73 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
74 | |
75 | private: |
76 | TargetMachine &TM; |
77 | }; |
78 | |
79 | struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { |
80 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
81 | }; |
82 | |
83 | void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); |
84 | |
85 | void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); |
86 | extern char &AMDGPUMachineCFGStructurizerID; |
87 | |
88 | void initializeAMDGPUAlwaysInlinePass(PassRegistry&); |
89 | |
90 | Pass *createAMDGPUAnnotateKernelFeaturesPass(); |
91 | Pass *createAMDGPUAttributorLegacyPass(); |
92 | void initializeAMDGPUAttributorLegacyPass(PassRegistry &); |
93 | void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); |
94 | extern char &AMDGPUAnnotateKernelFeaturesID; |
95 | |
/// Strategy selector for the atomic optimizer: DPP and Iterative enable the
/// optimizer with the corresponding strategy, whereas None disables it.
enum class ScanOptions {
  DPP,
  Iterative,
  None,
};
99 | FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy); |
100 | void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); |
101 | extern char &AMDGPUAtomicOptimizerID; |
102 | |
103 | ModulePass *createAMDGPUCtorDtorLoweringLegacyPass(); |
104 | void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &); |
105 | extern char &AMDGPUCtorDtorLoweringLegacyPassID; |
106 | |
107 | FunctionPass *createAMDGPULowerKernelArgumentsPass(); |
108 | void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); |
109 | extern char &AMDGPULowerKernelArgumentsID; |
110 | |
111 | FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); |
112 | void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); |
113 | extern char &AMDGPUPromoteKernelArgumentsID; |
114 | |
115 | struct AMDGPUPromoteKernelArgumentsPass |
116 | : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { |
117 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
118 | }; |
119 | |
120 | ModulePass *createAMDGPULowerKernelAttributesPass(); |
121 | void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); |
122 | extern char &AMDGPULowerKernelAttributesID; |
123 | |
124 | struct AMDGPULowerKernelAttributesPass |
125 | : PassInfoMixin<AMDGPULowerKernelAttributesPass> { |
126 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
127 | }; |
128 | |
129 | void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &); |
130 | extern char &AMDGPULowerModuleLDSLegacyPassID; |
131 | |
132 | struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { |
133 | const AMDGPUTargetMachine &TM; |
134 | AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {} |
135 | |
136 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
137 | }; |
138 | |
139 | void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); |
140 | extern char &AMDGPURewriteOutArgumentsID; |
141 | |
142 | void initializeGCNDPPCombinePass(PassRegistry &); |
143 | extern char &GCNDPPCombineID; |
144 | |
145 | void initializeSIFoldOperandsPass(PassRegistry &); |
146 | extern char &SIFoldOperandsID; |
147 | |
148 | void initializeSIPeepholeSDWAPass(PassRegistry &); |
149 | extern char &SIPeepholeSDWAID; |
150 | |
151 | void initializeSIShrinkInstructionsPass(PassRegistry&); |
152 | extern char &SIShrinkInstructionsID; |
153 | |
154 | void initializeSIFixSGPRCopiesPass(PassRegistry &); |
155 | extern char &SIFixSGPRCopiesID; |
156 | |
157 | void initializeSIFixVGPRCopiesPass(PassRegistry &); |
158 | extern char &SIFixVGPRCopiesID; |
159 | |
160 | void initializeSILowerWWMCopiesPass(PassRegistry &); |
161 | extern char &SILowerWWMCopiesID; |
162 | |
163 | void initializeSILowerI1CopiesPass(PassRegistry &); |
164 | extern char &SILowerI1CopiesID; |
165 | |
166 | void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &); |
167 | extern char &AMDGPUGlobalISelDivergenceLoweringID; |
168 | |
169 | void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &); |
170 | extern char &AMDGPUMarkLastScratchLoadID; |
171 | |
172 | void initializeSILowerSGPRSpillsPass(PassRegistry &); |
173 | extern char &SILowerSGPRSpillsID; |
174 | |
175 | void initializeSILoadStoreOptimizerPass(PassRegistry &); |
176 | extern char &SILoadStoreOptimizerID; |
177 | |
178 | void initializeSIWholeQuadModePass(PassRegistry &); |
179 | extern char &SIWholeQuadModeID; |
180 | |
181 | void initializeSILowerControlFlowPass(PassRegistry &); |
182 | extern char &SILowerControlFlowID; |
183 | |
184 | void initializeSIPreEmitPeepholePass(PassRegistry &); |
185 | extern char &SIPreEmitPeepholeID; |
186 | |
187 | void initializeSILateBranchLoweringPass(PassRegistry &); |
188 | extern char &SILateBranchLoweringPassID; |
189 | |
190 | void initializeSIOptimizeExecMaskingPass(PassRegistry &); |
191 | extern char &SIOptimizeExecMaskingID; |
192 | |
193 | void initializeSIPreAllocateWWMRegsPass(PassRegistry &); |
194 | extern char &SIPreAllocateWWMRegsID; |
195 | |
196 | void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); |
197 | extern char &AMDGPUImageIntrinsicOptimizerID; |
198 | |
199 | void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); |
200 | extern char &AMDGPUPerfHintAnalysisID; |
201 | |
202 | void initializeGCNRegPressurePrinterPass(PassRegistry &); |
203 | extern char &GCNRegPressurePrinterID; |
204 | |
205 | // Passes common to R600 and SI |
206 | FunctionPass *createAMDGPUPromoteAlloca(); |
207 | void initializeAMDGPUPromoteAllocaPass(PassRegistry&); |
208 | extern char &AMDGPUPromoteAllocaID; |
209 | |
210 | FunctionPass *createAMDGPUPromoteAllocaToVector(); |
211 | void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); |
212 | extern char &AMDGPUPromoteAllocaToVectorID; |
213 | |
214 | struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { |
215 | AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} |
216 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
217 | |
218 | private: |
219 | TargetMachine &TM; |
220 | }; |
221 | |
222 | struct AMDGPUPromoteAllocaToVectorPass |
223 | : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { |
224 | AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} |
225 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
226 | |
227 | private: |
228 | TargetMachine &TM; |
229 | }; |
230 | |
231 | struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> { |
232 | AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl) |
233 | : TM(TM), ScanImpl(ScanImpl) {} |
234 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
235 | |
236 | private: |
237 | TargetMachine &TM; |
238 | ScanOptions ScanImpl; |
239 | }; |
240 | |
241 | Pass *createAMDGPUStructurizeCFGPass(); |
242 | FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel); |
243 | ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); |
244 | |
245 | struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { |
246 | AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} |
247 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
248 | |
249 | private: |
250 | bool GlobalOpt; |
251 | }; |
252 | |
253 | class AMDGPUCodeGenPreparePass |
254 | : public PassInfoMixin<AMDGPUCodeGenPreparePass> { |
255 | private: |
256 | TargetMachine &TM; |
257 | |
258 | public: |
259 | AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){}; |
260 | PreservedAnalyses run(Function &, FunctionAnalysisManager &); |
261 | }; |
262 | |
263 | class AMDGPULowerKernelArgumentsPass |
264 | : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> { |
265 | private: |
266 | TargetMachine &TM; |
267 | |
268 | public: |
269 | AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){}; |
270 | PreservedAnalyses run(Function &, FunctionAnalysisManager &); |
271 | }; |
272 | |
273 | class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> { |
274 | private: |
275 | TargetMachine &TM; |
276 | |
277 | public: |
278 | AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){}; |
279 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
280 | }; |
281 | |
282 | FunctionPass *createAMDGPUAnnotateUniformValues(); |
283 | |
284 | ModulePass *createAMDGPUPrintfRuntimeBinding(); |
285 | void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); |
286 | extern char &AMDGPUPrintfRuntimeBindingID; |
287 | |
288 | void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); |
289 | extern char &AMDGPUResourceUsageAnalysisID; |
290 | |
291 | struct AMDGPUPrintfRuntimeBindingPass |
292 | : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { |
293 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
294 | }; |
295 | |
296 | ModulePass* createAMDGPUUnifyMetadataPass(); |
297 | void initializeAMDGPUUnifyMetadataPass(PassRegistry&); |
298 | extern char &AMDGPUUnifyMetadataID; |
299 | |
300 | struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { |
301 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
302 | }; |
303 | |
304 | void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); |
305 | extern char &SIOptimizeExecMaskingPreRAID; |
306 | |
307 | void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); |
308 | extern char &SIOptimizeVGPRLiveRangeID; |
309 | |
310 | void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); |
311 | extern char &AMDGPUAnnotateUniformValuesPassID; |
312 | |
313 | void initializeAMDGPUCodeGenPreparePass(PassRegistry&); |
314 | extern char &AMDGPUCodeGenPrepareID; |
315 | |
316 | void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); |
317 | extern char &AMDGPURemoveIncompatibleFunctionsID; |
318 | |
319 | void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); |
320 | extern char &AMDGPULateCodeGenPrepareID; |
321 | |
322 | FunctionPass *createAMDGPURewriteUndefForPHILegacyPass(); |
323 | void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &); |
324 | extern char &AMDGPURewriteUndefForPHILegacyPassID; |
325 | |
326 | class AMDGPURewriteUndefForPHIPass |
327 | : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> { |
328 | public: |
329 | AMDGPURewriteUndefForPHIPass() = default; |
330 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
331 | }; |
332 | |
333 | void initializeSIAnnotateControlFlowPass(PassRegistry&); |
334 | extern char &SIAnnotateControlFlowPassID; |
335 | |
336 | void initializeSIMemoryLegalizerPass(PassRegistry&); |
337 | extern char &SIMemoryLegalizerID; |
338 | |
339 | void initializeSIModeRegisterPass(PassRegistry&); |
340 | extern char &SIModeRegisterID; |
341 | |
342 | void initializeAMDGPUInsertDelayAluPass(PassRegistry &); |
343 | extern char &AMDGPUInsertDelayAluID; |
344 | |
345 | void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &); |
346 | extern char &AMDGPUInsertSingleUseVDSTID; |
347 | |
348 | void initializeSIInsertHardClausesPass(PassRegistry &); |
349 | extern char &SIInsertHardClausesID; |
350 | |
351 | void initializeSIInsertWaitcntsPass(PassRegistry&); |
352 | extern char &SIInsertWaitcntsID; |
353 | |
354 | void initializeSIFormMemoryClausesPass(PassRegistry&); |
355 | extern char &SIFormMemoryClausesID; |
356 | |
357 | void initializeSIPostRABundlerPass(PassRegistry&); |
358 | extern char &SIPostRABundlerID; |
359 | |
360 | void initializeGCNCreateVOPDPass(PassRegistry &); |
361 | extern char &GCNCreateVOPDID; |
362 | |
363 | void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); |
364 | extern char &AMDGPUUnifyDivergentExitNodesID; |
365 | |
366 | ImmutablePass *createAMDGPUAAWrapperPass(); |
367 | void initializeAMDGPUAAWrapperPassPass(PassRegistry&); |
368 | ImmutablePass *createAMDGPUExternalAAWrapperPass(); |
369 | void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); |
370 | |
371 | void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); |
372 | |
373 | ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); |
374 | void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); |
375 | extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; |
376 | |
377 | void initializeGCNNSAReassignPass(PassRegistry &); |
378 | extern char &GCNNSAReassignID; |
379 | |
380 | void initializeGCNPreRALongBranchRegPass(PassRegistry &); |
381 | extern char &GCNPreRALongBranchRegID; |
382 | |
383 | void initializeGCNPreRAOptimizationsPass(PassRegistry &); |
384 | extern char &GCNPreRAOptimizationsID; |
385 | |
386 | FunctionPass *createAMDGPUSetWavePriorityPass(); |
387 | void initializeAMDGPUSetWavePriorityPass(PassRegistry &); |
388 | |
389 | void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &); |
390 | extern char &GCNRewritePartialRegUsesID; |
391 | |
392 | namespace AMDGPU { |
/// Target index identifiers. The enumerators are numbered consecutively from
/// zero; the values are spelled out so the numbering is visible at a glance.
enum TargetIndex {
  TI_CONSTDATA_START = 0,
  TI_SCRATCH_RSRC_DWORD0 = 1,
  TI_SCRATCH_RSRC_DWORD1 = 2,
  TI_SCRATCH_RSRC_DWORD2 = 3,
  TI_SCRATCH_RSRC_DWORD3 = 4
};
400 | |
401 | // FIXME: Missing constant_32bit |
402 | inline bool isFlatGlobalAddrSpace(unsigned AS) { |
403 | return AS == AMDGPUAS::GLOBAL_ADDRESS || |
404 | AS == AMDGPUAS::FLAT_ADDRESS || |
405 | AS == AMDGPUAS::CONSTANT_ADDRESS || |
406 | AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; |
407 | } |
408 | |
409 | inline bool isExtendedGlobalAddrSpace(unsigned AS) { |
410 | return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS || |
411 | AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || |
412 | AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; |
413 | } |
414 | |
415 | static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) { |
416 | static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range" ); |
417 | |
418 | if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) |
419 | return true; |
420 | |
421 | // This array is indexed by address space value enum elements 0 ... to 9 |
422 | // clang-format off |
423 | static const bool ASAliasRules[10][10] = { |
424 | /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */ |
425 | /* Flat */ {true, true, false, true, true, true, true, true, true, true}, |
426 | /* Global */ {true, true, false, false, true, false, true, true, true, true}, |
427 | /* Region */ {false, false, true, false, false, false, false, false, false, false}, |
428 | /* Group */ {true, false, false, true, false, false, false, false, false, false}, |
429 | /* Constant */ {true, true, false, false, false, false, true, true, true, true}, |
430 | /* Private */ {true, false, false, false, false, true, false, false, false, false}, |
431 | /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true}, |
432 | /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true}, |
433 | /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true}, |
434 | /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true}, |
435 | }; |
436 | // clang-format on |
437 | |
438 | return ASAliasRules[AS1][AS2]; |
439 | } |
440 | |
441 | } |
442 | |
443 | } // End namespace llvm |
444 | |
445 | #endif |
446 | |