1//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
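/// \file
/// Declares the AMDGPU backend's pass factory functions, pass registration
/// hooks, pass ID externs, new-pass-manager pass classes, and address-space
/// helper functions.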
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12
13#include "llvm/IR/PassManager.h"
14#include "llvm/Pass.h"
15#include "llvm/Support/AMDGPUAddrSpace.h"
16#include "llvm/Support/CodeGen.h"
17
18namespace llvm {
19
20class AMDGPUTargetMachine;
21class TargetMachine;
22
23// GlobalISel passes
24void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
25FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
26void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
27FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
28FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
29void initializeAMDGPURegBankCombinerPass(PassRegistry &);
30
31void initializeAMDGPURegBankSelectPass(PassRegistry &);
32
33// SI Passes
34FunctionPass *createGCNDPPCombinePass();
35FunctionPass *createSIAnnotateControlFlowPass();
36FunctionPass *createSIFoldOperandsPass();
37FunctionPass *createSIPeepholeSDWAPass();
38FunctionPass *createSILowerI1CopiesPass();
39FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
40FunctionPass *createSIShrinkInstructionsPass();
41FunctionPass *createSILoadStoreOptimizerPass();
42FunctionPass *createSIWholeQuadModePass();
43FunctionPass *createSIFixControlFlowLiveIntervalsPass();
44FunctionPass *createSIOptimizeExecMaskingPreRAPass();
45FunctionPass *createSIOptimizeVGPRLiveRangePass();
46FunctionPass *createSIFixSGPRCopiesPass();
47FunctionPass *createLowerWWMCopiesPass();
48FunctionPass *createSIMemoryLegalizerPass();
49FunctionPass *createSIInsertWaitcntsPass();
50FunctionPass *createSIPreAllocateWWMRegsPass();
51FunctionPass *createSIFormMemoryClausesPass();
52
53FunctionPass *createSIPostRABundlerPass();
54FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
55ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
56FunctionPass *createAMDGPUCodeGenPreparePass();
57FunctionPass *createAMDGPULateCodeGenPreparePass();
58FunctionPass *createAMDGPUMachineCFGStructurizerPass();
59FunctionPass *createAMDGPURewriteOutArgumentsPass();
60ModulePass *
61createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
62FunctionPass *createSIModeRegisterPass();
63FunctionPass *createGCNPreRAOptimizationsPass();
64
65struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
66 AMDGPUSimplifyLibCallsPass() {}
67 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
68};
69
70struct AMDGPUImageIntrinsicOptimizerPass
71 : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> {
72 AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {}
73 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
74
75private:
76 TargetMachine &TM;
77};
78
79struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
80 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
81};
82
83void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
84
85void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
86extern char &AMDGPUMachineCFGStructurizerID;
87
88void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
89
90Pass *createAMDGPUAnnotateKernelFeaturesPass();
91Pass *createAMDGPUAttributorLegacyPass();
92void initializeAMDGPUAttributorLegacyPass(PassRegistry &);
93void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
94extern char &AMDGPUAnnotateKernelFeaturesID;
95
/// Strategy selector for the atomic optimizer.
///
/// Choosing DPP or Iterative turns the atomic optimizer on with that
/// strategy; choosing None turns the atomic optimizer off.
enum class ScanOptions {
  DPP,       ///< Enable the optimizer with the DPP strategy.
  Iterative, ///< Enable the optimizer with the iterative strategy.
  None       ///< Disable the optimizer.
};
99FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
100void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
101extern char &AMDGPUAtomicOptimizerID;
102
103ModulePass *createAMDGPUCtorDtorLoweringLegacyPass();
104void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &);
105extern char &AMDGPUCtorDtorLoweringLegacyPassID;
106
107FunctionPass *createAMDGPULowerKernelArgumentsPass();
108void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
109extern char &AMDGPULowerKernelArgumentsID;
110
111FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
112void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
113extern char &AMDGPUPromoteKernelArgumentsID;
114
115struct AMDGPUPromoteKernelArgumentsPass
116 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
117 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
118};
119
120ModulePass *createAMDGPULowerKernelAttributesPass();
121void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
122extern char &AMDGPULowerKernelAttributesID;
123
124struct AMDGPULowerKernelAttributesPass
125 : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
126 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
127};
128
129void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &);
130extern char &AMDGPULowerModuleLDSLegacyPassID;
131
132struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
133 const AMDGPUTargetMachine &TM;
134 AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
135
136 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
137};
138
139void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
140extern char &AMDGPURewriteOutArgumentsID;
141
142void initializeGCNDPPCombinePass(PassRegistry &);
143extern char &GCNDPPCombineID;
144
145void initializeSIFoldOperandsPass(PassRegistry &);
146extern char &SIFoldOperandsID;
147
148void initializeSIPeepholeSDWAPass(PassRegistry &);
149extern char &SIPeepholeSDWAID;
150
151void initializeSIShrinkInstructionsPass(PassRegistry&);
152extern char &SIShrinkInstructionsID;
153
154void initializeSIFixSGPRCopiesPass(PassRegistry &);
155extern char &SIFixSGPRCopiesID;
156
157void initializeSIFixVGPRCopiesPass(PassRegistry &);
158extern char &SIFixVGPRCopiesID;
159
160void initializeSILowerWWMCopiesPass(PassRegistry &);
161extern char &SILowerWWMCopiesID;
162
163void initializeSILowerI1CopiesPass(PassRegistry &);
164extern char &SILowerI1CopiesID;
165
166void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
167extern char &AMDGPUGlobalISelDivergenceLoweringID;
168
169void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
170extern char &AMDGPUMarkLastScratchLoadID;
171
172void initializeSILowerSGPRSpillsPass(PassRegistry &);
173extern char &SILowerSGPRSpillsID;
174
175void initializeSILoadStoreOptimizerPass(PassRegistry &);
176extern char &SILoadStoreOptimizerID;
177
178void initializeSIWholeQuadModePass(PassRegistry &);
179extern char &SIWholeQuadModeID;
180
181void initializeSILowerControlFlowPass(PassRegistry &);
182extern char &SILowerControlFlowID;
183
184void initializeSIPreEmitPeepholePass(PassRegistry &);
185extern char &SIPreEmitPeepholeID;
186
187void initializeSILateBranchLoweringPass(PassRegistry &);
188extern char &SILateBranchLoweringPassID;
189
190void initializeSIOptimizeExecMaskingPass(PassRegistry &);
191extern char &SIOptimizeExecMaskingID;
192
193void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
194extern char &SIPreAllocateWWMRegsID;
195
196void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
197extern char &AMDGPUImageIntrinsicOptimizerID;
198
199void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
200extern char &AMDGPUPerfHintAnalysisID;
201
202void initializeGCNRegPressurePrinterPass(PassRegistry &);
203extern char &GCNRegPressurePrinterID;
204
205// Passes common to R600 and SI
206FunctionPass *createAMDGPUPromoteAlloca();
207void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
208extern char &AMDGPUPromoteAllocaID;
209
210FunctionPass *createAMDGPUPromoteAllocaToVector();
211void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
212extern char &AMDGPUPromoteAllocaToVectorID;
213
214struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
215 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
216 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
217
218private:
219 TargetMachine &TM;
220};
221
222struct AMDGPUPromoteAllocaToVectorPass
223 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
224 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
225 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
226
227private:
228 TargetMachine &TM;
229};
230
231struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
232 AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
233 : TM(TM), ScanImpl(ScanImpl) {}
234 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
235
236private:
237 TargetMachine &TM;
238 ScanOptions ScanImpl;
239};
240
241Pass *createAMDGPUStructurizeCFGPass();
242FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
243ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
244
245struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
246 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
247 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
248
249private:
250 bool GlobalOpt;
251};
252
253class AMDGPUCodeGenPreparePass
254 : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
255private:
256 TargetMachine &TM;
257
258public:
259 AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){};
260 PreservedAnalyses run(Function &, FunctionAnalysisManager &);
261};
262
263class AMDGPULowerKernelArgumentsPass
264 : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
265private:
266 TargetMachine &TM;
267
268public:
269 AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){};
270 PreservedAnalyses run(Function &, FunctionAnalysisManager &);
271};
272
273class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
274private:
275 TargetMachine &TM;
276
277public:
278 AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
279 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
280};
281
282FunctionPass *createAMDGPUAnnotateUniformValues();
283
284ModulePass *createAMDGPUPrintfRuntimeBinding();
285void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
286extern char &AMDGPUPrintfRuntimeBindingID;
287
288void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
289extern char &AMDGPUResourceUsageAnalysisID;
290
291struct AMDGPUPrintfRuntimeBindingPass
292 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
293 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
294};
295
296ModulePass* createAMDGPUUnifyMetadataPass();
297void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
298extern char &AMDGPUUnifyMetadataID;
299
300struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
301 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
302};
303
304void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
305extern char &SIOptimizeExecMaskingPreRAID;
306
307void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
308extern char &SIOptimizeVGPRLiveRangeID;
309
310void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
311extern char &AMDGPUAnnotateUniformValuesPassID;
312
313void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
314extern char &AMDGPUCodeGenPrepareID;
315
316void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
317extern char &AMDGPURemoveIncompatibleFunctionsID;
318
319void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
320extern char &AMDGPULateCodeGenPrepareID;
321
322FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
323void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
324extern char &AMDGPURewriteUndefForPHILegacyPassID;
325
326class AMDGPURewriteUndefForPHIPass
327 : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> {
328public:
329 AMDGPURewriteUndefForPHIPass() = default;
330 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
331};
332
333void initializeSIAnnotateControlFlowPass(PassRegistry&);
334extern char &SIAnnotateControlFlowPassID;
335
336void initializeSIMemoryLegalizerPass(PassRegistry&);
337extern char &SIMemoryLegalizerID;
338
339void initializeSIModeRegisterPass(PassRegistry&);
340extern char &SIModeRegisterID;
341
342void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
343extern char &AMDGPUInsertDelayAluID;
344
345void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
346extern char &AMDGPUInsertSingleUseVDSTID;
347
348void initializeSIInsertHardClausesPass(PassRegistry &);
349extern char &SIInsertHardClausesID;
350
351void initializeSIInsertWaitcntsPass(PassRegistry&);
352extern char &SIInsertWaitcntsID;
353
354void initializeSIFormMemoryClausesPass(PassRegistry&);
355extern char &SIFormMemoryClausesID;
356
357void initializeSIPostRABundlerPass(PassRegistry&);
358extern char &SIPostRABundlerID;
359
360void initializeGCNCreateVOPDPass(PassRegistry &);
361extern char &GCNCreateVOPDID;
362
363void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
364extern char &AMDGPUUnifyDivergentExitNodesID;
365
366ImmutablePass *createAMDGPUAAWrapperPass();
367void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
368ImmutablePass *createAMDGPUExternalAAWrapperPass();
369void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
370
371void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
372
373ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
374void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
375extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
376
377void initializeGCNNSAReassignPass(PassRegistry &);
378extern char &GCNNSAReassignID;
379
380void initializeGCNPreRALongBranchRegPass(PassRegistry &);
381extern char &GCNPreRALongBranchRegID;
382
383void initializeGCNPreRAOptimizationsPass(PassRegistry &);
384extern char &GCNPreRAOptimizationsID;
385
386FunctionPass *createAMDGPUSetWavePriorityPass();
387void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
388
389void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
390extern char &GCNRewritePartialRegUsesID;
391
392namespace AMDGPU {
/// Target index identifiers. The numeric values are spelled out here and are
/// the same ones the compiler would assign implicitly (0 through 4).
/// NOTE(review): the names suggest a constant-data start marker followed by
/// the four dwords of a scratch resource descriptor; confirm with users.
enum TargetIndex {
  TI_CONSTDATA_START = 0,
  TI_SCRATCH_RSRC_DWORD0 = 1,
  TI_SCRATCH_RSRC_DWORD1 = 2,
  TI_SCRATCH_RSRC_DWORD2 = 3,
  TI_SCRATCH_RSRC_DWORD3 = 4
};
400
401// FIXME: Missing constant_32bit
402inline bool isFlatGlobalAddrSpace(unsigned AS) {
403 return AS == AMDGPUAS::GLOBAL_ADDRESS ||
404 AS == AMDGPUAS::FLAT_ADDRESS ||
405 AS == AMDGPUAS::CONSTANT_ADDRESS ||
406 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
407}
408
409inline bool isExtendedGlobalAddrSpace(unsigned AS) {
410 return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS ||
411 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
412 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
413}
414
415static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
416 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
417
418 if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
419 return true;
420
421 // This array is indexed by address space value enum elements 0 ... to 9
422 // clang-format off
423 static const bool ASAliasRules[10][10] = {
424 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
425 /* Flat */ {true, true, false, true, true, true, true, true, true, true},
426 /* Global */ {true, true, false, false, true, false, true, true, true, true},
427 /* Region */ {false, false, true, false, false, false, false, false, false, false},
428 /* Group */ {true, false, false, true, false, false, false, false, false, false},
429 /* Constant */ {true, true, false, false, false, false, true, true, true, true},
430 /* Private */ {true, false, false, false, false, true, false, false, false, false},
431 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true},
432 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true},
433 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true},
434 /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true},
435 };
436 // clang-format on
437
438 return ASAliasRules[AS1][AS2];
439}
440
441}
442
443} // End namespace llvm
444
445#endif
446

// Source: llvm/lib/Target/AMDGPU/AMDGPU.h