1 | //===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file declares the targeting of the InstructionSelector class for |
10 | /// AMDGPU. |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H |
14 | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H |
15 | |
16 | #include "SIDefines.h" |
17 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
18 | #include "llvm/IR/InstrTypes.h" |
19 | |
// Anonymous namespace: every translation unit that includes this header gets
// its own copy of whatever the generated section below declares.
namespace {
// Selects the "predicate bitset" section of the TableGen-generated include.
#define GET_GLOBALISEL_PREDICATE_BITSET
// While the generated code is being included, the name AMDGPUSubtarget expands
// to GCNSubtarget.
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
// Drop both macros again so they do not leak into the rest of the header.
#undef GET_GLOBALISEL_PREDICATE_BITSET
#undef AMDGPUSubtarget
} // end anonymous namespace
27 | |
28 | namespace llvm { |
29 | |
// Forward declarations only: this header uses the types below by pointer or
// reference, so their full definitions are not needed here.

// Image-dimension intrinsic descriptor, declared inside the AMDGPU namespace.
namespace AMDGPU {
struct ImageDimIntrinsicInfo;
} // namespace AMDGPU

// Target-specific types.
class AMDGPURegisterBankInfo;
class AMDGPUTargetMachine;
class GCNSubtarget;
class SIInstrInfo;
class SIRegisterInfo;

// Target-independent code generation and analysis types.
class BlockFrequencyInfo;
class MachineIRBuilder;
class MachineInstr;
class MachineOperand;
class MachineRegisterInfo;
class ProfileSummaryInfo;
class RegisterBank;
class TargetRegisterClass;
47 | |
48 | class AMDGPUInstructionSelector final : public InstructionSelector { |
49 | private: |
50 | MachineRegisterInfo *MRI; |
51 | const GCNSubtarget *Subtarget; |
52 | |
53 | public: |
54 | AMDGPUInstructionSelector(const GCNSubtarget &STI, |
55 | const AMDGPURegisterBankInfo &RBI, |
56 | const AMDGPUTargetMachine &TM); |
57 | |
58 | bool select(MachineInstr &I) override; |
59 | static const char *getName(); |
60 | |
61 | void setupMF(MachineFunction &MF, GISelKnownBits *KB, |
62 | CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, |
63 | BlockFrequencyInfo *BFI) override; |
64 | |
65 | private: |
66 | struct GEPInfo { |
67 | SmallVector<unsigned, 2> SgprParts; |
68 | SmallVector<unsigned, 2> VgprParts; |
69 | int64_t Imm = 0; |
70 | }; |
71 | |
72 | bool isSGPR(Register Reg) const; |
73 | |
74 | bool isInstrUniform(const MachineInstr &MI) const; |
75 | bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const; |
76 | |
77 | const RegisterBank *getArtifactRegBank( |
78 | Register Reg, const MachineRegisterInfo &MRI, |
79 | const TargetRegisterInfo &TRI) const; |
80 | |
81 | /// tblgen-erated 'select' implementation. |
82 | bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; |
83 | |
84 | MachineOperand getSubOperand64(MachineOperand &MO, |
85 | const TargetRegisterClass &SubRC, |
86 | unsigned SubIdx) const; |
87 | |
88 | bool constrainCopyLikeIntrin(MachineInstr &MI, unsigned NewOpc) const; |
89 | bool selectCOPY(MachineInstr &I) const; |
90 | bool selectPHI(MachineInstr &I) const; |
91 | bool selectG_TRUNC(MachineInstr &I) const; |
92 | bool selectG_SZA_EXT(MachineInstr &I) const; |
93 | bool selectG_FPEXT(MachineInstr &I) const; |
94 | bool selectG_CONSTANT(MachineInstr &I) const; |
95 | bool selectG_FNEG(MachineInstr &I) const; |
96 | bool selectG_FABS(MachineInstr &I) const; |
97 | bool selectG_AND_OR_XOR(MachineInstr &I) const; |
98 | bool selectG_ADD_SUB(MachineInstr &I) const; |
99 | bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const; |
100 | bool selectG_AMDGPU_MAD_64_32(MachineInstr &I) const; |
101 | bool (MachineInstr &I) const; |
102 | bool selectG_FMA_FMAD(MachineInstr &I) const; |
103 | bool selectG_MERGE_VALUES(MachineInstr &I) const; |
104 | bool selectG_UNMERGE_VALUES(MachineInstr &I) const; |
105 | bool selectG_BUILD_VECTOR(MachineInstr &I) const; |
106 | bool selectG_PTR_ADD(MachineInstr &I) const; |
107 | bool selectG_IMPLICIT_DEF(MachineInstr &I) const; |
108 | bool selectG_INSERT(MachineInstr &I) const; |
109 | bool selectG_SBFX_UBFX(MachineInstr &I) const; |
110 | |
111 | bool selectInterpP1F16(MachineInstr &MI) const; |
112 | bool selectWritelane(MachineInstr &MI) const; |
113 | bool selectDivScale(MachineInstr &MI) const; |
114 | bool selectIntrinsicCmp(MachineInstr &MI) const; |
115 | bool selectBallot(MachineInstr &I) const; |
116 | bool selectInverseBallot(MachineInstr &I) const; |
117 | bool selectRelocConstant(MachineInstr &I) const; |
118 | bool selectGroupStaticSize(MachineInstr &I) const; |
119 | bool selectReturnAddress(MachineInstr &I) const; |
120 | bool selectG_INTRINSIC(MachineInstr &I) const; |
121 | |
122 | bool selectEndCfIntrinsic(MachineInstr &MI) const; |
123 | bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const; |
124 | bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const; |
125 | bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const; |
126 | bool selectSBarrier(MachineInstr &MI) const; |
127 | bool selectDSBvhStackIntrinsic(MachineInstr &MI) const; |
128 | |
129 | bool selectImageIntrinsic(MachineInstr &MI, |
130 | const AMDGPU::ImageDimIntrinsicInfo *Intr) const; |
131 | bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const; |
132 | int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const; |
133 | bool selectG_ICMP_or_FCMP(MachineInstr &I) const; |
134 | bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const; |
135 | void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI, |
136 | SmallVectorImpl<GEPInfo> &AddrInfo) const; |
137 | |
138 | void initM0(MachineInstr &I) const; |
139 | bool selectG_LOAD_STORE_ATOMICRMW(MachineInstr &I) const; |
140 | bool selectG_SELECT(MachineInstr &I) const; |
141 | bool selectG_BRCOND(MachineInstr &I) const; |
142 | bool selectG_GLOBAL_VALUE(MachineInstr &I) const; |
143 | bool selectG_PTRMASK(MachineInstr &I) const; |
144 | bool (MachineInstr &I) const; |
145 | bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const; |
146 | bool selectBufferLoadLds(MachineInstr &MI) const; |
147 | bool selectGlobalLoadLds(MachineInstr &MI) const; |
148 | bool selectBVHIntrinsic(MachineInstr &I) const; |
149 | bool selectSMFMACIntrin(MachineInstr &I) const; |
150 | bool selectWaveAddress(MachineInstr &I) const; |
151 | bool selectStackRestore(MachineInstr &MI) const; |
152 | bool selectNamedBarrierInst(MachineInstr &I, Intrinsic::ID IID) const; |
153 | bool selectSBarrierSignalIsfirst(MachineInstr &I, Intrinsic::ID IID) const; |
154 | bool selectSBarrierLeave(MachineInstr &I) const; |
155 | |
156 | std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root, |
157 | bool IsCanonicalizing = true, |
158 | bool AllowAbs = true, |
159 | bool OpSel = false) const; |
160 | |
161 | Register copyToVGPRIfSrcFolded(Register Src, unsigned Mods, |
162 | MachineOperand Root, MachineInstr *InsertPt, |
163 | bool ForceVGPR = false) const; |
164 | |
165 | InstructionSelector::ComplexRendererFns |
166 | selectVCSRC(MachineOperand &Root) const; |
167 | |
168 | InstructionSelector::ComplexRendererFns |
169 | selectVSRC0(MachineOperand &Root) const; |
170 | |
171 | InstructionSelector::ComplexRendererFns |
172 | selectVOP3Mods0(MachineOperand &Root) const; |
173 | InstructionSelector::ComplexRendererFns |
174 | selectVOP3BMods0(MachineOperand &Root) const; |
175 | InstructionSelector::ComplexRendererFns |
176 | selectVOP3OMods(MachineOperand &Root) const; |
177 | InstructionSelector::ComplexRendererFns |
178 | selectVOP3Mods(MachineOperand &Root) const; |
179 | InstructionSelector::ComplexRendererFns |
180 | selectVOP3ModsNonCanonicalizing(MachineOperand &Root) const; |
181 | InstructionSelector::ComplexRendererFns |
182 | selectVOP3BMods(MachineOperand &Root) const; |
183 | |
184 | ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const; |
185 | |
186 | std::pair<Register, unsigned> |
187 | selectVOP3PModsImpl(Register Src, const MachineRegisterInfo &MRI, |
188 | bool IsDOT = false) const; |
189 | |
190 | InstructionSelector::ComplexRendererFns |
191 | selectVOP3PMods(MachineOperand &Root) const; |
192 | |
193 | InstructionSelector::ComplexRendererFns |
194 | selectVOP3PModsDOT(MachineOperand &Root) const; |
195 | |
196 | InstructionSelector::ComplexRendererFns |
197 | selectVOP3PModsNeg(MachineOperand &Root) const; |
198 | |
199 | InstructionSelector::ComplexRendererFns |
200 | selectWMMAOpSelVOP3PMods(MachineOperand &Root) const; |
201 | |
202 | InstructionSelector::ComplexRendererFns |
203 | selectWMMAModsF32NegAbs(MachineOperand &Root) const; |
204 | InstructionSelector::ComplexRendererFns |
205 | selectWMMAModsF16Neg(MachineOperand &Root) const; |
206 | InstructionSelector::ComplexRendererFns |
207 | selectWMMAModsF16NegAbs(MachineOperand &Root) const; |
208 | InstructionSelector::ComplexRendererFns |
209 | selectWMMAVISrc(MachineOperand &Root) const; |
210 | InstructionSelector::ComplexRendererFns |
211 | selectSWMMACIndex8(MachineOperand &Root) const; |
212 | InstructionSelector::ComplexRendererFns |
213 | selectSWMMACIndex16(MachineOperand &Root) const; |
214 | |
215 | InstructionSelector::ComplexRendererFns |
216 | selectVOP3OpSelMods(MachineOperand &Root) const; |
217 | |
218 | InstructionSelector::ComplexRendererFns |
219 | selectVINTERPMods(MachineOperand &Root) const; |
220 | InstructionSelector::ComplexRendererFns |
221 | selectVINTERPModsHi(MachineOperand &Root) const; |
222 | |
223 | bool selectSmrdOffset(MachineOperand &Root, Register &Base, Register *SOffset, |
224 | int64_t *Offset) const; |
225 | InstructionSelector::ComplexRendererFns |
226 | selectSmrdImm(MachineOperand &Root) const; |
227 | InstructionSelector::ComplexRendererFns |
228 | selectSmrdImm32(MachineOperand &Root) const; |
229 | InstructionSelector::ComplexRendererFns |
230 | selectSmrdSgpr(MachineOperand &Root) const; |
231 | InstructionSelector::ComplexRendererFns |
232 | selectSmrdSgprImm(MachineOperand &Root) const; |
233 | |
234 | std::pair<Register, int> selectFlatOffsetImpl(MachineOperand &Root, |
235 | uint64_t FlatVariant) const; |
236 | |
237 | InstructionSelector::ComplexRendererFns |
238 | selectFlatOffset(MachineOperand &Root) const; |
239 | InstructionSelector::ComplexRendererFns |
240 | selectGlobalOffset(MachineOperand &Root) const; |
241 | InstructionSelector::ComplexRendererFns |
242 | selectScratchOffset(MachineOperand &Root) const; |
243 | |
244 | InstructionSelector::ComplexRendererFns |
245 | selectGlobalSAddr(MachineOperand &Root) const; |
246 | |
247 | InstructionSelector::ComplexRendererFns |
248 | selectScratchSAddr(MachineOperand &Root) const; |
249 | bool checkFlatScratchSVSSwizzleBug(Register VAddr, Register SAddr, |
250 | uint64_t ImmOffset) const; |
251 | InstructionSelector::ComplexRendererFns |
252 | selectScratchSVAddr(MachineOperand &Root) const; |
253 | |
254 | InstructionSelector::ComplexRendererFns |
255 | selectMUBUFScratchOffen(MachineOperand &Root) const; |
256 | InstructionSelector::ComplexRendererFns |
257 | selectMUBUFScratchOffset(MachineOperand &Root) const; |
258 | |
259 | bool isDSOffsetLegal(Register Base, int64_t Offset) const; |
260 | bool isDSOffset2Legal(Register Base, int64_t Offset0, int64_t Offset1, |
261 | unsigned Size) const; |
262 | bool isFlatScratchBaseLegal(Register Addr) const; |
263 | bool isFlatScratchBaseLegalSV(Register Addr) const; |
264 | bool isFlatScratchBaseLegalSVImm(Register Addr) const; |
265 | |
266 | std::pair<Register, unsigned> |
267 | selectDS1Addr1OffsetImpl(MachineOperand &Root) const; |
268 | InstructionSelector::ComplexRendererFns |
269 | selectDS1Addr1Offset(MachineOperand &Root) const; |
270 | |
271 | InstructionSelector::ComplexRendererFns |
272 | selectDS64Bit4ByteAligned(MachineOperand &Root) const; |
273 | |
274 | InstructionSelector::ComplexRendererFns |
275 | selectDS128Bit8ByteAligned(MachineOperand &Root) const; |
276 | |
277 | std::pair<Register, unsigned> selectDSReadWrite2Impl(MachineOperand &Root, |
278 | unsigned size) const; |
279 | InstructionSelector::ComplexRendererFns |
280 | selectDSReadWrite2(MachineOperand &Root, unsigned size) const; |
281 | |
282 | std::pair<Register, int64_t> |
283 | getPtrBaseWithConstantOffset(Register Root, |
284 | const MachineRegisterInfo &MRI) const; |
285 | |
286 | // Parse out a chain of up to two g_ptr_add instructions. |
287 | // g_ptr_add (n0, _) |
288 | // g_ptr_add (n0, (n1 = g_ptr_add n2, n3)) |
289 | struct MUBUFAddressData { |
290 | Register N0, N2, N3; |
291 | int64_t Offset = 0; |
292 | }; |
293 | |
294 | bool shouldUseAddr64(MUBUFAddressData AddrData) const; |
295 | |
296 | void splitIllegalMUBUFOffset(MachineIRBuilder &B, |
297 | Register &SOffset, int64_t &ImmOffset) const; |
298 | |
299 | MUBUFAddressData parseMUBUFAddress(Register Src) const; |
300 | |
301 | bool selectMUBUFAddr64Impl(MachineOperand &Root, Register &VAddr, |
302 | Register &RSrcReg, Register &SOffset, |
303 | int64_t &Offset) const; |
304 | |
305 | bool selectMUBUFOffsetImpl(MachineOperand &Root, Register &RSrcReg, |
306 | Register &SOffset, int64_t &Offset) const; |
307 | |
308 | InstructionSelector::ComplexRendererFns |
309 | selectBUFSOffset(MachineOperand &Root) const; |
310 | |
311 | InstructionSelector::ComplexRendererFns |
312 | selectMUBUFAddr64(MachineOperand &Root) const; |
313 | |
314 | InstructionSelector::ComplexRendererFns |
315 | selectMUBUFOffset(MachineOperand &Root) const; |
316 | |
317 | ComplexRendererFns selectSMRDBufferImm(MachineOperand &Root) const; |
318 | ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const; |
319 | ComplexRendererFns selectSMRDBufferSgprImm(MachineOperand &Root) const; |
320 | |
321 | std::pair<Register, unsigned> selectVOP3PMadMixModsImpl(MachineOperand &Root, |
322 | bool &Matched) const; |
323 | ComplexRendererFns selectVOP3PMadMixModsExt(MachineOperand &Root) const; |
324 | ComplexRendererFns selectVOP3PMadMixMods(MachineOperand &Root) const; |
325 | |
326 | void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, |
327 | int OpIdx = -1) const; |
328 | |
329 | void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, |
330 | int OpIdx) const; |
331 | |
332 | void renderOpSelTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, |
333 | int OpIdx) const; |
334 | |
335 | void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI, |
336 | int OpIdx) const; |
337 | |
338 | void renderBitcastImm(MachineInstrBuilder &MIB, const MachineInstr &MI, |
339 | int OpIdx) const; |
340 | |
341 | void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI, |
342 | int OpIdx) const; |
343 | void (MachineInstrBuilder &MIB, const MachineInstr &MI, |
344 | int OpIdx) const; |
345 | void (MachineInstrBuilder &MIB, const MachineInstr &MI, |
346 | int OpIdx) const; |
347 | void (MachineInstrBuilder &MIB, const MachineInstr &MI, |
348 | int OpIdx) const; |
349 | |
350 | void renderFrameIndex(MachineInstrBuilder &MIB, const MachineInstr &MI, |
351 | int OpIdx) const; |
352 | |
353 | void renderFPPow2ToExponent(MachineInstrBuilder &MIB, const MachineInstr &MI, |
354 | int OpIdx) const; |
355 | |
356 | bool isInlineImmediate(const APInt &Imm) const; |
357 | bool isInlineImmediate(const APFloat &Imm) const; |
358 | |
359 | // Returns true if TargetOpcode::G_AND MachineInstr `MI`'s masking of the |
360 | // shift amount operand's `ShAmtBits` bits is unneeded. |
361 | bool isUnneededShiftMask(const MachineInstr &MI, unsigned ShAmtBits) const; |
362 | |
363 | const SIInstrInfo &TII; |
364 | const SIRegisterInfo &TRI; |
365 | const AMDGPURegisterBankInfo &RBI; |
366 | const AMDGPUTargetMachine &TM; |
367 | const GCNSubtarget &STI; |
368 | bool EnableLateStructurizeCFG; |
369 | #define GET_GLOBALISEL_PREDICATES_DECL |
370 | #define AMDGPUSubtarget GCNSubtarget |
371 | #include "AMDGPUGenGlobalISel.inc" |
372 | #undef GET_GLOBALISEL_PREDICATES_DECL |
373 | #undef AMDGPUSubtarget |
374 | |
375 | #define GET_GLOBALISEL_TEMPORARIES_DECL |
376 | #include "AMDGPUGenGlobalISel.inc" |
377 | #undef GET_GLOBALISEL_TEMPORARIES_DECL |
378 | }; |
379 | |
380 | } // End llvm namespace. |
381 | #endif |
382 | |