1 | //===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// SI DAG Lowering interface definition |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H |
16 | |
17 | #include "AMDGPUISelLowering.h" |
18 | #include "AMDGPUArgumentUsageInfo.h" |
19 | #include "llvm/CodeGen/MachineFunction.h" |
20 | |
21 | namespace llvm { |
22 | |
23 | class GCNSubtarget; |
24 | class SIMachineFunctionInfo; |
25 | class SIRegisterInfo; |
26 | |
27 | namespace AMDGPU { |
28 | struct ImageDimIntrinsicInfo; |
29 | } |
30 | |
31 | class SITargetLowering final : public AMDGPUTargetLowering { |
32 | private: |
33 | const GCNSubtarget *Subtarget; |
34 | |
35 | public: |
36 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, |
37 | CallingConv::ID CC, |
38 | EVT VT) const override; |
39 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
40 | CallingConv::ID CC, |
41 | EVT VT) const override; |
42 | |
43 | unsigned getVectorTypeBreakdownForCallingConv( |
44 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
45 | unsigned &NumIntermediates, MVT &RegisterVT) const override; |
46 | |
47 | private: |
48 | SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL, |
49 | SDValue Chain, uint64_t Offset) const; |
50 | SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const; |
51 | SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const; |
52 | SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, |
53 | const SDLoc &SL, SDValue Chain, |
54 | uint64_t Offset, Align Alignment, |
55 | bool Signed, |
56 | const ISD::InputArg *Arg = nullptr) const; |
57 | SDValue loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT, const SDLoc &DL, |
58 | Align Alignment, |
59 | ImplicitParameter Param) const; |
60 | |
61 | SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, |
62 | const SDLoc &SL, SDValue Chain, |
63 | const ISD::InputArg &Arg) const; |
64 | SDValue getPreloadedValue(SelectionDAG &DAG, |
65 | const SIMachineFunctionInfo &MFI, |
66 | EVT VT, |
67 | AMDGPUFunctionArgInfo::PreloadedValue) const; |
68 | |
69 | SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, |
70 | SelectionDAG &DAG) const override; |
71 | SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op, |
72 | MVT VT, unsigned Offset) const; |
73 | SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr, |
74 | SelectionDAG &DAG, bool WithChain) const; |
75 | SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset, |
76 | SDValue CachePolicy, SelectionDAG &DAG) const; |
77 | |
78 | SDValue lowerRawBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG, |
79 | unsigned NewOpcode) const; |
80 | SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG, |
81 | unsigned NewOpcode) const; |
82 | |
83 | SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const; |
84 | SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, |
85 | const ArgDescriptor &ArgDesc) const; |
86 | |
87 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
88 | SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
89 | SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; |
90 | |
91 | // The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset |
92 | // (the offset that is included in bounds checking and swizzling, to be split |
93 | // between the instruction's voffset and immoffset fields) and soffset (the |
94 | // offset that is excluded from bounds checking and swizzling, to go in the |
95 | // instruction's soffset field). This function takes the first kind of |
96 | // offset and figures out how to split it between voffset and immoffset. |
97 | std::pair<SDValue, SDValue> splitBufferOffsets(SDValue Offset, |
98 | SelectionDAG &DAG) const; |
99 | |
100 | SDValue widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const; |
101 | SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
102 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
103 | SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const; |
104 | SDValue lowerFastUnsafeFDIV64(SDValue Op, SelectionDAG &DAG) const; |
105 | SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const; |
106 | SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const; |
107 | SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const; |
108 | SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const; |
109 | SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; |
110 | SDValue LowerFFREXP(SDValue Op, SelectionDAG &DAG) const; |
111 | SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
112 | SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; |
113 | SDValue lowerFSQRTF16(SDValue Op, SelectionDAG &DAG) const; |
114 | SDValue lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const; |
115 | SDValue lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const; |
116 | SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; |
117 | SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; |
118 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
119 | SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M, |
120 | SelectionDAG &DAG, ArrayRef<SDValue> Ops, |
121 | bool IsIntrinsic = false) const; |
122 | |
123 | SDValue lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, SelectionDAG &DAG, |
124 | ArrayRef<SDValue> Ops) const; |
125 | |
126 | // Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to |
127 | // dwordx4 if on SI. |
128 | SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
129 | ArrayRef<SDValue> Ops, EVT MemVT, |
130 | MachineMemOperand *MMO, SelectionDAG &DAG) const; |
131 | |
132 | SDValue handleD16VData(SDValue VData, SelectionDAG &DAG, |
133 | bool ImageStore = false) const; |
134 | |
135 | /// Converts \p Op, which must be of floating point type, to the |
136 | /// floating point type \p VT, by either extending or truncating it. |
137 | SDValue getFPExtOrFPRound(SelectionDAG &DAG, |
138 | SDValue Op, |
139 | const SDLoc &DL, |
140 | EVT VT) const; |
141 | |
142 | SDValue convertArgType( |
143 | SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val, |
144 | bool Signed, const ISD::InputArg *Arg = nullptr) const; |
145 | |
146 | /// Custom lowering for ISD::FP_ROUND for MVT::f16. |
147 | SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
148 | SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const; |
149 | SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const; |
150 | SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const; |
151 | SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const; |
152 | SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; |
153 | |
154 | SDValue getSegmentAperture(unsigned AS, const SDLoc &DL, |
155 | SelectionDAG &DAG) const; |
156 | |
157 | SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const; |
158 | SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; |
159 | SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
160 | SDValue (SDValue Op, SelectionDAG &DAG) const; |
161 | SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
162 | SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
163 | SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
164 | |
165 | SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const; |
166 | SDValue lowerTrapEndpgm(SDValue Op, SelectionDAG &DAG) const; |
167 | SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const; |
168 | SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const; |
169 | SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const; |
170 | |
171 | SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; |
172 | |
173 | SDValue performUCharToFloatCombine(SDNode *N, |
174 | DAGCombinerInfo &DCI) const; |
175 | SDValue performFCopySignCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
176 | |
177 | SDValue performSHLPtrCombine(SDNode *N, |
178 | unsigned AS, |
179 | EVT MemVT, |
180 | DAGCombinerInfo &DCI) const; |
181 | |
182 | SDValue performMemSDNodeCombine(MemSDNode *N, DAGCombinerInfo &DCI) const; |
183 | |
184 | SDValue splitBinaryBitConstantOp(DAGCombinerInfo &DCI, const SDLoc &SL, |
185 | unsigned Opc, SDValue LHS, |
186 | const ConstantSDNode *CRHS) const; |
187 | |
188 | SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
189 | SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
190 | SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
191 | SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
192 | SDValue performSignExtendInRegCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
193 | SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
194 | SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT, |
195 | const APFloat &C) const; |
196 | SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
197 | |
198 | SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL, |
199 | SDValue Op0, SDValue Op1) const; |
200 | SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL, |
201 | SDValue Src, SDValue MinVal, SDValue MaxVal, |
202 | bool Signed) const; |
203 | SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
204 | SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const; |
205 | SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
206 | SDValue (SDNode *N, DAGCombinerInfo &DCI) const; |
207 | SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
208 | SDValue performFPRoundCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
209 | |
210 | SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const; |
211 | unsigned getFusedOpcode(const SelectionDAG &DAG, |
212 | const SDNode *N0, const SDNode *N1) const; |
213 | SDValue tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const; |
214 | SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
215 | SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
216 | SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
217 | SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
218 | SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
219 | SDValue performFDivCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
220 | SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const; |
221 | SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
222 | SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
223 | SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
224 | SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
225 | |
226 | bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace, |
227 | uint64_t FlatVariant) const; |
228 | bool isLegalMUBUFAddressingMode(const AddrMode &AM) const; |
229 | |
230 | unsigned isCFIntrinsic(const SDNode *Intr) const; |
231 | |
232 | public: |
233 | /// \returns True if fixup needs to be emitted for given global value \p GV, |
234 | /// false otherwise. |
235 | bool shouldEmitFixup(const GlobalValue *GV) const; |
236 | |
237 | /// \returns True if GOT relocation needs to be emitted for given global value |
238 | /// \p GV, false otherwise. |
239 | bool shouldEmitGOTReloc(const GlobalValue *GV) const; |
240 | |
241 | /// \returns True if PC-relative relocation needs to be emitted for given |
242 | /// global value \p GV, false otherwise. |
243 | bool shouldEmitPCReloc(const GlobalValue *GV) const; |
244 | |
245 | /// \returns true if this should use a literal constant for an LDS address, |
246 | /// and not emit a relocation for an LDS global. |
247 | bool shouldUseLDSConstAddress(const GlobalValue *GV) const; |
248 | |
249 | /// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be |
250 | /// expanded into a set of cmp/select instructions. |
251 | static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, |
252 | bool IsDivergentIdx, |
253 | const GCNSubtarget *Subtarget); |
254 | |
255 | bool shouldExpandVectorDynExt(SDNode *N) const; |
256 | |
257 | private: |
258 | // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the |
259 | // three offsets (voffset, soffset and instoffset) into the SDValue[3] array |
260 | // pointed to by Offsets. |
261 | void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, |
262 | SDValue *Offsets, Align Alignment = Align(4)) const; |
263 | |
264 | // Convert the i128 that an addrspace(8) pointer is natively represented as |
265 | // into the v4i32 that all the buffer intrinsics expect to receive. We can't |
266 | // add register classes for i128 on pain of the promotion logic going haywire, |
267 | // so this slightly ugly hack is what we've got. If passed a non-pointer |
268 | // argument (as would be seen in older buffer intrinsics), does nothing. |
269 | SDValue bufferRsrcPtrToVector(SDValue MaybePointer, SelectionDAG &DAG) const; |
270 | |
271 | // Wrap a 64-bit pointer into a v4i32 (which is how all SelectionDAG code |
272 | // represents ptr addrspace(8)) using the flags specified in the intrinsic. |
273 | SDValue lowerPointerAsRsrcIntrin(SDNode *Op, SelectionDAG &DAG) const; |
274 | |
275 | // Handle 8 bit and 16 bit buffer loads |
276 | SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL, |
277 | ArrayRef<SDValue> Ops, |
278 | MachineMemOperand *MMO) const; |
279 | |
280 | // Handle 8 bit and 16 bit buffer stores |
281 | SDValue handleByteShortBufferStores(SelectionDAG &DAG, EVT VDataType, |
282 | SDLoc DL, SDValue Ops[], |
283 | MemSDNode *M) const; |
284 | |
285 | public: |
286 | SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI); |
287 | |
288 | const GCNSubtarget *getSubtarget() const; |
289 | |
290 | bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT, |
291 | EVT SrcVT) const override; |
292 | |
293 | bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, |
294 | LLT SrcTy) const override; |
295 | |
296 | bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override; |
297 | |
298 | // While address space 7 should never make it to codegen, it still needs to |
299 | // have a MVT to prevent some analyses that query this function from breaking, |
300 | // so, to work around the lack of i160, map it to v5i32. |
301 | MVT getPointerTy(const DataLayout &DL, unsigned AS) const override; |
302 | MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override; |
303 | |
304 | bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, |
305 | MachineFunction &MF, |
306 | unsigned IntrinsicID) const override; |
307 | |
308 | void CollectTargetIntrinsicOperands(const CallInst &I, |
309 | SmallVectorImpl<SDValue> &Ops, |
310 | SelectionDAG &DAG) const override; |
311 | |
312 | bool getAddrModeArguments(IntrinsicInst * /*I*/, |
313 | SmallVectorImpl<Value*> &/*Ops*/, |
314 | Type *&/*AccessTy*/) const override; |
315 | |
316 | bool isLegalGlobalAddressingMode(const AddrMode &AM) const; |
317 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
318 | unsigned AS, |
319 | Instruction *I = nullptr) const override; |
320 | |
321 | bool canMergeStoresTo(unsigned AS, EVT MemVT, |
322 | const MachineFunction &MF) const override; |
323 | |
324 | bool allowsMisalignedMemoryAccessesImpl( |
325 | unsigned Size, unsigned AddrSpace, Align Alignment, |
326 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
327 | unsigned *IsFast = nullptr) const; |
328 | |
329 | bool allowsMisalignedMemoryAccesses( |
330 | LLT Ty, unsigned AddrSpace, Align Alignment, |
331 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
332 | unsigned *IsFast = nullptr) const override { |
333 | if (IsFast) |
334 | *IsFast = 0; |
335 | return allowsMisalignedMemoryAccessesImpl(Size: Ty.getSizeInBits(), AddrSpace, |
336 | Alignment, Flags, IsFast); |
337 | } |
338 | |
339 | bool allowsMisalignedMemoryAccesses( |
340 | EVT VT, unsigned AS, Align Alignment, |
341 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
342 | unsigned *IsFast = nullptr) const override; |
343 | |
344 | EVT getOptimalMemOpType(const MemOp &Op, |
345 | const AttributeList &FuncAttributes) const override; |
346 | |
347 | bool isMemOpUniform(const SDNode *N) const; |
348 | bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const; |
349 | |
350 | static bool isNonGlobalAddrSpace(unsigned AS); |
351 | |
352 | bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; |
353 | |
354 | TargetLoweringBase::LegalizeTypeAction |
355 | getPreferredVectorAction(MVT VT) const override; |
356 | |
357 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
358 | Type *Ty) const override; |
359 | |
360 | bool (EVT ResVT, EVT SrcVT, |
361 | unsigned Index) const override; |
362 | |
363 | bool isTypeDesirableForOp(unsigned Op, EVT VT) const override; |
364 | |
365 | bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; |
366 | |
367 | unsigned combineRepeatedFPDivisors() const override { |
368 | // Combine multiple FDIVs with the same divisor into multiple FMULs by the |
369 | // reciprocal. |
370 | return 2; |
371 | } |
372 | |
373 | bool supportSplitCSR(MachineFunction *MF) const override; |
374 | void initializeSplitCSR(MachineBasicBlock *Entry) const override; |
375 | void insertCopiesSplitCSR( |
376 | MachineBasicBlock *Entry, |
377 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; |
378 | |
379 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
380 | bool isVarArg, |
381 | const SmallVectorImpl<ISD::InputArg> &Ins, |
382 | const SDLoc &DL, SelectionDAG &DAG, |
383 | SmallVectorImpl<SDValue> &InVals) const override; |
384 | |
385 | bool CanLowerReturn(CallingConv::ID CallConv, |
386 | MachineFunction &MF, bool isVarArg, |
387 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
388 | LLVMContext &Context) const override; |
389 | |
390 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
391 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
392 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
393 | SelectionDAG &DAG) const override; |
394 | |
395 | void passSpecialInputs( |
396 | CallLoweringInfo &CLI, |
397 | CCState &CCInfo, |
398 | const SIMachineFunctionInfo &Info, |
399 | SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, |
400 | SmallVectorImpl<SDValue> &MemOpChains, |
401 | SDValue Chain) const; |
402 | |
403 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
404 | CallingConv::ID CallConv, bool isVarArg, |
405 | const SmallVectorImpl<ISD::InputArg> &Ins, |
406 | const SDLoc &DL, SelectionDAG &DAG, |
407 | SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
408 | SDValue ThisVal) const; |
409 | |
410 | bool mayBeEmittedAsTailCall(const CallInst *) const override; |
411 | |
412 | bool isEligibleForTailCallOptimization( |
413 | SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, |
414 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
415 | const SmallVectorImpl<SDValue> &OutVals, |
416 | const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const; |
417 | |
418 | SDValue LowerCall(CallLoweringInfo &CLI, |
419 | SmallVectorImpl<SDValue> &InVals) const override; |
420 | |
421 | SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const; |
422 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
423 | SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; |
424 | SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
425 | |
426 | SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; |
427 | SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
428 | SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const; |
429 | SDValue lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const; |
430 | |
431 | Register getRegisterByName(const char* RegName, LLT VT, |
432 | const MachineFunction &MF) const override; |
433 | |
434 | MachineBasicBlock *splitKillBlock(MachineInstr &MI, |
435 | MachineBasicBlock *BB) const; |
436 | |
437 | void bundleInstWithWaitcnt(MachineInstr &MI) const; |
438 | MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI, |
439 | MachineBasicBlock *BB) const; |
440 | |
441 | MachineBasicBlock * |
442 | EmitInstrWithCustomInserter(MachineInstr &MI, |
443 | MachineBasicBlock *BB) const override; |
444 | |
445 | bool enableAggressiveFMAFusion(EVT VT) const override; |
446 | bool enableAggressiveFMAFusion(LLT Ty) const override; |
447 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
448 | EVT VT) const override; |
449 | MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override; |
450 | LLT getPreferredShiftAmountTy(LLT Ty) const override; |
451 | |
452 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
453 | EVT VT) const override; |
454 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
455 | const LLT Ty) const override; |
456 | bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override; |
457 | bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override; |
458 | |
459 | SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const; |
460 | SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const; |
461 | SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const; |
462 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
463 | |
464 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
465 | SelectionDAG &DAG) const override; |
466 | |
467 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
468 | SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override; |
469 | void AddMemOpInit(MachineInstr &MI) const; |
470 | void AdjustInstrPostInstrSelection(MachineInstr &MI, |
471 | SDNode *Node) const override; |
472 | |
473 | SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const; |
474 | |
475 | MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, |
476 | SDValue Ptr) const; |
477 | MachineSDNode *buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr, |
478 | uint32_t RsrcDword1, uint64_t RsrcDword2And3) const; |
479 | std::pair<unsigned, const TargetRegisterClass *> |
480 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
481 | StringRef Constraint, MVT VT) const override; |
482 | ConstraintType getConstraintType(StringRef Constraint) const override; |
483 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
484 | std::vector<SDValue> &Ops, |
485 | SelectionDAG &DAG) const override; |
486 | bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const; |
487 | bool checkAsmConstraintVal(SDValue Op, StringRef Constraint, |
488 | uint64_t Val) const; |
489 | bool checkAsmConstraintValA(SDValue Op, |
490 | uint64_t Val, |
491 | unsigned MaxSize = 64) const; |
492 | SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, |
493 | SDValue V) const; |
494 | |
495 | void finalizeLowering(MachineFunction &MF) const override; |
496 | |
497 | void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, |
498 | const APInt &DemandedElts, |
499 | const SelectionDAG &DAG, |
500 | unsigned Depth = 0) const override; |
501 | void computeKnownBitsForFrameIndex(int FrameIdx, |
502 | KnownBits &Known, |
503 | const MachineFunction &MF) const override; |
504 | void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, |
505 | KnownBits &Known, |
506 | const APInt &DemandedElts, |
507 | const MachineRegisterInfo &MRI, |
508 | unsigned Depth = 0) const override; |
509 | |
510 | Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, |
511 | const MachineRegisterInfo &MRI, |
512 | unsigned Depth = 0) const override; |
513 | bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI, |
514 | UniformityInfo *UA) const override; |
515 | |
516 | bool hasMemSDNodeUser(SDNode *N) const; |
517 | |
518 | bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, |
519 | SDValue N1) const override; |
520 | |
521 | bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, |
522 | Register N1) const override; |
523 | |
524 | bool isCanonicalized(SelectionDAG &DAG, SDValue Op, |
525 | unsigned MaxDepth = 5) const; |
526 | bool isCanonicalized(Register Reg, const MachineFunction &MF, |
527 | unsigned MaxDepth = 5) const; |
528 | bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const; |
529 | bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const; |
530 | |
531 | bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, |
532 | const TargetRegisterInfo *TRI, |
533 | const TargetInstrInfo *TII, unsigned &PhysReg, |
534 | int &Cost) const override; |
535 | |
536 | bool isKnownNeverNaNForTargetNode(SDValue Op, |
537 | const SelectionDAG &DAG, |
538 | bool SNaN = false, |
539 | unsigned Depth = 0) const override; |
540 | AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; |
541 | AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; |
542 | AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override; |
543 | AtomicExpansionKind |
544 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; |
545 | void emitExpandAtomicRMW(AtomicRMWInst *AI) const override; |
546 | |
547 | LoadInst * |
548 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; |
549 | |
550 | const TargetRegisterClass *getRegClassFor(MVT VT, |
551 | bool isDivergent) const override; |
552 | bool requiresUniformRegister(MachineFunction &MF, |
553 | const Value *V) const override; |
554 | Align getPrefLoopAlignment(MachineLoop *ML) const override; |
555 | |
556 | void allocateHSAUserSGPRs(CCState &CCInfo, |
557 | MachineFunction &MF, |
558 | const SIRegisterInfo &TRI, |
559 | SIMachineFunctionInfo &Info) const; |
560 | |
561 | void allocatePreloadKernArgSGPRs(CCState &CCInfo, |
562 | SmallVectorImpl<CCValAssign> &ArgLocs, |
563 | const SmallVectorImpl<ISD::InputArg> &Ins, |
564 | MachineFunction &MF, |
565 | const SIRegisterInfo &TRI, |
566 | SIMachineFunctionInfo &Info) const; |
567 | |
568 | void allocateLDSKernelId(CCState &CCInfo, MachineFunction &MF, |
569 | const SIRegisterInfo &TRI, |
570 | SIMachineFunctionInfo &Info) const; |
571 | |
572 | void allocateSystemSGPRs(CCState &CCInfo, |
573 | MachineFunction &MF, |
574 | SIMachineFunctionInfo &Info, |
575 | CallingConv::ID CallConv, |
576 | bool IsShader) const; |
577 | |
578 | void allocateSpecialEntryInputVGPRs(CCState &CCInfo, |
579 | MachineFunction &MF, |
580 | const SIRegisterInfo &TRI, |
581 | SIMachineFunctionInfo &Info) const; |
582 | void allocateSpecialInputSGPRs( |
583 | CCState &CCInfo, |
584 | MachineFunction &MF, |
585 | const SIRegisterInfo &TRI, |
586 | SIMachineFunctionInfo &Info) const; |
587 | |
588 | void allocateSpecialInputVGPRs(CCState &CCInfo, |
589 | MachineFunction &MF, |
590 | const SIRegisterInfo &TRI, |
591 | SIMachineFunctionInfo &Info) const; |
592 | void allocateSpecialInputVGPRsFixed(CCState &CCInfo, |
593 | MachineFunction &MF, |
594 | const SIRegisterInfo &TRI, |
595 | SIMachineFunctionInfo &Info) const; |
596 | |
597 | MachineMemOperand::Flags |
598 | getTargetMMOFlags(const Instruction &I) const override; |
599 | }; |
600 | |
/// Returns true if \p V is a boolean value which is not serialized into
/// memory or an argument, and therefore does not require a v_cndmask_b32
/// to be deserialized before use.
bool isBoolSGPR(SDValue V);
604 | |
605 | } // End namespace llvm |
606 | |
607 | #endif |
608 | |