1 | //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that NVPTX uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H |
16 | |
17 | #include "NVPTX.h" |
18 | #include "llvm/CodeGen/SelectionDAG.h" |
19 | #include "llvm/CodeGen/TargetLowering.h" |
20 | |
21 | namespace llvm { |
22 | namespace NVPTXISD { |
23 | enum NodeType : unsigned { |
24 | // Start the numbering from where ISD NodeType finishes. |
25 | FIRST_NUMBER = ISD::BUILTIN_OP_END, |
26 | Wrapper, |
27 | CALL, |
28 | RET_GLUE, |
29 | LOAD_PARAM, |
30 | DeclareParam, |
31 | DeclareScalarParam, |
32 | DeclareRetParam, |
33 | DeclareRet, |
34 | DeclareScalarRet, |
35 | PrintCall, |
36 | PrintConvergentCall, |
37 | PrintCallUni, |
38 | PrintConvergentCallUni, |
39 | CallArgBegin, |
40 | CallArg, |
41 | LastCallArg, |
42 | CallArgEnd, |
43 | CallVoid, |
44 | CallVal, |
45 | CallSymbol, |
46 | Prototype, |
47 | MoveParam, |
48 | PseudoUseParam, |
49 | RETURN, |
50 | CallSeqBegin, |
51 | CallSeqEnd, |
52 | CallPrototype, |
53 | ProxyReg, |
54 | FUN_SHFL_CLAMP, |
55 | FUN_SHFR_CLAMP, |
56 | MUL_WIDE_SIGNED, |
57 | MUL_WIDE_UNSIGNED, |
58 | IMAD, |
59 | SETP_F16X2, |
60 | SETP_BF16X2, |
61 | BFE, |
62 | BFI, |
63 | PRMT, |
64 | DYNAMIC_STACKALLOC, |
65 | Dummy, |
66 | |
67 | LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, |
68 | LoadV4, |
69 | LDGV2, // LDG.v2 |
70 | LDGV4, // LDG.v4 |
71 | LDUV2, // LDU.v2 |
72 | LDUV4, // LDU.v4 |
73 | StoreV2, |
74 | StoreV4, |
75 | LoadParam, |
76 | LoadParamV2, |
77 | LoadParamV4, |
78 | StoreParam, |
79 | StoreParamV2, |
80 | StoreParamV4, |
81 | StoreParamS32, // to sext and store a <32bit value, not used currently |
82 | StoreParamU32, // to zext and store a <32bit value, not used currently |
83 | StoreRetval, |
84 | StoreRetvalV2, |
85 | StoreRetvalV4, |
86 | |
87 | // Texture intrinsics |
88 | Tex1DFloatS32, |
89 | Tex1DFloatFloat, |
90 | Tex1DFloatFloatLevel, |
91 | Tex1DFloatFloatGrad, |
92 | Tex1DS32S32, |
93 | Tex1DS32Float, |
94 | Tex1DS32FloatLevel, |
95 | Tex1DS32FloatGrad, |
96 | Tex1DU32S32, |
97 | Tex1DU32Float, |
98 | Tex1DU32FloatLevel, |
99 | Tex1DU32FloatGrad, |
100 | Tex1DArrayFloatS32, |
101 | Tex1DArrayFloatFloat, |
102 | Tex1DArrayFloatFloatLevel, |
103 | Tex1DArrayFloatFloatGrad, |
104 | Tex1DArrayS32S32, |
105 | Tex1DArrayS32Float, |
106 | Tex1DArrayS32FloatLevel, |
107 | Tex1DArrayS32FloatGrad, |
108 | Tex1DArrayU32S32, |
109 | Tex1DArrayU32Float, |
110 | Tex1DArrayU32FloatLevel, |
111 | Tex1DArrayU32FloatGrad, |
112 | Tex2DFloatS32, |
113 | Tex2DFloatFloat, |
114 | Tex2DFloatFloatLevel, |
115 | Tex2DFloatFloatGrad, |
116 | Tex2DS32S32, |
117 | Tex2DS32Float, |
118 | Tex2DS32FloatLevel, |
119 | Tex2DS32FloatGrad, |
120 | Tex2DU32S32, |
121 | Tex2DU32Float, |
122 | Tex2DU32FloatLevel, |
123 | Tex2DU32FloatGrad, |
124 | Tex2DArrayFloatS32, |
125 | Tex2DArrayFloatFloat, |
126 | Tex2DArrayFloatFloatLevel, |
127 | Tex2DArrayFloatFloatGrad, |
128 | Tex2DArrayS32S32, |
129 | Tex2DArrayS32Float, |
130 | Tex2DArrayS32FloatLevel, |
131 | Tex2DArrayS32FloatGrad, |
132 | Tex2DArrayU32S32, |
133 | Tex2DArrayU32Float, |
134 | Tex2DArrayU32FloatLevel, |
135 | Tex2DArrayU32FloatGrad, |
136 | Tex3DFloatS32, |
137 | Tex3DFloatFloat, |
138 | Tex3DFloatFloatLevel, |
139 | Tex3DFloatFloatGrad, |
140 | Tex3DS32S32, |
141 | Tex3DS32Float, |
142 | Tex3DS32FloatLevel, |
143 | Tex3DS32FloatGrad, |
144 | Tex3DU32S32, |
145 | Tex3DU32Float, |
146 | Tex3DU32FloatLevel, |
147 | Tex3DU32FloatGrad, |
148 | TexCubeFloatFloat, |
149 | TexCubeFloatFloatLevel, |
150 | TexCubeS32Float, |
151 | TexCubeS32FloatLevel, |
152 | TexCubeU32Float, |
153 | TexCubeU32FloatLevel, |
154 | TexCubeArrayFloatFloat, |
155 | TexCubeArrayFloatFloatLevel, |
156 | TexCubeArrayS32Float, |
157 | TexCubeArrayS32FloatLevel, |
158 | TexCubeArrayU32Float, |
159 | TexCubeArrayU32FloatLevel, |
160 | Tld4R2DFloatFloat, |
161 | Tld4G2DFloatFloat, |
162 | Tld4B2DFloatFloat, |
163 | Tld4A2DFloatFloat, |
164 | Tld4R2DS64Float, |
165 | Tld4G2DS64Float, |
166 | Tld4B2DS64Float, |
167 | Tld4A2DS64Float, |
168 | Tld4R2DU64Float, |
169 | Tld4G2DU64Float, |
170 | Tld4B2DU64Float, |
171 | Tld4A2DU64Float, |
172 | TexUnified1DFloatS32, |
173 | TexUnified1DFloatFloat, |
174 | TexUnified1DFloatFloatLevel, |
175 | TexUnified1DFloatFloatGrad, |
176 | TexUnified1DS32S32, |
177 | TexUnified1DS32Float, |
178 | TexUnified1DS32FloatLevel, |
179 | TexUnified1DS32FloatGrad, |
180 | TexUnified1DU32S32, |
181 | TexUnified1DU32Float, |
182 | TexUnified1DU32FloatLevel, |
183 | TexUnified1DU32FloatGrad, |
184 | TexUnified1DArrayFloatS32, |
185 | TexUnified1DArrayFloatFloat, |
186 | TexUnified1DArrayFloatFloatLevel, |
187 | TexUnified1DArrayFloatFloatGrad, |
188 | TexUnified1DArrayS32S32, |
189 | TexUnified1DArrayS32Float, |
190 | TexUnified1DArrayS32FloatLevel, |
191 | TexUnified1DArrayS32FloatGrad, |
192 | TexUnified1DArrayU32S32, |
193 | TexUnified1DArrayU32Float, |
194 | TexUnified1DArrayU32FloatLevel, |
195 | TexUnified1DArrayU32FloatGrad, |
196 | TexUnified2DFloatS32, |
197 | TexUnified2DFloatFloat, |
198 | TexUnified2DFloatFloatLevel, |
199 | TexUnified2DFloatFloatGrad, |
200 | TexUnified2DS32S32, |
201 | TexUnified2DS32Float, |
202 | TexUnified2DS32FloatLevel, |
203 | TexUnified2DS32FloatGrad, |
204 | TexUnified2DU32S32, |
205 | TexUnified2DU32Float, |
206 | TexUnified2DU32FloatLevel, |
207 | TexUnified2DU32FloatGrad, |
208 | TexUnified2DArrayFloatS32, |
209 | TexUnified2DArrayFloatFloat, |
210 | TexUnified2DArrayFloatFloatLevel, |
211 | TexUnified2DArrayFloatFloatGrad, |
212 | TexUnified2DArrayS32S32, |
213 | TexUnified2DArrayS32Float, |
214 | TexUnified2DArrayS32FloatLevel, |
215 | TexUnified2DArrayS32FloatGrad, |
216 | TexUnified2DArrayU32S32, |
217 | TexUnified2DArrayU32Float, |
218 | TexUnified2DArrayU32FloatLevel, |
219 | TexUnified2DArrayU32FloatGrad, |
220 | TexUnified3DFloatS32, |
221 | TexUnified3DFloatFloat, |
222 | TexUnified3DFloatFloatLevel, |
223 | TexUnified3DFloatFloatGrad, |
224 | TexUnified3DS32S32, |
225 | TexUnified3DS32Float, |
226 | TexUnified3DS32FloatLevel, |
227 | TexUnified3DS32FloatGrad, |
228 | TexUnified3DU32S32, |
229 | TexUnified3DU32Float, |
230 | TexUnified3DU32FloatLevel, |
231 | TexUnified3DU32FloatGrad, |
232 | TexUnifiedCubeFloatFloat, |
233 | TexUnifiedCubeFloatFloatLevel, |
234 | TexUnifiedCubeS32Float, |
235 | TexUnifiedCubeS32FloatLevel, |
236 | TexUnifiedCubeU32Float, |
237 | TexUnifiedCubeU32FloatLevel, |
238 | TexUnifiedCubeArrayFloatFloat, |
239 | TexUnifiedCubeArrayFloatFloatLevel, |
240 | TexUnifiedCubeArrayS32Float, |
241 | TexUnifiedCubeArrayS32FloatLevel, |
242 | TexUnifiedCubeArrayU32Float, |
243 | TexUnifiedCubeArrayU32FloatLevel, |
244 | TexUnifiedCubeFloatFloatGrad, |
245 | TexUnifiedCubeS32FloatGrad, |
246 | TexUnifiedCubeU32FloatGrad, |
247 | TexUnifiedCubeArrayFloatFloatGrad, |
248 | TexUnifiedCubeArrayS32FloatGrad, |
249 | TexUnifiedCubeArrayU32FloatGrad, |
250 | Tld4UnifiedR2DFloatFloat, |
251 | Tld4UnifiedG2DFloatFloat, |
252 | Tld4UnifiedB2DFloatFloat, |
253 | Tld4UnifiedA2DFloatFloat, |
254 | Tld4UnifiedR2DS64Float, |
255 | Tld4UnifiedG2DS64Float, |
256 | Tld4UnifiedB2DS64Float, |
257 | Tld4UnifiedA2DS64Float, |
258 | Tld4UnifiedR2DU64Float, |
259 | Tld4UnifiedG2DU64Float, |
260 | Tld4UnifiedB2DU64Float, |
261 | Tld4UnifiedA2DU64Float, |
262 | |
263 | // Surface intrinsics |
264 | Suld1DI8Clamp, |
265 | Suld1DI16Clamp, |
266 | Suld1DI32Clamp, |
267 | Suld1DI64Clamp, |
268 | Suld1DV2I8Clamp, |
269 | Suld1DV2I16Clamp, |
270 | Suld1DV2I32Clamp, |
271 | Suld1DV2I64Clamp, |
272 | Suld1DV4I8Clamp, |
273 | Suld1DV4I16Clamp, |
274 | Suld1DV4I32Clamp, |
275 | |
276 | Suld1DArrayI8Clamp, |
277 | Suld1DArrayI16Clamp, |
278 | Suld1DArrayI32Clamp, |
279 | Suld1DArrayI64Clamp, |
280 | Suld1DArrayV2I8Clamp, |
281 | Suld1DArrayV2I16Clamp, |
282 | Suld1DArrayV2I32Clamp, |
283 | Suld1DArrayV2I64Clamp, |
284 | Suld1DArrayV4I8Clamp, |
285 | Suld1DArrayV4I16Clamp, |
286 | Suld1DArrayV4I32Clamp, |
287 | |
288 | Suld2DI8Clamp, |
289 | Suld2DI16Clamp, |
290 | Suld2DI32Clamp, |
291 | Suld2DI64Clamp, |
292 | Suld2DV2I8Clamp, |
293 | Suld2DV2I16Clamp, |
294 | Suld2DV2I32Clamp, |
295 | Suld2DV2I64Clamp, |
296 | Suld2DV4I8Clamp, |
297 | Suld2DV4I16Clamp, |
298 | Suld2DV4I32Clamp, |
299 | |
300 | Suld2DArrayI8Clamp, |
301 | Suld2DArrayI16Clamp, |
302 | Suld2DArrayI32Clamp, |
303 | Suld2DArrayI64Clamp, |
304 | Suld2DArrayV2I8Clamp, |
305 | Suld2DArrayV2I16Clamp, |
306 | Suld2DArrayV2I32Clamp, |
307 | Suld2DArrayV2I64Clamp, |
308 | Suld2DArrayV4I8Clamp, |
309 | Suld2DArrayV4I16Clamp, |
310 | Suld2DArrayV4I32Clamp, |
311 | |
312 | Suld3DI8Clamp, |
313 | Suld3DI16Clamp, |
314 | Suld3DI32Clamp, |
315 | Suld3DI64Clamp, |
316 | Suld3DV2I8Clamp, |
317 | Suld3DV2I16Clamp, |
318 | Suld3DV2I32Clamp, |
319 | Suld3DV2I64Clamp, |
320 | Suld3DV4I8Clamp, |
321 | Suld3DV4I16Clamp, |
322 | Suld3DV4I32Clamp, |
323 | |
324 | Suld1DI8Trap, |
325 | Suld1DI16Trap, |
326 | Suld1DI32Trap, |
327 | Suld1DI64Trap, |
328 | Suld1DV2I8Trap, |
329 | Suld1DV2I16Trap, |
330 | Suld1DV2I32Trap, |
331 | Suld1DV2I64Trap, |
332 | Suld1DV4I8Trap, |
333 | Suld1DV4I16Trap, |
334 | Suld1DV4I32Trap, |
335 | |
336 | Suld1DArrayI8Trap, |
337 | Suld1DArrayI16Trap, |
338 | Suld1DArrayI32Trap, |
339 | Suld1DArrayI64Trap, |
340 | Suld1DArrayV2I8Trap, |
341 | Suld1DArrayV2I16Trap, |
342 | Suld1DArrayV2I32Trap, |
343 | Suld1DArrayV2I64Trap, |
344 | Suld1DArrayV4I8Trap, |
345 | Suld1DArrayV4I16Trap, |
346 | Suld1DArrayV4I32Trap, |
347 | |
348 | Suld2DI8Trap, |
349 | Suld2DI16Trap, |
350 | Suld2DI32Trap, |
351 | Suld2DI64Trap, |
352 | Suld2DV2I8Trap, |
353 | Suld2DV2I16Trap, |
354 | Suld2DV2I32Trap, |
355 | Suld2DV2I64Trap, |
356 | Suld2DV4I8Trap, |
357 | Suld2DV4I16Trap, |
358 | Suld2DV4I32Trap, |
359 | |
360 | Suld2DArrayI8Trap, |
361 | Suld2DArrayI16Trap, |
362 | Suld2DArrayI32Trap, |
363 | Suld2DArrayI64Trap, |
364 | Suld2DArrayV2I8Trap, |
365 | Suld2DArrayV2I16Trap, |
366 | Suld2DArrayV2I32Trap, |
367 | Suld2DArrayV2I64Trap, |
368 | Suld2DArrayV4I8Trap, |
369 | Suld2DArrayV4I16Trap, |
370 | Suld2DArrayV4I32Trap, |
371 | |
372 | Suld3DI8Trap, |
373 | Suld3DI16Trap, |
374 | Suld3DI32Trap, |
375 | Suld3DI64Trap, |
376 | Suld3DV2I8Trap, |
377 | Suld3DV2I16Trap, |
378 | Suld3DV2I32Trap, |
379 | Suld3DV2I64Trap, |
380 | Suld3DV4I8Trap, |
381 | Suld3DV4I16Trap, |
382 | Suld3DV4I32Trap, |
383 | |
384 | Suld1DI8Zero, |
385 | Suld1DI16Zero, |
386 | Suld1DI32Zero, |
387 | Suld1DI64Zero, |
388 | Suld1DV2I8Zero, |
389 | Suld1DV2I16Zero, |
390 | Suld1DV2I32Zero, |
391 | Suld1DV2I64Zero, |
392 | Suld1DV4I8Zero, |
393 | Suld1DV4I16Zero, |
394 | Suld1DV4I32Zero, |
395 | |
396 | Suld1DArrayI8Zero, |
397 | Suld1DArrayI16Zero, |
398 | Suld1DArrayI32Zero, |
399 | Suld1DArrayI64Zero, |
400 | Suld1DArrayV2I8Zero, |
401 | Suld1DArrayV2I16Zero, |
402 | Suld1DArrayV2I32Zero, |
403 | Suld1DArrayV2I64Zero, |
404 | Suld1DArrayV4I8Zero, |
405 | Suld1DArrayV4I16Zero, |
406 | Suld1DArrayV4I32Zero, |
407 | |
408 | Suld2DI8Zero, |
409 | Suld2DI16Zero, |
410 | Suld2DI32Zero, |
411 | Suld2DI64Zero, |
412 | Suld2DV2I8Zero, |
413 | Suld2DV2I16Zero, |
414 | Suld2DV2I32Zero, |
415 | Suld2DV2I64Zero, |
416 | Suld2DV4I8Zero, |
417 | Suld2DV4I16Zero, |
418 | Suld2DV4I32Zero, |
419 | |
420 | Suld2DArrayI8Zero, |
421 | Suld2DArrayI16Zero, |
422 | Suld2DArrayI32Zero, |
423 | Suld2DArrayI64Zero, |
424 | Suld2DArrayV2I8Zero, |
425 | Suld2DArrayV2I16Zero, |
426 | Suld2DArrayV2I32Zero, |
427 | Suld2DArrayV2I64Zero, |
428 | Suld2DArrayV4I8Zero, |
429 | Suld2DArrayV4I16Zero, |
430 | Suld2DArrayV4I32Zero, |
431 | |
432 | Suld3DI8Zero, |
433 | Suld3DI16Zero, |
434 | Suld3DI32Zero, |
435 | Suld3DI64Zero, |
436 | Suld3DV2I8Zero, |
437 | Suld3DV2I16Zero, |
438 | Suld3DV2I32Zero, |
439 | Suld3DV2I64Zero, |
440 | Suld3DV4I8Zero, |
441 | Suld3DV4I16Zero, |
442 | Suld3DV4I32Zero |
443 | }; |
444 | } |
445 | |
446 | class NVPTXSubtarget; |
447 | |
448 | //===--------------------------------------------------------------------===// |
449 | // TargetLowering Implementation |
450 | //===--------------------------------------------------------------------===// |
451 | class NVPTXTargetLowering : public TargetLowering { |
452 | public: |
453 | explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM, |
454 | const NVPTXSubtarget &STI); |
455 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
456 | |
457 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
458 | |
459 | const char *getTargetNodeName(unsigned Opcode) const override; |
460 | |
461 | bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
462 | MachineFunction &MF, |
463 | unsigned Intrinsic) const override; |
464 | |
465 | /// getFunctionParamOptimizedAlign - since function arguments are passed via |
466 | /// .param space, we may want to increase their alignment in a way that |
467 | /// ensures that we can effectively vectorize their loads & stores. We can |
468 | /// increase alignment only if the function has internal or has private |
469 | /// linkage as for other linkage types callers may already rely on default |
470 | /// alignment. To allow using 128-bit vectorized loads/stores, this function |
471 | /// ensures that alignment is 16 or greater. |
472 | Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, |
473 | const DataLayout &DL) const; |
474 | |
475 | /// Helper for computing alignment of a device function byval parameter. |
476 | Align getFunctionByValParamAlign(const Function *F, Type *ArgTy, |
477 | Align InitialAlign, |
478 | const DataLayout &DL) const; |
479 | |
480 | // Helper for getting a function parameter name. Name is composed from |
481 | // its index and the function name. Negative index corresponds to special |
482 | // parameter (unsized array) used for passing variable arguments. |
483 | std::string getParamName(const Function *F, int Idx) const; |
484 | |
485 | /// isLegalAddressingMode - Return true if the addressing mode represented |
486 | /// by AM is legal for this target, for a load/store of the specified type |
487 | /// Used to guide target specific optimizations, like loop strength |
488 | /// reduction (LoopStrengthReduce.cpp) and memory optimization for |
489 | /// address mode (CodeGenPrepare.cpp) |
490 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
491 | unsigned AS, |
492 | Instruction *I = nullptr) const override; |
493 | |
494 | bool isTruncateFree(Type *SrcTy, Type *DstTy) const override { |
495 | // Truncating 64-bit to 32-bit is free in SASS. |
496 | if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) |
497 | return false; |
498 | return SrcTy->getPrimitiveSizeInBits() == 64 && |
499 | DstTy->getPrimitiveSizeInBits() == 32; |
500 | } |
501 | |
502 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, |
503 | EVT VT) const override { |
504 | if (VT.isVector()) |
505 | return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); |
506 | return MVT::i1; |
507 | } |
508 | |
509 | ConstraintType getConstraintType(StringRef Constraint) const override; |
510 | std::pair<unsigned, const TargetRegisterClass *> |
511 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
512 | StringRef Constraint, MVT VT) const override; |
513 | |
514 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
515 | bool isVarArg, |
516 | const SmallVectorImpl<ISD::InputArg> &Ins, |
517 | const SDLoc &dl, SelectionDAG &DAG, |
518 | SmallVectorImpl<SDValue> &InVals) const override; |
519 | |
520 | SDValue LowerCall(CallLoweringInfo &CLI, |
521 | SmallVectorImpl<SDValue> &InVals) const override; |
522 | |
523 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
524 | |
525 | std::string |
526 | getPrototype(const DataLayout &DL, Type *, const ArgListTy &, |
527 | const SmallVectorImpl<ISD::OutputArg> &, MaybeAlign retAlignment, |
528 | std::optional<std::pair<unsigned, const APInt &>> VAInfo, |
529 | const CallBase &CB, unsigned UniqueCallSite) const; |
530 | |
531 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
532 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
533 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, |
534 | SelectionDAG &DAG) const override; |
535 | |
536 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
537 | std::vector<SDValue> &Ops, |
538 | SelectionDAG &DAG) const override; |
539 | |
540 | const NVPTXTargetMachine *nvTM; |
541 | |
542 | // PTX always uses 32-bit shift amounts |
543 | MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { |
544 | return MVT::i32; |
545 | } |
546 | |
547 | TargetLoweringBase::LegalizeTypeAction |
548 | getPreferredVectorAction(MVT VT) const override; |
549 | |
550 | // Get the degree of precision we want from 32-bit floating point division |
551 | // operations. |
552 | // |
553 | // 0 - Use ptx div.approx |
554 | // 1 - Use ptx.div.full (approximate, but less so than div.approx) |
555 | // 2 - Use IEEE-compliant div instructions, if available. |
556 | int getDivF32Level() const; |
557 | |
558 | // Get whether we should use a precise or approximate 32-bit floating point |
559 | // sqrt instruction. |
560 | bool usePrecSqrtF32() const; |
561 | |
562 | // Get whether we should use instructions that flush floating-point denormals |
563 | // to sign-preserving zero. |
564 | bool useF32FTZ(const MachineFunction &MF) const; |
565 | |
566 | SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
567 | int &, bool &UseOneConst, |
568 | bool Reciprocal) const override; |
569 | |
570 | unsigned combineRepeatedFPDivisors() const override { return 2; } |
571 | |
572 | bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const; |
573 | bool allowUnsafeFPMath(MachineFunction &MF) const; |
574 | |
575 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
576 | EVT) const override { |
577 | return true; |
578 | } |
579 | |
580 | bool enableAggressiveFMAFusion(EVT VT) const override { return true; } |
581 | |
582 | // The default is to transform llvm.ctlz(x, false) (where false indicates that |
583 | // x == 0 is not undefined behavior) into a branch that checks whether x is 0 |
584 | // and avoids calling ctlz in that case. We have a dedicated ctlz |
585 | // instruction, so we say that ctlz is cheap to speculate. |
586 | bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; } |
587 | |
588 | AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override { |
589 | return AtomicExpansionKind::None; |
590 | } |
591 | |
592 | AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override { |
593 | return AtomicExpansionKind::None; |
594 | } |
595 | |
596 | AtomicExpansionKind |
597 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
598 | |
599 | bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override { |
600 | // There's rarely any point of packing something into a vector type if we |
601 | // already have the source data. |
602 | return true; |
603 | } |
604 | |
605 | private: |
606 | const NVPTXSubtarget &STI; // cache the subtarget here |
607 | SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; |
608 | |
609 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
610 | SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; |
611 | SDValue (SDValue Op, SelectionDAG &DAG) const; |
612 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
613 | SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
614 | |
615 | SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; |
616 | SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const; |
617 | SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const; |
618 | |
619 | SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
620 | SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
621 | |
622 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
623 | SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
624 | |
625 | SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
626 | SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const; |
627 | |
628 | SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
629 | SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; |
630 | SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; |
631 | |
632 | SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; |
633 | SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; |
634 | |
635 | SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const; |
636 | |
637 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
638 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
639 | |
640 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
641 | SelectionDAG &DAG) const override; |
642 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
643 | |
644 | Align getArgumentAlignment(const CallBase *CB, Type *Ty, unsigned Idx, |
645 | const DataLayout &DL) const; |
646 | }; |
647 | |
648 | } // namespace llvm |
649 | |
650 | #endif |
651 | |