//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelDAGToDAG.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"

using namespace llvm;

#define DEBUG_TYPE "nvptx-isel"
#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"

static cl::opt<bool>
    EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden,
                   cl::desc("Enable reciprocal sqrt optimization"));

/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOptLevel OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
}

char NVPTXDAGToDAGISel::ID = 0;

INITIALIZE_PASS(NVPTXDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
    : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {
  doMulWide = (OptLevel > CodeGenOptLevel::None);
}

bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<NVPTXSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

int NVPTXDAGToDAGISel::getDivF32Level() const {
  return Subtarget->getTargetLowering()->getDivF32Level();
}

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  return Subtarget->getTargetLowering()->usePrecSqrtF32();
}

bool NVPTXDAGToDAGISel::useF32FTZ() const {
  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}

bool NVPTXDAGToDAGISel::allowFMA() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowFMA(*MF, OptLevel);
}

bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowUnsafeFPMath(*MF);
}

bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }

/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  case ISD::LOAD:
  case ISD::ATOMIC_LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
  case ISD::ATOMIC_STORE:
    if (tryStore(N))
      return;
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    break;
  case NVPTXISD::SETP_F16X2:
    SelectSETP_F16X2(N);
    return;
  case NVPTXISD::SETP_BF16X2:
    SelectSETP_BF16X2(N);
    return;
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    if (tryLoadVector(N))
      return;
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    if (tryLDGLDU(N))
      return;
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    if (tryStoreVector(N))
      return;
    break;
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    if (tryLoadParam(N))
      return;
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    if (tryStoreRetval(N))
      return;
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    if (tryStoreParam(N))
      return;
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    if (tryIntrinsicNoChain(N))
      return;
    break;
  case ISD::INTRINSIC_W_CHAIN:
    if (tryIntrinsicChain(N))
      return;
    break;
158 case NVPTXISD::Tex1DFloatS32:
159 case NVPTXISD::Tex1DFloatFloat:
160 case NVPTXISD::Tex1DFloatFloatLevel:
161 case NVPTXISD::Tex1DFloatFloatGrad:
162 case NVPTXISD::Tex1DS32S32:
163 case NVPTXISD::Tex1DS32Float:
164 case NVPTXISD::Tex1DS32FloatLevel:
165 case NVPTXISD::Tex1DS32FloatGrad:
166 case NVPTXISD::Tex1DU32S32:
167 case NVPTXISD::Tex1DU32Float:
168 case NVPTXISD::Tex1DU32FloatLevel:
169 case NVPTXISD::Tex1DU32FloatGrad:
170 case NVPTXISD::Tex1DArrayFloatS32:
171 case NVPTXISD::Tex1DArrayFloatFloat:
172 case NVPTXISD::Tex1DArrayFloatFloatLevel:
173 case NVPTXISD::Tex1DArrayFloatFloatGrad:
174 case NVPTXISD::Tex1DArrayS32S32:
175 case NVPTXISD::Tex1DArrayS32Float:
176 case NVPTXISD::Tex1DArrayS32FloatLevel:
177 case NVPTXISD::Tex1DArrayS32FloatGrad:
178 case NVPTXISD::Tex1DArrayU32S32:
179 case NVPTXISD::Tex1DArrayU32Float:
180 case NVPTXISD::Tex1DArrayU32FloatLevel:
181 case NVPTXISD::Tex1DArrayU32FloatGrad:
182 case NVPTXISD::Tex2DFloatS32:
183 case NVPTXISD::Tex2DFloatFloat:
184 case NVPTXISD::Tex2DFloatFloatLevel:
185 case NVPTXISD::Tex2DFloatFloatGrad:
186 case NVPTXISD::Tex2DS32S32:
187 case NVPTXISD::Tex2DS32Float:
188 case NVPTXISD::Tex2DS32FloatLevel:
189 case NVPTXISD::Tex2DS32FloatGrad:
190 case NVPTXISD::Tex2DU32S32:
191 case NVPTXISD::Tex2DU32Float:
192 case NVPTXISD::Tex2DU32FloatLevel:
193 case NVPTXISD::Tex2DU32FloatGrad:
194 case NVPTXISD::Tex2DArrayFloatS32:
195 case NVPTXISD::Tex2DArrayFloatFloat:
196 case NVPTXISD::Tex2DArrayFloatFloatLevel:
197 case NVPTXISD::Tex2DArrayFloatFloatGrad:
198 case NVPTXISD::Tex2DArrayS32S32:
199 case NVPTXISD::Tex2DArrayS32Float:
200 case NVPTXISD::Tex2DArrayS32FloatLevel:
201 case NVPTXISD::Tex2DArrayS32FloatGrad:
202 case NVPTXISD::Tex2DArrayU32S32:
203 case NVPTXISD::Tex2DArrayU32Float:
204 case NVPTXISD::Tex2DArrayU32FloatLevel:
205 case NVPTXISD::Tex2DArrayU32FloatGrad:
206 case NVPTXISD::Tex3DFloatS32:
207 case NVPTXISD::Tex3DFloatFloat:
208 case NVPTXISD::Tex3DFloatFloatLevel:
209 case NVPTXISD::Tex3DFloatFloatGrad:
210 case NVPTXISD::Tex3DS32S32:
211 case NVPTXISD::Tex3DS32Float:
212 case NVPTXISD::Tex3DS32FloatLevel:
213 case NVPTXISD::Tex3DS32FloatGrad:
214 case NVPTXISD::Tex3DU32S32:
215 case NVPTXISD::Tex3DU32Float:
216 case NVPTXISD::Tex3DU32FloatLevel:
217 case NVPTXISD::Tex3DU32FloatGrad:
218 case NVPTXISD::TexCubeFloatFloat:
219 case NVPTXISD::TexCubeFloatFloatLevel:
220 case NVPTXISD::TexCubeS32Float:
221 case NVPTXISD::TexCubeS32FloatLevel:
222 case NVPTXISD::TexCubeU32Float:
223 case NVPTXISD::TexCubeU32FloatLevel:
224 case NVPTXISD::TexCubeArrayFloatFloat:
225 case NVPTXISD::TexCubeArrayFloatFloatLevel:
226 case NVPTXISD::TexCubeArrayS32Float:
227 case NVPTXISD::TexCubeArrayS32FloatLevel:
228 case NVPTXISD::TexCubeArrayU32Float:
229 case NVPTXISD::TexCubeArrayU32FloatLevel:
230 case NVPTXISD::Tld4R2DFloatFloat:
231 case NVPTXISD::Tld4G2DFloatFloat:
232 case NVPTXISD::Tld4B2DFloatFloat:
233 case NVPTXISD::Tld4A2DFloatFloat:
234 case NVPTXISD::Tld4R2DS64Float:
235 case NVPTXISD::Tld4G2DS64Float:
236 case NVPTXISD::Tld4B2DS64Float:
237 case NVPTXISD::Tld4A2DS64Float:
238 case NVPTXISD::Tld4R2DU64Float:
239 case NVPTXISD::Tld4G2DU64Float:
240 case NVPTXISD::Tld4B2DU64Float:
241 case NVPTXISD::Tld4A2DU64Float:
242 case NVPTXISD::TexUnified1DFloatS32:
243 case NVPTXISD::TexUnified1DFloatFloat:
244 case NVPTXISD::TexUnified1DFloatFloatLevel:
245 case NVPTXISD::TexUnified1DFloatFloatGrad:
246 case NVPTXISD::TexUnified1DS32S32:
247 case NVPTXISD::TexUnified1DS32Float:
248 case NVPTXISD::TexUnified1DS32FloatLevel:
249 case NVPTXISD::TexUnified1DS32FloatGrad:
250 case NVPTXISD::TexUnified1DU32S32:
251 case NVPTXISD::TexUnified1DU32Float:
252 case NVPTXISD::TexUnified1DU32FloatLevel:
253 case NVPTXISD::TexUnified1DU32FloatGrad:
254 case NVPTXISD::TexUnified1DArrayFloatS32:
255 case NVPTXISD::TexUnified1DArrayFloatFloat:
256 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
257 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
258 case NVPTXISD::TexUnified1DArrayS32S32:
259 case NVPTXISD::TexUnified1DArrayS32Float:
260 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
261 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
262 case NVPTXISD::TexUnified1DArrayU32S32:
263 case NVPTXISD::TexUnified1DArrayU32Float:
264 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
265 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
266 case NVPTXISD::TexUnified2DFloatS32:
267 case NVPTXISD::TexUnified2DFloatFloat:
268 case NVPTXISD::TexUnified2DFloatFloatLevel:
269 case NVPTXISD::TexUnified2DFloatFloatGrad:
270 case NVPTXISD::TexUnified2DS32S32:
271 case NVPTXISD::TexUnified2DS32Float:
272 case NVPTXISD::TexUnified2DS32FloatLevel:
273 case NVPTXISD::TexUnified2DS32FloatGrad:
274 case NVPTXISD::TexUnified2DU32S32:
275 case NVPTXISD::TexUnified2DU32Float:
276 case NVPTXISD::TexUnified2DU32FloatLevel:
277 case NVPTXISD::TexUnified2DU32FloatGrad:
278 case NVPTXISD::TexUnified2DArrayFloatS32:
279 case NVPTXISD::TexUnified2DArrayFloatFloat:
280 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
281 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
282 case NVPTXISD::TexUnified2DArrayS32S32:
283 case NVPTXISD::TexUnified2DArrayS32Float:
284 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
285 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
286 case NVPTXISD::TexUnified2DArrayU32S32:
287 case NVPTXISD::TexUnified2DArrayU32Float:
288 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
289 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
290 case NVPTXISD::TexUnified3DFloatS32:
291 case NVPTXISD::TexUnified3DFloatFloat:
292 case NVPTXISD::TexUnified3DFloatFloatLevel:
293 case NVPTXISD::TexUnified3DFloatFloatGrad:
294 case NVPTXISD::TexUnified3DS32S32:
295 case NVPTXISD::TexUnified3DS32Float:
296 case NVPTXISD::TexUnified3DS32FloatLevel:
297 case NVPTXISD::TexUnified3DS32FloatGrad:
298 case NVPTXISD::TexUnified3DU32S32:
299 case NVPTXISD::TexUnified3DU32Float:
300 case NVPTXISD::TexUnified3DU32FloatLevel:
301 case NVPTXISD::TexUnified3DU32FloatGrad:
302 case NVPTXISD::TexUnifiedCubeFloatFloat:
303 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
304 case NVPTXISD::TexUnifiedCubeS32Float:
305 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
306 case NVPTXISD::TexUnifiedCubeU32Float:
307 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
308 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
309 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
310 case NVPTXISD::TexUnifiedCubeArrayS32Float:
311 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
312 case NVPTXISD::TexUnifiedCubeArrayU32Float:
313 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
314 case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
315 case NVPTXISD::TexUnifiedCubeS32FloatGrad:
316 case NVPTXISD::TexUnifiedCubeU32FloatGrad:
317 case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
318 case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
319 case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
320 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
321 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
322 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
323 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
324 case NVPTXISD::Tld4UnifiedR2DS64Float:
325 case NVPTXISD::Tld4UnifiedG2DS64Float:
326 case NVPTXISD::Tld4UnifiedB2DS64Float:
327 case NVPTXISD::Tld4UnifiedA2DS64Float:
328 case NVPTXISD::Tld4UnifiedR2DU64Float:
329 case NVPTXISD::Tld4UnifiedG2DU64Float:
330 case NVPTXISD::Tld4UnifiedB2DU64Float:
331 case NVPTXISD::Tld4UnifiedA2DU64Float:
332 if (tryTextureIntrinsic(N))
333 return;
334 break;
335 case NVPTXISD::Suld1DI8Clamp:
336 case NVPTXISD::Suld1DI16Clamp:
337 case NVPTXISD::Suld1DI32Clamp:
338 case NVPTXISD::Suld1DI64Clamp:
339 case NVPTXISD::Suld1DV2I8Clamp:
340 case NVPTXISD::Suld1DV2I16Clamp:
341 case NVPTXISD::Suld1DV2I32Clamp:
342 case NVPTXISD::Suld1DV2I64Clamp:
343 case NVPTXISD::Suld1DV4I8Clamp:
344 case NVPTXISD::Suld1DV4I16Clamp:
345 case NVPTXISD::Suld1DV4I32Clamp:
346 case NVPTXISD::Suld1DArrayI8Clamp:
347 case NVPTXISD::Suld1DArrayI16Clamp:
348 case NVPTXISD::Suld1DArrayI32Clamp:
349 case NVPTXISD::Suld1DArrayI64Clamp:
350 case NVPTXISD::Suld1DArrayV2I8Clamp:
351 case NVPTXISD::Suld1DArrayV2I16Clamp:
352 case NVPTXISD::Suld1DArrayV2I32Clamp:
353 case NVPTXISD::Suld1DArrayV2I64Clamp:
354 case NVPTXISD::Suld1DArrayV4I8Clamp:
355 case NVPTXISD::Suld1DArrayV4I16Clamp:
356 case NVPTXISD::Suld1DArrayV4I32Clamp:
357 case NVPTXISD::Suld2DI8Clamp:
358 case NVPTXISD::Suld2DI16Clamp:
359 case NVPTXISD::Suld2DI32Clamp:
360 case NVPTXISD::Suld2DI64Clamp:
361 case NVPTXISD::Suld2DV2I8Clamp:
362 case NVPTXISD::Suld2DV2I16Clamp:
363 case NVPTXISD::Suld2DV2I32Clamp:
364 case NVPTXISD::Suld2DV2I64Clamp:
365 case NVPTXISD::Suld2DV4I8Clamp:
366 case NVPTXISD::Suld2DV4I16Clamp:
367 case NVPTXISD::Suld2DV4I32Clamp:
368 case NVPTXISD::Suld2DArrayI8Clamp:
369 case NVPTXISD::Suld2DArrayI16Clamp:
370 case NVPTXISD::Suld2DArrayI32Clamp:
371 case NVPTXISD::Suld2DArrayI64Clamp:
372 case NVPTXISD::Suld2DArrayV2I8Clamp:
373 case NVPTXISD::Suld2DArrayV2I16Clamp:
374 case NVPTXISD::Suld2DArrayV2I32Clamp:
375 case NVPTXISD::Suld2DArrayV2I64Clamp:
376 case NVPTXISD::Suld2DArrayV4I8Clamp:
377 case NVPTXISD::Suld2DArrayV4I16Clamp:
378 case NVPTXISD::Suld2DArrayV4I32Clamp:
379 case NVPTXISD::Suld3DI8Clamp:
380 case NVPTXISD::Suld3DI16Clamp:
381 case NVPTXISD::Suld3DI32Clamp:
382 case NVPTXISD::Suld3DI64Clamp:
383 case NVPTXISD::Suld3DV2I8Clamp:
384 case NVPTXISD::Suld3DV2I16Clamp:
385 case NVPTXISD::Suld3DV2I32Clamp:
386 case NVPTXISD::Suld3DV2I64Clamp:
387 case NVPTXISD::Suld3DV4I8Clamp:
388 case NVPTXISD::Suld3DV4I16Clamp:
389 case NVPTXISD::Suld3DV4I32Clamp:
390 case NVPTXISD::Suld1DI8Trap:
391 case NVPTXISD::Suld1DI16Trap:
392 case NVPTXISD::Suld1DI32Trap:
393 case NVPTXISD::Suld1DI64Trap:
394 case NVPTXISD::Suld1DV2I8Trap:
395 case NVPTXISD::Suld1DV2I16Trap:
396 case NVPTXISD::Suld1DV2I32Trap:
397 case NVPTXISD::Suld1DV2I64Trap:
398 case NVPTXISD::Suld1DV4I8Trap:
399 case NVPTXISD::Suld1DV4I16Trap:
400 case NVPTXISD::Suld1DV4I32Trap:
401 case NVPTXISD::Suld1DArrayI8Trap:
402 case NVPTXISD::Suld1DArrayI16Trap:
403 case NVPTXISD::Suld1DArrayI32Trap:
404 case NVPTXISD::Suld1DArrayI64Trap:
405 case NVPTXISD::Suld1DArrayV2I8Trap:
406 case NVPTXISD::Suld1DArrayV2I16Trap:
407 case NVPTXISD::Suld1DArrayV2I32Trap:
408 case NVPTXISD::Suld1DArrayV2I64Trap:
409 case NVPTXISD::Suld1DArrayV4I8Trap:
410 case NVPTXISD::Suld1DArrayV4I16Trap:
411 case NVPTXISD::Suld1DArrayV4I32Trap:
412 case NVPTXISD::Suld2DI8Trap:
413 case NVPTXISD::Suld2DI16Trap:
414 case NVPTXISD::Suld2DI32Trap:
415 case NVPTXISD::Suld2DI64Trap:
416 case NVPTXISD::Suld2DV2I8Trap:
417 case NVPTXISD::Suld2DV2I16Trap:
418 case NVPTXISD::Suld2DV2I32Trap:
419 case NVPTXISD::Suld2DV2I64Trap:
420 case NVPTXISD::Suld2DV4I8Trap:
421 case NVPTXISD::Suld2DV4I16Trap:
422 case NVPTXISD::Suld2DV4I32Trap:
423 case NVPTXISD::Suld2DArrayI8Trap:
424 case NVPTXISD::Suld2DArrayI16Trap:
425 case NVPTXISD::Suld2DArrayI32Trap:
426 case NVPTXISD::Suld2DArrayI64Trap:
427 case NVPTXISD::Suld2DArrayV2I8Trap:
428 case NVPTXISD::Suld2DArrayV2I16Trap:
429 case NVPTXISD::Suld2DArrayV2I32Trap:
430 case NVPTXISD::Suld2DArrayV2I64Trap:
431 case NVPTXISD::Suld2DArrayV4I8Trap:
432 case NVPTXISD::Suld2DArrayV4I16Trap:
433 case NVPTXISD::Suld2DArrayV4I32Trap:
434 case NVPTXISD::Suld3DI8Trap:
435 case NVPTXISD::Suld3DI16Trap:
436 case NVPTXISD::Suld3DI32Trap:
437 case NVPTXISD::Suld3DI64Trap:
438 case NVPTXISD::Suld3DV2I8Trap:
439 case NVPTXISD::Suld3DV2I16Trap:
440 case NVPTXISD::Suld3DV2I32Trap:
441 case NVPTXISD::Suld3DV2I64Trap:
442 case NVPTXISD::Suld3DV4I8Trap:
443 case NVPTXISD::Suld3DV4I16Trap:
444 case NVPTXISD::Suld3DV4I32Trap:
445 case NVPTXISD::Suld1DI8Zero:
446 case NVPTXISD::Suld1DI16Zero:
447 case NVPTXISD::Suld1DI32Zero:
448 case NVPTXISD::Suld1DI64Zero:
449 case NVPTXISD::Suld1DV2I8Zero:
450 case NVPTXISD::Suld1DV2I16Zero:
451 case NVPTXISD::Suld1DV2I32Zero:
452 case NVPTXISD::Suld1DV2I64Zero:
453 case NVPTXISD::Suld1DV4I8Zero:
454 case NVPTXISD::Suld1DV4I16Zero:
455 case NVPTXISD::Suld1DV4I32Zero:
456 case NVPTXISD::Suld1DArrayI8Zero:
457 case NVPTXISD::Suld1DArrayI16Zero:
458 case NVPTXISD::Suld1DArrayI32Zero:
459 case NVPTXISD::Suld1DArrayI64Zero:
460 case NVPTXISD::Suld1DArrayV2I8Zero:
461 case NVPTXISD::Suld1DArrayV2I16Zero:
462 case NVPTXISD::Suld1DArrayV2I32Zero:
463 case NVPTXISD::Suld1DArrayV2I64Zero:
464 case NVPTXISD::Suld1DArrayV4I8Zero:
465 case NVPTXISD::Suld1DArrayV4I16Zero:
466 case NVPTXISD::Suld1DArrayV4I32Zero:
467 case NVPTXISD::Suld2DI8Zero:
468 case NVPTXISD::Suld2DI16Zero:
469 case NVPTXISD::Suld2DI32Zero:
470 case NVPTXISD::Suld2DI64Zero:
471 case NVPTXISD::Suld2DV2I8Zero:
472 case NVPTXISD::Suld2DV2I16Zero:
473 case NVPTXISD::Suld2DV2I32Zero:
474 case NVPTXISD::Suld2DV2I64Zero:
475 case NVPTXISD::Suld2DV4I8Zero:
476 case NVPTXISD::Suld2DV4I16Zero:
477 case NVPTXISD::Suld2DV4I32Zero:
478 case NVPTXISD::Suld2DArrayI8Zero:
479 case NVPTXISD::Suld2DArrayI16Zero:
480 case NVPTXISD::Suld2DArrayI32Zero:
481 case NVPTXISD::Suld2DArrayI64Zero:
482 case NVPTXISD::Suld2DArrayV2I8Zero:
483 case NVPTXISD::Suld2DArrayV2I16Zero:
484 case NVPTXISD::Suld2DArrayV2I32Zero:
485 case NVPTXISD::Suld2DArrayV2I64Zero:
486 case NVPTXISD::Suld2DArrayV4I8Zero:
487 case NVPTXISD::Suld2DArrayV4I16Zero:
488 case NVPTXISD::Suld2DArrayV4I32Zero:
489 case NVPTXISD::Suld3DI8Zero:
490 case NVPTXISD::Suld3DI16Zero:
491 case NVPTXISD::Suld3DI32Zero:
492 case NVPTXISD::Suld3DI64Zero:
493 case NVPTXISD::Suld3DV2I8Zero:
494 case NVPTXISD::Suld3DV2I16Zero:
495 case NVPTXISD::Suld3DV2I32Zero:
496 case NVPTXISD::Suld3DV2I64Zero:
497 case NVPTXISD::Suld3DV4I8Zero:
498 case NVPTXISD::Suld3DV4I16Zero:
499 case NVPTXISD::Suld3DV4I32Zero:
500 if (trySurfaceIntrinsic(N))
501 return;
502 break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    if (tryBFE(N))
      return;
    break;
  case ISD::ADDRSPACECAST:
    SelectAddrSpaceCast(N);
    return;
  case ISD::ConstantFP:
    if (tryConstantFP(N))
      return;
    break;
  default:
    break;
  }
  SelectCode(N);
}

bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(1);
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:
    return tryLDGLDU(N);
  }
}

// There's no way to specify FP16 and BF16 immediates in .(b)f16 ops, so we
// have to load them into an .(b)f16 register first.
bool NVPTXDAGToDAGISel::tryConstantFP(SDNode *N) {
  if (N->getValueType(0) != MVT::f16 && N->getValueType(0) != MVT::bf16)
    return false;
  SDValue Val = CurDAG->getTargetConstantFP(
      cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), N->getValueType(0));
  SDNode *LoadConstF16 = CurDAG->getMachineNode(
      (N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
                                      : NVPTX::LOAD_CONST_BF16),
      SDLoc(N), N->getValueType(0), Val);
  ReplaceNode(N, LoadConstF16);
  return true;
}

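// An illustrative example of tryConstantFP's result: selecting the f16
// constant 1.0 emits a LOAD_CONST_F16 node, which prints roughly as
//   mov.b16 %rs1, 0x3C00;
// where 0x3C00 is the IEEE-754 binary16 encoding of 1.0.
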
// Map ISD::CONDCODE value to appropriate CmpMode expected by
// NVPTXInstPrinter::printCmpMode().
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
  using NVPTX::PTXCmpMode::CmpMode;
  unsigned PTXCmpMode = [](ISD::CondCode CC) {
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code.");
    case ISD::SETOEQ:
      return CmpMode::EQ;
    case ISD::SETOGT:
      return CmpMode::GT;
    case ISD::SETOGE:
      return CmpMode::GE;
    case ISD::SETOLT:
      return CmpMode::LT;
    case ISD::SETOLE:
      return CmpMode::LE;
    case ISD::SETONE:
      return CmpMode::NE;
    case ISD::SETO:
      return CmpMode::NUM;
    case ISD::SETUO:
      return CmpMode::NotANumber;
    case ISD::SETUEQ:
      return CmpMode::EQU;
    case ISD::SETUGT:
      return CmpMode::GTU;
    case ISD::SETUGE:
      return CmpMode::GEU;
    case ISD::SETULT:
      return CmpMode::LTU;
    case ISD::SETULE:
      return CmpMode::LEU;
    case ISD::SETUNE:
      return CmpMode::NEU;
    case ISD::SETEQ:
      return CmpMode::EQ;
    case ISD::SETGT:
      return CmpMode::GT;
    case ISD::SETGE:
      return CmpMode::GE;
    case ISD::SETLT:
      return CmpMode::LT;
    case ISD::SETLE:
      return CmpMode::LE;
    case ISD::SETNE:
      return CmpMode::NE;
    }
  }(CondCode.get());

  if (FTZ)
    PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;

  return PTXCmpMode;
}

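// Illustrative mappings (using the usual printCmpMode spellings): SETOLT
// selects setp.lt, SETULT selects setp.ltu, and SETUO selects setp.nan,
// i.e. "true iff at least one operand is NaN".
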
bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  SDLoc DL(N);
  SDNode *SetP = CurDAG->getMachineNode(
      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
  ReplaceNode(N, SetP);
  return true;
}

bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  SDLoc DL(N);
  SDNode *SetP = CurDAG->getMachineNode(
      NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
  ReplaceNode(N, SetP);
  return true;
}

// Find all instances of extract_vector_elt that use this v2f16 vector
// and coalesce them into a scattering move instruction.
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
  SDValue Vector = N->getOperand(0);

  // We only care about 16x2 as it's the only real vector type we
  // need to deal with.
  MVT VT = Vector.getSimpleValueType();
  if (!Isv2x16VT(VT))
    return false;
  // Find and record all uses of this vector that extract element 0 or 1.
  SmallVector<SDNode *, 4> E0, E1;
  for (auto *U : Vector.getNode()->uses()) {
    if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      continue;
    if (U->getOperand(0) != Vector)
      continue;
    if (const ConstantSDNode *IdxConst =
            dyn_cast<ConstantSDNode>(U->getOperand(1))) {
      if (IdxConst->getZExtValue() == 0)
        E0.push_back(U);
      else if (IdxConst->getZExtValue() == 1)
        E1.push_back(U);
      else
        llvm_unreachable("Invalid vector index.");
    }
  }

  // There's no point scattering f16x2 if we only ever access one
  // element of it.
  if (E0.empty() || E1.empty())
    return false;

  // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
  // into f16,f16 SplitF16x2(V)
  MVT EltVT = VT.getVectorElementType();
  SDNode *ScatterOp =
      CurDAG->getMachineNode(NVPTX::I32toV2I16, SDLoc(N), EltVT, EltVT, Vector);
  for (auto *Node : E0)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
  for (auto *Node : E1)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));

  return true;
}

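// Illustratively, both extracts of V are rewritten to the two results of one
// I32toV2I16 pseudo, which is later printed as a single packed-register move
// along the lines of:
//   mov.b32 {%rs1, %rs2}, %r1;
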
static unsigned int getCodeAddrSpace(MemSDNode *N) {
  const Value *Src = N->getMemOperand()->getValue();

  if (!Src)
    return NVPTX::PTXLdStInstCode::GENERIC;

  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    default: break;
    }
  }
  return NVPTX::PTXLdStInstCode::GENERIC;
}

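// For example, a load through a pointer known to be in ADDRESS_SPACE_SHARED
// selects the SHARED code and is emitted as ld.shared.*; with no pointer
// value to inspect we conservatively fall back to GENERIC (plain ld.*).
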
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
                          unsigned CodeAddrSpace, MachineFunction *F) {
  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
  // space.
  //
  // We have two ways of identifying invariant loads: Loads may be explicitly
  // marked as invariant, or we may infer them to be invariant.
  //
  // We currently infer invariance for loads from
  //  - constant global variables, and
  //  - kernel function pointer params that are noalias (i.e. __restrict) and
  //    never written to.
  //
  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
  // not during the SelectionDAG phase).
  //
  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
  // explicitly invariant loads because these are how clang tells us to use ldg
  // when the user uses a builtin.
  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
    return false;

  if (N->isInvariant())
    return true;

  bool IsKernelFn = isKernelFunction(F->getFunction());

  // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
  // because the former looks through phi nodes while the latter does not. We
  // need to look through phi nodes to handle pointer induction variables.
  SmallVector<const Value *, 8> Objs;
  getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);

  return all_of(Objs, [&](const Value *V) {
    if (auto *A = dyn_cast<const Argument>(V))
      return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
    if (auto *GV = dyn_cast<const GlobalVariable>(V))
      return GV->isConstant();
    return false;
  });
}

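// As a concrete example (CUDA-flavored, illustrative only): in a kernel like
//   __global__ void k(const float *__restrict__ in, float *out);
// loads through `in` can be inferred invariant, because `in` is a kernel
// parameter that is noalias (__restrict__) and never written to, so they may
// be lowered to ld.global.nc.
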
bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(0);
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_texsurf_handle_internal:
    SelectTexSurfHandle(N);
    return true;
  }
}

void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
  // Op 0 is the intrinsic ID
  SDValue Wrapper = N->getOperand(1);
  SDValue GlobalVal = Wrapper.getOperand(0);
  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
                                        MVT::i64, GlobalVal));
}

void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
    unsigned Opc;
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
                                ? NVPTX::cvta_shared_6432
                                : NVPTX::cvta_shared_64)
                         : NVPTX::cvta_shared;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
                                ? NVPTX::cvta_const_6432
                                : NVPTX::cvta_const_64)
                         : NVPTX::cvta_const;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
                                ? NVPTX::cvta_local_6432
                                : NVPTX::cvta_local_64)
                         : NVPTX::cvta_local;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  } else {
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    unsigned Opc;
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
                                ? NVPTX::cvta_to_shared_3264
                                : NVPTX::cvta_to_shared_64)
                         : NVPTX::cvta_to_shared;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
                                ? NVPTX::cvta_to_const_3264
                                : NVPTX::cvta_to_const_64)
                         : NVPTX::cvta_to_const;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
                                ? NVPTX::cvta_to_local_3264
                                : NVPTX::cvta_to_local_64)
                         : NVPTX::cvta_to_local;
      break;
    case ADDRESS_SPACE_PARAM:
      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
                         : NVPTX::nvvm_ptr_gen_to_param;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  }
}

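// For example, on a 64-bit target a generic-to-global addrspacecast selects
// NVPTX::cvta_to_global_64, which prints as something like
//   cvta.to.global.u64 %rd2, %rd1;
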
// Helper function template to reduce amount of boilerplate code for
// opcode selection.
static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
                unsigned Opcode_i16, unsigned Opcode_i32,
                std::optional<unsigned> Opcode_i64, unsigned Opcode_f32,
                std::optional<unsigned> Opcode_f64) {
  switch (VT) {
  case MVT::i1:
  case MVT::i8:
    return Opcode_i8;
  case MVT::i16:
    return Opcode_i16;
  case MVT::i32:
    return Opcode_i32;
  case MVT::i64:
    return Opcode_i64;
  case MVT::f16:
  case MVT::bf16:
    return Opcode_i16;
  case MVT::v2f16:
  case MVT::v2bf16:
  case MVT::v2i16:
  case MVT::v4i8:
    return Opcode_i32;
  case MVT::f32:
    return Opcode_f32;
  case MVT::f64:
    return Opcode_f64;
  default:
    return std::nullopt;
  }
}

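// For example, a caller selecting a load of v2f16 receives Opcode_i32: packed
// 16-bit pairs (v2f16/v2bf16/v2i16) and v4i8 all travel through 32-bit
// registers, so they reuse the .b32 instruction variants.
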
static int getLdStRegType(EVT VT) {
  if (VT.isFloatingPoint())
    switch (VT.getSimpleVT().SimpleTy) {
    case MVT::f16:
    case MVT::bf16:
    case MVT::v2f16:
    case MVT::v2bf16:
      return NVPTX::PTXLdStInstCode::Untyped;
    default:
      return NVPTX::PTXLdStInstCode::Float;
    }
  else
    return NVPTX::PTXLdStInstCode::Unsigned;
}

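// Illustratively (assuming the usual PTX type suffixes): f32 maps to Float
// (ld.f32/st.f32), i32 to Unsigned (ld.u32), while f16/bf16 and their packed
// pairs are Untyped and use the bit-size forms (ld.b16/ld.b32).
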
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *LD = cast<MemSDNode>(N);
  assert(LD->readMem() && "Expected load");
  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = nullptr;

  // do not support pre/post inc/dec
  if (PlainLoad && PlainLoad->isIndexed())
    return false;

  if (!LoadedVT.isSimple())
    return false;

  AtomicOrdering Ordering = LD->getSuccessOrdering();
  // In order to lower atomic loads with stronger guarantees we would need to
  // use load.acquire or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT SimpleVT = LoadedVT.getSimpleVT();
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int fromType;

  // Vector Setting
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    assert((Isv2x16VT(LoadedVT) || LoadedVT == MVT::v4i8) &&
           "Unexpected vector type");
    // v2f16/v2bf16/v2i16 is loaded using ld.b32
    fromTypeWidth = 32;
  }

  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else
    fromType = getLdStRegType(ScalarVT);

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
                             NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
                             NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
                          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
                          NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
                               NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
                               NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
                          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
                          NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
                               NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
                               NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  }

  if (!NVPTXLD)
    return false;

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});

  ReplaceNode(N, NVPTXLD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else
    FromType = getLdStRegType(ScalarVT);

  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    break;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    break;
  default:
    return false;
  }

  EVT EltVT = N->getValueType(0);

  // v8x16 is a special case. PTX doesn't have an ld.v8 instruction for 16-bit
  // types. Instead, we split the vector into v2x16 chunks and load them with
  // ld.v4.b32.
  if (Isv2x16VT(EltVT)) {
    assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
    EltVT = MVT::i32;
    FromType = NVPTX::PTXLdStInstCode::Untyped;
    FromTypeWidth = 32;
  }

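  // At this point a v8f16 LoadV4, for instance, has been retyped as four i32
  // results: each lane of the ld.v4.b32 carries one packed f16x2 pair.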
  if (SelectDirectAddr(Op1, Addr)) {
1104 switch (N->getOpcode()) {
1105 default:
1106 return false;
1107 case NVPTXISD::LoadV2:
1108 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1109 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1110 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1111 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1112 break;
1113 case NVPTXISD::LoadV4:
1114 Opcode =
1115 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1116 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
1117 std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
1118 break;
1119 }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1129 switch (N->getOpcode()) {
1130 default:
1131 return false;
1132 case NVPTXISD::LoadV2:
1133 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1134 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1135 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1136 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1137 break;
1138 case NVPTXISD::LoadV4:
1139 Opcode =
1140 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1141 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
1142 std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
1143 break;
1144 }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (PointerSize == 64) {
1155 switch (N->getOpcode()) {
1156 default:
1157 return false;
1158 case NVPTXISD::LoadV2:
1159 Opcode =
1160 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1161 NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
1162 NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
1163 NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
1164 break;
1165 case NVPTXISD::LoadV4:
1166 Opcode = pickOpcodeForVT(
1167 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1168 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
1169 NVPTX::LDV_f32_v4_ari_64, std::nullopt);
1170 break;
1171 }
1172 } else {
1173 switch (N->getOpcode()) {
1174 default:
1175 return false;
1176 case NVPTXISD::LoadV2:
1177 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1178 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1179 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1180 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1181 break;
1182 case NVPTXISD::LoadV4:
1183 Opcode =
1184 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1185 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
1186 std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
1187 break;
1188 }
1189 }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };

    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else {
    if (PointerSize == 64) {
1199 switch (N->getOpcode()) {
1200 default:
1201 return false;
1202 case NVPTXISD::LoadV2:
1203 Opcode = pickOpcodeForVT(
1204 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1205 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1206 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1207 NVPTX::LDV_f64_v2_areg_64);
1208 break;
1209 case NVPTXISD::LoadV4:
1210 Opcode = pickOpcodeForVT(
1211 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1212 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
1213 NVPTX::LDV_f32_v4_areg_64, std::nullopt);
1214 break;
1215 }
1216 } else {
1217 switch (N->getOpcode()) {
1218 default:
1219 return false;
1220 case NVPTXISD::LoadV2:
1221 Opcode =
1222 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1223 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1224 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
1225 NVPTX::LDV_f64_v2_areg);
1226 break;
1227 case NVPTXISD::LoadV4:
1228 Opcode =
1229 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1230 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
1231 std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
1232 break;
1233 }
1234 }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  }

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1;
  MemSDNode *Mem;
  bool IsLDG = true;

  // If this is an LDG intrinsic, the address is the third operand. If it's an
  // LDG/LDU SD node (from custom vector handling), then it's the second
  // operand.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    Op1 = N->getOperand(2);
    Mem = cast<MemIntrinsicSDNode>(N);
    unsigned IID = N->getConstantOperandVal(1);
    switch (IID) {
    default:
      return false;
    case Intrinsic::nvvm_ldg_global_f:
    case Intrinsic::nvvm_ldg_global_i:
    case Intrinsic::nvvm_ldg_global_p:
      IsLDG = true;
      break;
    case Intrinsic::nvvm_ldu_global_f:
    case Intrinsic::nvvm_ldu_global_i:
    case Intrinsic::nvvm_ldu_global_p:
      IsLDG = false;
      break;
    }
  } else {
    Op1 = N->getOperand(1);
    Mem = cast<MemSDNode>(N);
  }

  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  SDValue Base, Offset, Addr;
  EVT OrigType = N->getValueType(0);

  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;
  if (EltVT.isVector()) {
    NumElts = EltVT.getVectorNumElements();
    EltVT = EltVT.getVectorElementType();
    // Vectors of 16-bit types are loaded/stored as multiples of v2x16
    // elements.
    if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
        (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
        (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
      assert(NumElts % 2 == 0 && "Vector must have even number of elements");
      EltVT = OrigType;
      NumElts /= 2;
    } else if (OrigType == MVT::v4i8) {
      EltVT = OrigType;
      NumElts = 1;
    }
  }

  // Build the "promoted" result VTList for the load. If we are really loading
  // i8s, then the return type will be promoted to i16 since we do not expose
  // 8-bit registers in NVPTX.
  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
  SmallVector<EVT, 5> InstVTs;
  for (unsigned i = 0; i != NumElts; ++i) {
    InstVTs.push_back(NodeVT);
  }
  InstVTs.push_back(MVT::Other);
  SDVTList InstVTList = CurDAG->getVTList(InstVTs);

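  // Example: a scalar ldg of an i8 element builds the VT list (i16, Other),
  // since i8 results are promoted to i16; an LDGV4 of f32 elements builds
  // (f32, f32, f32, f32, Other).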
  if (SelectDirectAddr(Op1, Addr)) {
1318 switch (N->getOpcode()) {
1319 default:
1320 return false;
1321 case ISD::LOAD:
1322 case ISD::INTRINSIC_W_CHAIN:
1323 if (IsLDG)
1324 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1325 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1326 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1327 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1328 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1329 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1330 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1331 else
1332 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1333 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1334 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1335 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1336 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1337 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1338 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1339 break;
1340 case NVPTXISD::LoadV2:
1341 case NVPTXISD::LDGV2:
1342 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1343 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1344 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1345 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1346 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1347 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1348 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1349 break;
1350 case NVPTXISD::LDUV2:
1351 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1352 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1353 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1354 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1355 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1356 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1357 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1358 break;
1359 case NVPTXISD::LoadV4:
1360 case NVPTXISD::LDGV4:
1361 Opcode = pickOpcodeForVT(
1362 EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1363 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1364 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
1365 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
1366 break;
1367 case NVPTXISD::LDUV4:
1368 Opcode = pickOpcodeForVT(
1369 EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1370 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1371 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
1372 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
1373 break;
1374 }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (TM.is64Bit()) {
1382 switch (N->getOpcode()) {
1383 default:
1384 return false;
1385 case ISD::LOAD:
1386 case ISD::INTRINSIC_W_CHAIN:
1387 if (IsLDG)
1388 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1389 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1390 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1391 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1392 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1393 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1394 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1395 else
1396 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1397 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1398 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1399 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1400 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1401 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1402 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1403 break;
1404 case NVPTXISD::LoadV2:
1405 case NVPTXISD::LDGV2:
1406 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1407 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1408 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1409 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1410 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1411 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1412 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1413 break;
1414 case NVPTXISD::LDUV2:
1415 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1416 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1417 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1418 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1419 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1420 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1421 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1422 break;
1423 case NVPTXISD::LoadV4:
1424 case NVPTXISD::LDGV4:
1425 Opcode = pickOpcodeForVT(
1426 EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1427 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1428 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
1429 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
1430 break;
1431 case NVPTXISD::LDUV4:
1432 Opcode = pickOpcodeForVT(
1433 EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1434 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1435 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
1436 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
1437 break;
1438 }
1439 } else {
1440 switch (N->getOpcode()) {
1441 default:
1442 return false;
1443 case ISD::LOAD:
1444 case ISD::INTRINSIC_W_CHAIN:
1445 if (IsLDG)
1446 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1447 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1448 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1449 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1450 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1451 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1452 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1453 else
1454 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1455 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1456 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1457 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1458 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1459 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1460 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1461 break;
1462 case NVPTXISD::LoadV2:
1463 case NVPTXISD::LDGV2:
1464 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1465 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1466 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1467 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1468 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1469 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1470 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1471 break;
1472 case NVPTXISD::LDUV2:
1473 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1474 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1475 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1476 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1477 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1478 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1479 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1480 break;
1481 case NVPTXISD::LoadV4:
1482 case NVPTXISD::LDGV4:
1483 Opcode = pickOpcodeForVT(
1484 EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1485 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1486 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
1487 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
1488 break;
1489 case NVPTXISD::LDUV4:
1490 Opcode = pickOpcodeForVT(
1491 EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1492 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1493 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
1494 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
1495 break;
1496 }
1497 }
    if (!Opcode)
      return false;
    SDValue Ops[] = {Base, Offset, Chain};
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  } else {
    if (TM.is64Bit()) {
1504 switch (N->getOpcode()) {
1505 default:
1506 return false;
1507 case ISD::LOAD:
1508 case ISD::INTRINSIC_W_CHAIN:
1509 if (IsLDG)
1510 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1511 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1512 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1513 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1514 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1515 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1516 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1517 else
1518 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1519 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1520 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1521 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1522 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1523 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1524 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1525 break;
1526 case NVPTXISD::LoadV2:
1527 case NVPTXISD::LDGV2:
1528 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1529 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1530 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1531 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1532 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1533 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1534 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1535 break;
1536 case NVPTXISD::LDUV2:
1537 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1538 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1539 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1540 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1541 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1542 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1543 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1544 break;
1545 case NVPTXISD::LoadV4:
1546 case NVPTXISD::LDGV4:
1547 Opcode = pickOpcodeForVT(
1548 EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1549 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1550 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
1551 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
1552 break;
1553 case NVPTXISD::LDUV4:
1554 Opcode = pickOpcodeForVT(
1555 EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1556 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1557 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
1558 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
1559 break;
1560 }
1561 } else {
1562 switch (N->getOpcode()) {
1563 default:
1564 return false;
1565 case ISD::LOAD:
1566 case ISD::INTRINSIC_W_CHAIN:
1567 if (IsLDG)
1568 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1569 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1570 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1571 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1572 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1573 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1574 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1575 else
1576 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1577 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1578 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1579 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1580 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1581 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1582 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1583 break;
1584 case NVPTXISD::LoadV2:
1585 case NVPTXISD::LDGV2:
1586 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1587 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1588 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1589 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1590 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1591 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1592 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1593 break;
1594 case NVPTXISD::LDUV2:
1595 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1596 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1597 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1598 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1599 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1600 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1601 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1602 break;
1603 case NVPTXISD::LoadV4:
1604 case NVPTXISD::LDGV4:
1605 Opcode = pickOpcodeForVT(
1606 EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1607 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1608 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
1609 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
1610 break;
1611 case NVPTXISD::LDUV4:
1612 Opcode = pickOpcodeForVT(
1613 EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1614 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1615 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
1616 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
1617 break;
1618 }
1619 }
1620 if (!Opcode)
1621 return false;
1622 SDValue Ops[] = { Op1, Chain };
1623 LD = CurDAG->getMachineNode(Opcode: *Opcode, dl: DL, VTs: InstVTList, Ops);
1624 }
1625
1626 // For automatic generation of LDG (through SelectLoad[Vector], not the
1627 // intrinsics), we may have an extending load like:
1628 //
1629 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1630 //
1631 // In this case, the matching logic above will select a load for the original
1632 // memory type (in this case, i8) and our types will not match (the node needs
1633 // to return an i32 in this case). Our LDG/LDU nodes do not support the
1634 // concept of sign-/zero-extension, so emulate it here by adding an explicit
1635 // CVT instruction. Ptxas should clean up any redundancies here.
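  //
  // For example, for the i8 -> i32 zero-extending load above, the selected
  // LDG loads an i8 and the loop below widens its result with a CVT, roughly
  // (illustrative PTX, not an exact listing):
  //
  //   ld.global.nc.u8 %rs1, [%rd1];
  //   cvt.u32.u8      %r1, %rs1;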

  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);

  if (OrigType != EltVT &&
      (LdNode || (OrigType.isFloatingPoint() && EltVT.isFloatingPoint()))) {
    // We have an extending-load. The instruction we selected operates on the
    // smaller type, but the SDNode we are replacing has the larger type. We
    // need to emit a CVT to make the types match.
    unsigned CvtOpc =
        GetConvertOpcode(OrigType.getSimpleVT(), EltVT.getSimpleVT(), LdNode);

    // For each output value, apply the manual sign/zero-extension and make
    // sure all users of the load go through that CVT.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Res(LD, i);
      SDValue OrigVal(N, i);

      SDNode *CvtNode =
          CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
                                 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
                                                           DL, MVT::i32));
      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
    }
  }

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *ST = cast<MemSDNode>(N);
  assert(ST->writeMem() && "Expected store");
  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
  assert((PlainStore || AtomicStore) && "Expected store");
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;
  // We do not currently support pre/post-incremented stores.
  if (PlainStore && PlainStore->isIndexed())
    return false;

  if (!StoreVT.isSimple())
    return false;

  AtomicOrdering Ordering = ST->getSuccessOrdering();
  // In order to lower atomic stores with stronger guarantees we would need to
  // use store.release or insert fences. However, these features were only
  // added with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;

  // Type Setting: toType + toTypeWidth
  // - for integer types, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  if (SimpleVT.isVector()) {
    assert((Isv2x16VT(StoreVT) || StoreVT == MVT::v4i8) &&
           "Unexpected vector type");
    // v2x16 is stored using st.b32
    toTypeWidth = 32;
  }

  unsigned int toType = getLdStRegType(ScalarVT);

  // Create the machine instruction DAG
  SDValue Chain = ST->getChain();
  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType SourceVT =
      Value.getNode()->getSimpleValueType(0).SimpleTy;

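  // The opcode variants below differ in value type and in the addressing mode
  // that matched: _avar takes a direct address, _asi a symbol plus immediate
  // offset, _ari a register plus immediate offset, and _areg a plain register.
  // pickOpcodeForVT picks the i8/i16/i32/i64/f32/f64 flavor for SourceVT;
  // std::nullopt marks combinations with no instruction.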
  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    if (!Opcode)
      return false;
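    // Operand order mirrors the ST_* instruction definitions: the value to
    // store, the flag immediates (volatile, address space, vector arity,
    // type, width), then the address operand(s) and the chain.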
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Addr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
                          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
                          NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (!Opcode)
      return false;

    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64,
                               NVPTX::ST_i16_areg_64, NVPTX::ST_i32_areg_64,
                               NVPTX::ST_i64_areg_64, NVPTX::ST_f32_areg_64,
                               NVPTX::ST_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     BasePtr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  }

  if (!NVPTXST)
    return false;

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
  ReplaceNode(N, NVPTXST);
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *ST;
  EVT EltVT = Op1.getValueType();
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT StoreVT = MemSD->getMemoryVT();

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
                       "memory space");
  }
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Type Setting: toType + toTypeWidth
  // - for integer types, always use 'u'
  assert(StoreVT.isSimple() && "Store value is not simple");
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  unsigned ToType = getLdStRegType(ScalarVT);

  SmallVector<SDValue, 12> StOps;
  SDValue N2;
  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::StoreV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    N2 = N->getOperand(3);
    break;
  case NVPTXISD::StoreV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    StOps.push_back(N->getOperand(3));
    StOps.push_back(N->getOperand(4));
    N2 = N->getOperand(5);
    break;
  default:
    return false;
  }

  // v8x16 is a special case. PTX doesn't have an st.v8.x16 instruction.
  // Instead, we split the vector into v2x16 chunks and store them with
  // st.v4.b32.
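  // For example (illustrative), a v8f16 store reaches here as a StoreV4 whose
  // four value operands are v2f16; each chunk stays packed in a 32-bit
  // register, so a single st.v4.b32 stores the whole vector.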
  if (Isv2x16VT(EltVT)) {
    assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected store opcode.");
    EltVT = MVT::i32;
    ToType = NVPTX::PTXLdStInstCode::Untyped;
    ToTypeWidth = 32;
  }

  StOps.push_back(getI32Imm(IsVolatile, DL));
  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
  StOps.push_back(getI32Imm(VecType, DL));
  StOps.push_back(getI32Imm(ToType, DL));
  StOps.push_back(getI32Imm(ToTypeWidth, DL));
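  // StOps now holds the value operands followed by the flag immediates; the
  // address operand(s) and the chain are appended below once the addressing
  // mode is known.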

  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::StoreV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
                               NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
                               NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
      break;
    case NVPTXISD::StoreV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
                               NVPTX::STV_i32_v4_avar, std::nullopt,
                               NVPTX::STV_f32_v4_avar, std::nullopt);
      break;
    }
    StOps.push_back(Addr);
  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::StoreV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
                               NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
                               NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
      break;
    case NVPTXISD::StoreV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
                          NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
                          std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
      break;
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                            NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
                            NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
                            NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
            NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
            NVPTX::STV_f32_v4_ari_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
                                 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
                                 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
                                 NVPTX::STV_i32_v4_ari, std::nullopt,
                                 NVPTX::STV_f32_v4_ari, std::nullopt);
        break;
      }
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
            NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
            NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
            NVPTX::STV_f64_v2_areg_64);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
            NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
            NVPTX::STV_f32_v4_areg_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
                            NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
                            NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
                            NVPTX::STV_f64_v2_areg);
        break;
      case NVPTXISD::StoreV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
                            NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
                            std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
        break;
      }
    }
    StOps.push_back(N2);
  }

  if (!Opcode)
    return false;

  StOps.push_back(Chain);

  ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps);

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});

  ReplaceNode(N, ST);
  return true;
}

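// tryLoadParam selects NVPTXISD::LoadParam{,V2,V4} to the LoadParamMem*
// instructions, which read a scalar or a 2/4-element vector back from the
// .param address space (e.g. a callee's return value).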
bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
  SDValue Chain = Node->getOperand(0);
  SDValue Offset = Node->getOperand(2);
  SDValue Glue = Node->getOperand(3);
  SDLoc DL(Node);
  MemSDNode *Mem = cast<MemSDNode>(Node);

  unsigned VecSize;
  switch (Node->getOpcode()) {
  default:
    return false;
  case NVPTXISD::LoadParam:
    VecSize = 1;
    break;
  case NVPTXISD::LoadParamV2:
    VecSize = 2;
    break;
  case NVPTXISD::LoadParamV4:
    VecSize = 4;
    break;
  }

  EVT EltVT = Node->getValueType(0);
  EVT MemVT = Mem->getMemoryVT();

  std::optional<unsigned> Opcode;

  switch (VecSize) {
  default:
    return false;
  case 1:
    Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
                             NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
                             NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
                             NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
    break;
  case 2:
    Opcode =
        pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
                        NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
                        NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
                        NVPTX::LoadParamMemV2F64);
    break;
  case 4:
    Opcode =
        pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
                        NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
                        std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
    break;
  }
  if (!Opcode)
    return false;

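  // The machine node produces VecSize results of EltVT followed by the chain
  // and glue outputs.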
  SDVTList VTs;
  if (VecSize == 1) {
    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
  } else if (VecSize == 2) {
    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
  } else {
    EVT EVTs[] = {EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue};
    VTs = CurDAG->getVTList(EVTs);
  }

  unsigned OffsetVal = Offset->getAsZExtVal();

  SmallVector<SDValue, 2> Ops;
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Glue);

  ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Offset = N->getOperand(1);
  unsigned OffsetVal = Offset->getAsZExtVal();
  MemSDNode *Mem = cast<MemSDNode>(N);

  // How many elements do we have?
  unsigned NumElts = 1;
  switch (N->getOpcode()) {
  default:
    return false;
  case NVPTXISD::StoreRetval:
    NumElts = 1;
    break;
  case NVPTXISD::StoreRetvalV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreRetvalV4:
    NumElts = 4;
    break;
  }

  // Build vector of operands
  SmallVector<SDValue, 6> Ops;
  for (unsigned i = 0; i < NumElts; ++i)
    Ops.push_back(N->getOperand(i + 2));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);

  // Determine target opcode
  // If we have an i1, use an 8-bit store. The lowering code in
  // NVPTXISelLowering will have already emitted an upcast.
  std::optional<unsigned> Opcode;
  switch (NumElts) {
  default:
    return false;
  case 1:
    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                             NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
                             NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
                             NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
    if (Opcode == NVPTX::StoreRetvalI8) {
      // Fine-tune the opcode depending on the size of the operand.
      // This helps to avoid creating redundant COPY instructions in
      // InstrEmitter::AddRegisterOperand().
      switch (Ops[0].getSimpleValueType().SimpleTy) {
      default:
        break;
      case MVT::i32:
        Opcode = NVPTX::StoreRetvalI8TruncI32;
        break;
      case MVT::i64:
        Opcode = NVPTX::StoreRetvalI8TruncI64;
        break;
      }
    }
    break;
  case 2:
    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                             NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
                             NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
                             NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
    break;
  case 4:
    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                             NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
                             NVPTX::StoreRetvalV4I32, std::nullopt,
                             NVPTX::StoreRetvalV4F32, std::nullopt);
    break;
  }
  if (!Opcode)
    return false;

  SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});

  ReplaceNode(N, Ret);
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Param = N->getOperand(1);
  unsigned ParamVal = Param->getAsZExtVal();
  SDValue Offset = N->getOperand(2);
  unsigned OffsetVal = Offset->getAsZExtVal();
  MemSDNode *Mem = cast<MemSDNode>(N);
  SDValue Glue = N->getOperand(N->getNumOperands() - 1);

  // How many elements do we have?
  unsigned NumElts = 1;
  switch (N->getOpcode()) {
  default:
    return false;
  case NVPTXISD::StoreParamU32:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParam:
    NumElts = 1;
    break;
  case NVPTXISD::StoreParamV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreParamV4:
    NumElts = 4;
    break;
  }

  // Build vector of operands
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0; i < NumElts; ++i)
    Ops.push_back(N->getOperand(i + 3));
  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Glue);

  // Determine target opcode
  // If we have an i1, use an 8-bit store. The lowering code in
  // NVPTXISelLowering will have already emitted an upcast.
  std::optional<unsigned> Opcode;
  switch (N->getOpcode()) {
  default:
    switch (NumElts) {
    default:
      return false;
    case 1:
      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                               NVPTX::StoreParamI8, NVPTX::StoreParamI16,
                               NVPTX::StoreParamI32, NVPTX::StoreParamI64,
                               NVPTX::StoreParamF32, NVPTX::StoreParamF64);
      if (Opcode == NVPTX::StoreParamI8) {
        // Fine-tune the opcode depending on the size of the operand.
        // This helps to avoid creating redundant COPY instructions in
        // InstrEmitter::AddRegisterOperand().
        switch (Ops[0].getSimpleValueType().SimpleTy) {
        default:
          break;
        case MVT::i32:
          Opcode = NVPTX::StoreParamI8TruncI32;
          break;
        case MVT::i64:
          Opcode = NVPTX::StoreParamI8TruncI64;
          break;
        }
      }
      break;
    case 2:
      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                               NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
                               NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
                               NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
      break;
    case 4:
      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                               NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
                               NVPTX::StoreParamV4I32, std::nullopt,
                               NVPTX::StoreParamV4F32, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    break;
  // Special case: if we have a sign-extend/zero-extend node, insert the
  // conversion instruction first, and use that as the value operand to
  // the selected StoreParam node.
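  // For example (illustrative), a StoreParamU32 of an i16 value becomes
  // "cvt.u32.u16 %r, %rs" followed by an st.param.b32 of the widened result.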
  case NVPTXISD::StoreParamU32: {
    Opcode = NVPTX::StoreParamI32;
    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
                                                MVT::i32);
    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
                                         MVT::i32, Ops[0], CvtNone);
    Ops[0] = SDValue(Cvt, 0);
    break;
  }
  case NVPTXISD::StoreParamS32: {
    Opcode = NVPTX::StoreParamI32;
    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
                                                MVT::i32);
    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
                                         MVT::i32, Ops[0], CvtNone);
    Ops[0] = SDValue(Cvt, 0);
    break;
  }
  }

  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
  SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});

  ReplaceNode(N, Ret);
  return true;
}

bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
  unsigned Opc = 0;

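  // Map each texture ISD opcode to its machine instruction. Judging by the
  // suffixes, _RR variants take the texture (and sampler, where present)
  // handles in registers, while unified-mode nodes select _R variants with a
  // single texture handle.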
  switch (N->getOpcode()) {
  default: return false;
  case NVPTXISD::Tex1DFloatS32:
    Opc = NVPTX::TEX_1D_F32_S32_RR;
    break;
  case NVPTXISD::Tex1DFloatFloat:
    Opc = NVPTX::TEX_1D_F32_F32_RR;
    break;
  case NVPTXISD::Tex1DFloatFloatLevel:
    Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DFloatFloatGrad:
    Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DS32S32:
    Opc = NVPTX::TEX_1D_S32_S32_RR;
    break;
  case NVPTXISD::Tex1DS32Float:
    Opc = NVPTX::TEX_1D_S32_F32_RR;
    break;
  case NVPTXISD::Tex1DS32FloatLevel:
    Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DS32FloatGrad:
    Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DU32S32:
    Opc = NVPTX::TEX_1D_U32_S32_RR;
    break;
  case NVPTXISD::Tex1DU32Float:
    Opc = NVPTX::TEX_1D_U32_F32_RR;
    break;
  case NVPTXISD::Tex1DU32FloatLevel:
    Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DU32FloatGrad:
    Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DArrayFloatS32:
    Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
    break;
  case NVPTXISD::Tex1DArrayFloatFloat:
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
    break;
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DArrayS32S32:
    Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
    break;
  case NVPTXISD::Tex1DArrayS32Float:
    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
    break;
  case NVPTXISD::Tex1DArrayS32FloatLevel:
    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DArrayS32FloatGrad:
    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DArrayU32S32:
    Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
    break;
  case NVPTXISD::Tex1DArrayU32Float:
    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
    break;
  case NVPTXISD::Tex1DArrayU32FloatLevel:
    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DArrayU32FloatGrad:
    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DFloatS32:
    Opc = NVPTX::TEX_2D_F32_S32_RR;
    break;
  case NVPTXISD::Tex2DFloatFloat:
    Opc = NVPTX::TEX_2D_F32_F32_RR;
    break;
  case NVPTXISD::Tex2DFloatFloatLevel:
    Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DFloatFloatGrad:
    Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DS32S32:
    Opc = NVPTX::TEX_2D_S32_S32_RR;
    break;
  case NVPTXISD::Tex2DS32Float:
    Opc = NVPTX::TEX_2D_S32_F32_RR;
    break;
  case NVPTXISD::Tex2DS32FloatLevel:
    Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DS32FloatGrad:
    Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DU32S32:
    Opc = NVPTX::TEX_2D_U32_S32_RR;
    break;
  case NVPTXISD::Tex2DU32Float:
    Opc = NVPTX::TEX_2D_U32_F32_RR;
    break;
  case NVPTXISD::Tex2DU32FloatLevel:
    Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DU32FloatGrad:
    Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DArrayFloatS32:
    Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
    break;
  case NVPTXISD::Tex2DArrayFloatFloat:
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
    break;
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DArrayS32S32:
    Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
    break;
  case NVPTXISD::Tex2DArrayS32Float:
    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
    break;
  case NVPTXISD::Tex2DArrayS32FloatLevel:
    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DArrayS32FloatGrad:
    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DArrayU32S32:
    Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
    break;
  case NVPTXISD::Tex2DArrayU32Float:
    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
    break;
  case NVPTXISD::Tex2DArrayU32FloatLevel:
    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DArrayU32FloatGrad:
    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex3DFloatS32:
    Opc = NVPTX::TEX_3D_F32_S32_RR;
    break;
  case NVPTXISD::Tex3DFloatFloat:
    Opc = NVPTX::TEX_3D_F32_F32_RR;
    break;
  case NVPTXISD::Tex3DFloatFloatLevel:
    Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex3DFloatFloatGrad:
    Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex3DS32S32:
    Opc = NVPTX::TEX_3D_S32_S32_RR;
    break;
  case NVPTXISD::Tex3DS32Float:
    Opc = NVPTX::TEX_3D_S32_F32_RR;
    break;
  case NVPTXISD::Tex3DS32FloatLevel:
    Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex3DS32FloatGrad:
    Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex3DU32S32:
    Opc = NVPTX::TEX_3D_U32_S32_RR;
    break;
  case NVPTXISD::Tex3DU32Float:
    Opc = NVPTX::TEX_3D_U32_F32_RR;
    break;
  case NVPTXISD::Tex3DU32FloatLevel:
    Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex3DU32FloatGrad:
    Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::TexCubeFloatFloat:
    Opc = NVPTX::TEX_CUBE_F32_F32_RR;
    break;
  case NVPTXISD::TexCubeFloatFloatLevel:
    Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::TexCubeS32Float:
    Opc = NVPTX::TEX_CUBE_S32_F32_RR;
    break;
  case NVPTXISD::TexCubeS32FloatLevel:
    Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::TexCubeU32Float:
    Opc = NVPTX::TEX_CUBE_U32_F32_RR;
    break;
  case NVPTXISD::TexCubeU32FloatLevel:
    Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::TexCubeArrayFloatFloat:
    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
    break;
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::TexCubeArrayS32Float:
    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
    break;
  case NVPTXISD::TexCubeArrayS32FloatLevel:
    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::TexCubeArrayU32Float:
    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
    break;
  case NVPTXISD::TexCubeArrayU32FloatLevel:
    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tld4R2DFloatFloat:
    Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
    break;
  case NVPTXISD::Tld4G2DFloatFloat:
    Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
    break;
  case NVPTXISD::Tld4B2DFloatFloat:
    Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
    break;
  case NVPTXISD::Tld4A2DFloatFloat:
    Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
    break;
  case NVPTXISD::Tld4R2DS64Float:
    Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
    break;
  case NVPTXISD::Tld4G2DS64Float:
    Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
    break;
  case NVPTXISD::Tld4B2DS64Float:
    Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
    break;
  case NVPTXISD::Tld4A2DS64Float:
    Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
    break;
  case NVPTXISD::Tld4R2DU64Float:
    Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
    break;
  case NVPTXISD::Tld4G2DU64Float:
    Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
    break;
  case NVPTXISD::Tld4B2DU64Float:
    Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
    break;
  case NVPTXISD::Tld4A2DU64Float:
    Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
    break;
  case NVPTXISD::TexUnified1DFloatS32:
    Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
    break;
  case NVPTXISD::TexUnified1DFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
    break;
  case NVPTXISD::TexUnified1DFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified1DS32S32:
    Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
    break;
  case NVPTXISD::TexUnified1DS32Float:
    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
    break;
  case NVPTXISD::TexUnified1DS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified1DU32S32:
    Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
    break;
  case NVPTXISD::TexUnified1DU32Float:
    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
    break;
  case NVPTXISD::TexUnified1DU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified1DArrayFloatS32:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
    break;
  case NVPTXISD::TexUnified1DArrayFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
    break;
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified1DArrayS32S32:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
    break;
  case NVPTXISD::TexUnified1DArrayS32Float:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
    break;
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified1DArrayU32S32:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
    break;
  case NVPTXISD::TexUnified1DArrayU32Float:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
    break;
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DFloatS32:
    Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
    break;
  case NVPTXISD::TexUnified2DFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
    break;
  case NVPTXISD::TexUnified2DFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DS32S32:
    Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
    break;
  case NVPTXISD::TexUnified2DS32Float:
    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
    break;
  case NVPTXISD::TexUnified2DS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DU32S32:
    Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
    break;
  case NVPTXISD::TexUnified2DU32Float:
    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
    break;
  case NVPTXISD::TexUnified2DU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DArrayFloatS32:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
    break;
  case NVPTXISD::TexUnified2DArrayFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
    break;
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DArrayS32S32:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
    break;
  case NVPTXISD::TexUnified2DArrayS32Float:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
    break;
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DArrayU32S32:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
    break;
  case NVPTXISD::TexUnified2DArrayU32Float:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
    break;
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified3DFloatS32:
    Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
    break;
  case NVPTXISD::TexUnified3DFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
    break;
  case NVPTXISD::TexUnified3DFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified3DFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified3DS32S32:
    Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
    break;
  case NVPTXISD::TexUnified3DS32Float:
    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
    break;
  case NVPTXISD::TexUnified3DS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified3DS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified3DU32S32:
    Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
    break;
  case NVPTXISD::TexUnified3DU32Float:
    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
    break;
  case NVPTXISD::TexUnified3DU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified3DU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnifiedCubeFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnifiedCubeS32Float:
    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnifiedCubeU32Float:
    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedR2DS64Float:
    Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedG2DS64Float:
    Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedB2DS64Float:
    Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedA2DS64Float:
    Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedR2DU64Float:
    Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedG2DU64Float:
    Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedB2DU64Float:
    Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnifiedCubeS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnifiedCubeU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
    break;
  }

  // Copy over operands
  SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
  Ops.push_back(N->getOperand(0)); // Move chain to the back.

  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
  return true;
}

bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
  unsigned Opc = 0;
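  // Map each surface-load ISD opcode to its machine instruction. The names
  // encode the geometry (1D/2D/3D, optionally ARRAY), the element shape
  // (I8..I64, V2/V4), and the out-of-bounds handling (CLAMP, TRAP, or ZERO).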
2839 switch (N->getOpcode()) {
2840 default: return false;
2841 case NVPTXISD::Suld1DI8Clamp:
2842 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2843 break;
2844 case NVPTXISD::Suld1DI16Clamp:
2845 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2846 break;
2847 case NVPTXISD::Suld1DI32Clamp:
2848 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2849 break;
2850 case NVPTXISD::Suld1DI64Clamp:
2851 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2852 break;
2853 case NVPTXISD::Suld1DV2I8Clamp:
2854 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2855 break;
2856 case NVPTXISD::Suld1DV2I16Clamp:
2857 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2858 break;
2859 case NVPTXISD::Suld1DV2I32Clamp:
2860 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2861 break;
2862 case NVPTXISD::Suld1DV2I64Clamp:
2863 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2864 break;
2865 case NVPTXISD::Suld1DV4I8Clamp:
2866 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2867 break;
2868 case NVPTXISD::Suld1DV4I16Clamp:
2869 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2870 break;
2871 case NVPTXISD::Suld1DV4I32Clamp:
2872 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2873 break;
2874 case NVPTXISD::Suld1DArrayI8Clamp:
2875 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2876 break;
2877 case NVPTXISD::Suld1DArrayI16Clamp:
2878 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2879 break;
2880 case NVPTXISD::Suld1DArrayI32Clamp:
2881 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2882 break;
2883 case NVPTXISD::Suld1DArrayI64Clamp:
2884 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2885 break;
2886 case NVPTXISD::Suld1DArrayV2I8Clamp:
2887 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2888 break;
2889 case NVPTXISD::Suld1DArrayV2I16Clamp:
2890 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2891 break;
2892 case NVPTXISD::Suld1DArrayV2I32Clamp:
2893 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2894 break;
2895 case NVPTXISD::Suld1DArrayV2I64Clamp:
2896 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2897 break;
2898 case NVPTXISD::Suld1DArrayV4I8Clamp:
2899 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2900 break;
2901 case NVPTXISD::Suld1DArrayV4I16Clamp:
2902 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2903 break;
2904 case NVPTXISD::Suld1DArrayV4I32Clamp:
2905 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2906 break;
2907 case NVPTXISD::Suld2DI8Clamp:
2908 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2909 break;
2910 case NVPTXISD::Suld2DI16Clamp:
2911 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2912 break;
2913 case NVPTXISD::Suld2DI32Clamp:
2914 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2915 break;
2916 case NVPTXISD::Suld2DI64Clamp:
2917 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2918 break;
2919 case NVPTXISD::Suld2DV2I8Clamp:
2920 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2921 break;
2922 case NVPTXISD::Suld2DV2I16Clamp:
2923 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2924 break;
2925 case NVPTXISD::Suld2DV2I32Clamp:
2926 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2927 break;
2928 case NVPTXISD::Suld2DV2I64Clamp:
2929 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2930 break;
2931 case NVPTXISD::Suld2DV4I8Clamp:
2932 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2933 break;
2934 case NVPTXISD::Suld2DV4I16Clamp:
2935 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2936 break;
2937 case NVPTXISD::Suld2DV4I32Clamp:
2938 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2939 break;
2940 case NVPTXISD::Suld2DArrayI8Clamp:
2941 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2942 break;
2943 case NVPTXISD::Suld2DArrayI16Clamp:
2944 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2945 break;
2946 case NVPTXISD::Suld2DArrayI32Clamp:
2947 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2948 break;
2949 case NVPTXISD::Suld2DArrayI64Clamp:
2950 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2951 break;
2952 case NVPTXISD::Suld2DArrayV2I8Clamp:
2953 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2954 break;
2955 case NVPTXISD::Suld2DArrayV2I16Clamp:
2956 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2957 break;
2958 case NVPTXISD::Suld2DArrayV2I32Clamp:
2959 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2960 break;
2961 case NVPTXISD::Suld2DArrayV2I64Clamp:
2962 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2963 break;
2964 case NVPTXISD::Suld2DArrayV4I8Clamp:
2965 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2966 break;
2967 case NVPTXISD::Suld2DArrayV4I16Clamp:
2968 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2969 break;
2970 case NVPTXISD::Suld2DArrayV4I32Clamp:
2971 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
2972 break;
2973 case NVPTXISD::Suld3DI8Clamp:
2974 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
2975 break;
2976 case NVPTXISD::Suld3DI16Clamp:
2977 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
2978 break;
2979 case NVPTXISD::Suld3DI32Clamp:
2980 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
2981 break;
2982 case NVPTXISD::Suld3DI64Clamp:
2983 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
2984 break;
2985 case NVPTXISD::Suld3DV2I8Clamp:
2986 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
2987 break;
2988 case NVPTXISD::Suld3DV2I16Clamp:
2989 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
2990 break;
2991 case NVPTXISD::Suld3DV2I32Clamp:
2992 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
2993 break;
2994 case NVPTXISD::Suld3DV2I64Clamp:
2995 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
2996 break;
2997 case NVPTXISD::Suld3DV4I8Clamp:
2998 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
2999 break;
3000 case NVPTXISD::Suld3DV4I16Clamp:
3001 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
3002 break;
3003 case NVPTXISD::Suld3DV4I32Clamp:
3004 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
3005 break;
3006 case NVPTXISD::Suld1DI8Trap:
3007 Opc = NVPTX::SULD_1D_I8_TRAP_R;
3008 break;
3009 case NVPTXISD::Suld1DI16Trap:
3010 Opc = NVPTX::SULD_1D_I16_TRAP_R;
3011 break;
3012 case NVPTXISD::Suld1DI32Trap:
3013 Opc = NVPTX::SULD_1D_I32_TRAP_R;
3014 break;
3015 case NVPTXISD::Suld1DI64Trap:
3016 Opc = NVPTX::SULD_1D_I64_TRAP_R;
3017 break;
3018 case NVPTXISD::Suld1DV2I8Trap:
3019 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3020 break;
3021 case NVPTXISD::Suld1DV2I16Trap:
3022 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3023 break;
3024 case NVPTXISD::Suld1DV2I32Trap:
3025 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3026 break;
3027 case NVPTXISD::Suld1DV2I64Trap:
3028 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3029 break;
3030 case NVPTXISD::Suld1DV4I8Trap:
3031 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3032 break;
3033 case NVPTXISD::Suld1DV4I16Trap:
3034 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3035 break;
3036 case NVPTXISD::Suld1DV4I32Trap:
3037 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3038 break;
3039 case NVPTXISD::Suld1DArrayI8Trap:
3040 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3041 break;
3042 case NVPTXISD::Suld1DArrayI16Trap:
3043 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3044 break;
3045 case NVPTXISD::Suld1DArrayI32Trap:
3046 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3047 break;
3048 case NVPTXISD::Suld1DArrayI64Trap:
3049 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3050 break;
3051 case NVPTXISD::Suld1DArrayV2I8Trap:
3052 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3053 break;
3054 case NVPTXISD::Suld1DArrayV2I16Trap:
3055 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3056 break;
3057 case NVPTXISD::Suld1DArrayV2I32Trap:
3058 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3059 break;
3060 case NVPTXISD::Suld1DArrayV2I64Trap:
3061 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3062 break;
3063 case NVPTXISD::Suld1DArrayV4I8Trap:
3064 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3065 break;
3066 case NVPTXISD::Suld1DArrayV4I16Trap:
3067 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3068 break;
3069 case NVPTXISD::Suld1DArrayV4I32Trap:
3070 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3071 break;
3072 case NVPTXISD::Suld2DI8Trap:
3073 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3074 break;
3075 case NVPTXISD::Suld2DI16Trap:
3076 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3077 break;
3078 case NVPTXISD::Suld2DI32Trap:
3079 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3080 break;
3081 case NVPTXISD::Suld2DI64Trap:
3082 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3083 break;
3084 case NVPTXISD::Suld2DV2I8Trap:
3085 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3086 break;
3087 case NVPTXISD::Suld2DV2I16Trap:
3088 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3089 break;
3090 case NVPTXISD::Suld2DV2I32Trap:
3091 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3092 break;
3093 case NVPTXISD::Suld2DV2I64Trap:
3094 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3095 break;
3096 case NVPTXISD::Suld2DV4I8Trap:
3097 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3098 break;
3099 case NVPTXISD::Suld2DV4I16Trap:
3100 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3101 break;
3102 case NVPTXISD::Suld2DV4I32Trap:
3103 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3104 break;
3105 case NVPTXISD::Suld2DArrayI8Trap:
3106 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3107 break;
3108 case NVPTXISD::Suld2DArrayI16Trap:
3109 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3110 break;
3111 case NVPTXISD::Suld2DArrayI32Trap:
3112 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3113 break;
3114 case NVPTXISD::Suld2DArrayI64Trap:
3115 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3116 break;
3117 case NVPTXISD::Suld2DArrayV2I8Trap:
3118 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3119 break;
3120 case NVPTXISD::Suld2DArrayV2I16Trap:
3121 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3122 break;
3123 case NVPTXISD::Suld2DArrayV2I32Trap:
3124 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3125 break;
3126 case NVPTXISD::Suld2DArrayV2I64Trap:
3127 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3128 break;
3129 case NVPTXISD::Suld2DArrayV4I8Trap:
3130 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3131 break;
3132 case NVPTXISD::Suld2DArrayV4I16Trap:
3133 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3134 break;
3135 case NVPTXISD::Suld2DArrayV4I32Trap:
3136 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3137 break;
3138 case NVPTXISD::Suld3DI8Trap:
3139 Opc = NVPTX::SULD_3D_I8_TRAP_R;
    break;
  case NVPTXISD::Suld3DI16Trap:
    Opc = NVPTX::SULD_3D_I16_TRAP_R;
    break;
  case NVPTXISD::Suld3DI32Trap:
    Opc = NVPTX::SULD_3D_I32_TRAP_R;
    break;
  case NVPTXISD::Suld3DI64Trap:
    Opc = NVPTX::SULD_3D_I64_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I8Trap:
    Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I16Trap:
    Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I32Trap:
    Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I64Trap:
    Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld3DV4I8Trap:
    Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld3DV4I16Trap:
    Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld3DV4I32Trap:
    Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DI8Zero:
    Opc = NVPTX::SULD_1D_I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DI16Zero:
    Opc = NVPTX::SULD_1D_I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DI32Zero:
    Opc = NVPTX::SULD_1D_I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DI64Zero:
    Opc = NVPTX::SULD_1D_I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I8Zero:
    Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I16Zero:
    Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I32Zero:
    Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I64Zero:
    Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DV4I8Zero:
    Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DV4I16Zero:
    Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DV4I32Zero:
    Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI8Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI16Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI32Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI64Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I8Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I16Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I32Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I64Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV4I8Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV4I16Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV4I32Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DI8Zero:
    Opc = NVPTX::SULD_2D_I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DI16Zero:
    Opc = NVPTX::SULD_2D_I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DI32Zero:
    Opc = NVPTX::SULD_2D_I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DI64Zero:
    Opc = NVPTX::SULD_2D_I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I8Zero:
    Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I16Zero:
    Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I32Zero:
    Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I64Zero:
    Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DV4I8Zero:
    Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DV4I16Zero:
    Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DV4I32Zero:
    Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI8Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI16Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI32Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI64Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I8Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I16Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I32Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I64Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I8Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I16Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I32Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DI8Zero:
    Opc = NVPTX::SULD_3D_I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DI16Zero:
    Opc = NVPTX::SULD_3D_I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DI32Zero:
    Opc = NVPTX::SULD_3D_I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DI64Zero:
    Opc = NVPTX::SULD_3D_I64_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I8Zero:
    Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I16Zero:
    Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I32Zero:
    Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I64Zero:
    Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I8Zero:
    Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I16Zero:
    Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I32Zero:
    Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
    break;
  }

  // Copy over operands
  SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
  Ops.push_back(N->getOperand(0)); // Move chain to the back.

  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
  return true;
}

/// tryBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction.
bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Len;
  SDValue Start;
  SDValue Val;
  bool IsSigned = false;

  if (N->getOpcode() == ISD::AND) {
    // Canonicalize the operands: we want 'and %val, %mask'.
    if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS))
      std::swap(LHS, RHS);

    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
    if (!Mask) {
      // We need a constant mask on the RHS of the AND.
      return false;
    }

    // Extract the mask bits.
    uint64_t MaskVal = Mask->getZExtValue();
    if (!isMask_64(MaskVal)) {
      // We *could* handle shifted masks here, but doing so would require an
      // 'and' operation to fix up the low-order bits, so we would trade
      // shr+and for bfe+and, which has the same throughput.
      return false;
    }

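    // At this point the mask is a contiguous run of low bits. For example,
    // (and (srl x, 8), 255) extracts bits [15:8] of x and becomes
    // bfe.u32 x, 8, 8 below.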
    // How many bits are in our mask?
    int64_t NumBits = countr_one(MaskVal);
    Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);

    if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
      // We have a 'srl/and' pair; extract the effective start bit and length.
      Val = LHS.getNode()->getOperand(0);
      Start = LHS.getNode()->getOperand(1);
      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
      if (StartConst) {
        uint64_t StartVal = StartConst->getZExtValue();
        // How many "good" bits do we have left? "good" is defined here as bits
        // that exist in the original value, not shifted in.
        int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
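        // e.g. for an i32 source, (and (srl x, 24), 0x1ff) asks for 9 bits,
        // but only 32 - 24 = 8 original bits remain, so we bail out below.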
        if (NumBits > GoodBits) {
          // Do not handle the case where bits have been shifted in. In theory
          // we could handle this, but the cost is likely higher than just
          // emitting the srl/and pair.
          return false;
        }
        Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
      } else {
        // Do not handle the case where the shift amount (which can be zero if
        // no srl was found) is not constant. We could handle this case, but it
        // would require run-time logic that would be more expensive than just
        // emitting the srl/and pair.
        return false;
      }
    } else {
      // Do not handle the case where the LHS of the and is not a shift. While
      // it would be trivial to handle this case, it would just transform
      // 'and' -> 'bfe', but 'and' has higher throughput.
      return false;
    }
  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
    if (LHS->getOpcode() == ISD::AND) {
      ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
      if (!ShiftCnst) {
        // Shift amount must be constant.
        return false;
      }

      uint64_t ShiftAmt = ShiftCnst->getZExtValue();

      SDValue AndLHS = LHS->getOperand(0);
      SDValue AndRHS = LHS->getOperand(1);

      // Canonicalize the AND to have the mask on the RHS.
      if (isa<ConstantSDNode>(AndLHS))
        std::swap(AndLHS, AndRHS);

      ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
      if (!MaskCnst) {
        // Mask must be constant.
        return false;
      }

      uint64_t MaskVal = MaskCnst->getZExtValue();
      uint64_t NumZeros;
      uint64_t NumBits;
      if (isMask_64(MaskVal)) {
        NumZeros = 0;
        // The number of bits in the result bitfield will be the number of
        // trailing ones (the AND) minus the number of bits we shift off.
        NumBits = llvm::countr_one(MaskVal) - ShiftAmt;
      } else if (isShiftedMask_64(MaskVal)) {
        NumZeros = llvm::countr_zero(MaskVal);
        unsigned NumOnes = llvm::countr_one(MaskVal >> NumZeros);
        // The number of bits in the result bitfield will be the number of
        // trailing zeros plus the number of set bits in the mask, minus the
        // number of bits we shift off.
        NumBits = NumZeros + NumOnes - ShiftAmt;
      } else {
        // This is not a mask we can handle.
        return false;
      }
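      // Worked example: (srl (and x, 0x00ffff00), 8) gives NumZeros = 8,
      // NumOnes = 16, and ShiftAmt = 8, so NumBits = 16 and we emit
      // bfe.u32 x, 8, 16 below.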

      if (ShiftAmt < NumZeros) {
        // Handling this case would require extra logic that would make this
        // transformation non-profitable.
        return false;
      }

      Val = AndLHS;
      Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
      Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
    } else if (LHS->getOpcode() == ISD::SHL) {
      // Here, we have a pattern like:
      //
      //   (sra (shl val, NN), MM)
      // or
      //   (srl (shl val, NN), MM)
      //
      // If MM >= NN, we can efficiently optimize this with bfe.
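      //
      // For example, (sra (shl x, 24), 28) on i32 gives Start = 28 - 24 = 4
      // and Len = 32 - 28 = 4, i.e. bfe.s32 x, 4, 4: a signed 4-bit field
      // starting at bit 4.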
      Val = LHS->getOperand(0);

      SDValue ShlRHS = LHS->getOperand(1);
      ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
      if (!ShlCnst) {
        // Shift amount must be constant.
        return false;
      }
      uint64_t InnerShiftAmt = ShlCnst->getZExtValue();

      SDValue ShrRHS = RHS;
      ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
      if (!ShrCnst) {
        // Shift amount must be constant.
        return false;
      }
      uint64_t OuterShiftAmt = ShrCnst->getZExtValue();

      // To avoid extra codegen and be profitable, we need Outer >= Inner.
      if (OuterShiftAmt < InnerShiftAmt) {
        return false;
      }

      // If the outer shift is more than the type size, we have no bitfield to
      // extract. Since we also checked that the inner shift is <= the outer
      // shift, this also implies that the inner shift is < the type size.
      if (OuterShiftAmt >= Val.getValueSizeInBits()) {
        return false;
      }

      Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
                                        MVT::i32);
      Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
                                      DL, MVT::i32);

      if (N->getOpcode() == ISD::SRA) {
        // If we have an arithmetic right shift, we need to use the signed bfe
        // variant.
        IsSigned = true;
      }
    } else {
      // The LHS is neither an 'and' nor a 'shl', so there is no bitfield to
      // extract.
      return false;
    }
  } else {
    // The root is neither an 'and' nor a right shift; nothing to do.
    return false;
  }

  unsigned Opc;
  // The and/srl patterns above always produce unsigned extracts; only the
  // sra-of-shl pattern sets IsSigned and needs the signed bfe variant.
  if (Val.getValueType() == MVT::i32)
    Opc = IsSigned ? NVPTX::BFE_S32rii : NVPTX::BFE_U32rii;
  else if (Val.getValueType() == MVT::i64)
    Opc = IsSigned ? NVPTX::BFE_S64rii : NVPTX::BFE_U64rii;
  else
    return false; // We cannot handle this type.

  SDValue Ops[] = {Val, Start, Len};

  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
  return true;
}
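
// End-to-end illustration (assumed input IR, not part of this file):
//   %s = lshr i32 %x, 4
//   %f = and i32 %s, 255
// matches the srl/and pattern above and selects BFE_U32rii, which is printed
// as bfe.u32 %r, %rx, 4, 8.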

// SelectDirectAddr - Match a direct address for DAG; a direct address can be
// a global address or an external symbol.
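// For example, a NVPTXISD::Wrapper around a TargetGlobalAddress unwraps to
// the underlying symbol so the symbol itself can be used as the address
// operand.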
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
  // Return true if TGA or ES.
  if (N.getOpcode() == ISD::TargetGlobalAddress ||
      N.getOpcode() == ISD::TargetExternalSymbol) {
    Address = N;
    return true;
  }
  if (N.getOpcode() == NVPTXISD::Wrapper) {
    Address = N.getOperand(0);
    return true;
  }
  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
    if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
        CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
        CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
      return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
  }
  return false;
}

// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
                                         SDValue &Base, SDValue &Offset,
                                         MVT mvt) {
  if (Addr.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      SDValue base = Addr.getOperand(0);
      if (SelectDirectAddr(base, Base)) {
        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
                                           mvt);
        return true;
      }
    }
  }
  return false;
}

// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
}

// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
                                         SDValue &Base, SDValue &Offset,
                                         MVT mvt) {
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
    Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
    return true;
  }
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress)
    return false; // direct calls.

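  // Match (add base, const) as base+imm; e.g. an access to a stack slot can
  // lower to something like ld.u32 %r0, [%SP+16];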
  if (Addr.getOpcode() == ISD::ADD) {
    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
      return false;
    }
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      if (FrameIndexSDNode *FIN =
              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
        // Constant offset from frame ref.
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
      else
        Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
                                         mvt);
      return true;
    }
  }
  return false;
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
}

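// Returns true when the memory access in \p N is known to target address
// space \p spN; used, for instance, to verify that a candidate load really
// addresses the space its selected instruction requires.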
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  const Value *Src = nullptr;
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
      return true;
    Src = mN->getMemOperand()->getValue();
  }
  if (!Src)
    return false;
  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);
  return false;
}

/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
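/// An "m" (memory) operand is matched either as a direct address (a symbol
/// plus a zero offset) or as a register+offset pair; anything else is
/// rejected.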
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1;
  switch (ConstraintID) {
  default:
    return true;
  case InlineAsm::ConstraintCode::m: // memory
    if (SelectDirectAddr(Op, Op0)) {
      OutOps.push_back(Op0);
      OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
      return false;
    }
    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
      OutOps.push_back(Op0);
      OutOps.push_back(Op1);
      return false;
    }
    break;
  }
  return true;
}

/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
/// conversion from \p SrcTy to \p DestTy.
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
                                             LoadSDNode *LdNode) {
  bool IsSigned = LdNode && LdNode->getExtensionType() == ISD::SEXTLOAD;
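  // For example, widening i8 to i32 selects cvt.s32.s8 when the value comes
  // from a sign-extending load, and cvt.u32.u8 otherwise.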
  switch (SrcTy.SimpleTy) {
  default:
    llvm_unreachable("Unhandled source type");
  case MVT::i8:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
    }
  case MVT::i16:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
    }
  case MVT::i32:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
    }
  case MVT::i64:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
    }
  case MVT::f16:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::f32:
      return NVPTX::CVT_f32_f16;
    case MVT::f64:
      return NVPTX::CVT_f64_f16;
    }
  }
}

