//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the SelectionDAG class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::SDPatternMatch;

/// makeVTList - Return an instance of the SDVTList struct initialized with the
/// specified members.
static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
  SDVTList Res = {VTs, NumVTs};
  return Res;
}

// Default null implementations of the callbacks.
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}

void SelectionDAG::DAGNodeDeletedListener::anchor() {}
void SelectionDAG::DAGNodeInsertedListener::anchor() {}

#define DEBUG_TYPE "selectiondag"

static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
       cl::Hidden, cl::init(true),
       cl::desc("Gang up loads and stores generated by inlining of memcpy"));

static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
       cl::desc("Number limit for gluing ld/st of memcpy."),
       cl::Hidden, cl::init(0));

static cl::opt<unsigned>
    MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192),
             cl::desc("DAG combiner limit number of steps when searching DAG "
                      "for predecessor nodes"));

static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
  LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););
}

unsigned SelectionDAG::getHasPredecessorMaxSteps() { return MaxSteps; }
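
// Illustrative usage of the flags above (hypothetical invocations): the
// cl::opt flags are hidden from -help but can still be set explicitly, e.g.
//   llc -enable-memcpy-dag-opt=false foo.ll
//   llc -has-predecessor-max-steps=4096 foo.ll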

//===----------------------------------------------------------------------===//
// ConstantFPSDNode Class
//===----------------------------------------------------------------------===//

/// isExactlyValue - We don't rely on operator== working on double values, as
/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
/// As such, this method can be used to do an exact bit-for-bit comparison of
/// two floating point values.
bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
  return getValueAPF().bitwiseIsEqual(V);
}

bool ConstantFPSDNode::isValueValidForType(EVT VT,
                                           const APFloat& Val) {
  assert(VT.isFloatingPoint() && "Can only convert between FP types");

  // convert modifies in place, so make a copy.
  APFloat Val2 = APFloat(Val);
  bool losesInfo;
  (void)Val2.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven,
                     &losesInfo);
  return !losesInfo;
}
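
// Illustrative example of the check above: APFloat(0.5) converts to f32
// without losing information, so isValueValidForType(MVT::f32, APFloat(0.5))
// is true, while a finite double such as 1.0e40 overflows f32 and yields
// false.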

//===----------------------------------------------------------------------===//
// ISD Namespace
//===----------------------------------------------------------------------===//

bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
  if (N->getOpcode() == ISD::SPLAT_VECTOR) {
    if (auto OptAPInt = N->getOperand(0)->bitcastToAPInt()) {
      unsigned EltSize =
          N->getValueType(0).getVectorElementType().getSizeInBits();
      SplatVal = OptAPInt->trunc(EltSize);
      return true;
    }
  }

  auto *BV = dyn_cast<BuildVectorSDNode>(N);
  if (!BV)
    return false;

  APInt SplatUndef;
  unsigned SplatBitSize;
  bool HasUndefs;
  unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
  // Endianness does not matter here. We are checking for a splat given the
  // element size of the vector, and if we find such a splat for little endian
  // layout, then that should be valid also for big endian (as the full vector
  // size is known to be a multiple of the element size).
  const bool IsBigEndian = false;
  return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
                             EltSize, IsBigEndian) &&
         EltSize == SplatBitSize;
}
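
// Illustrative usage: for a v4i32 BUILD_VECTOR whose four operands are the
// i32 constant 7, this returns true and leaves SplatVal holding the value 7
// at the 32-bit element width.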

// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
// specializations of the more general isConstantSplatVector()?

bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
  // Look through a bit convert.
  while (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0).getNode();

  if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
    APInt SplatVal;
    return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes();
  }

  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;

  unsigned i = 0, e = N->getNumOperands();

  // Skip over all of the undef values.
  while (i != e && N->getOperand(i).isUndef())
    ++i;

  // Do not accept an all-undef vector.
  if (i == e) return false;

  // Do not accept build_vectors that aren't all constants or which have non-~0
  // elements. We have to be a bit careful here, as the type of the constant
  // may not be the same as the type of the vector elements due to type
  // legalization (the elements are promoted to a legal type for the target and
  // a vector of a type may be legal when the base element type is not).
  // We only want to check enough bits to cover the vector elements, because
  // we care if the resultant vector is all ones, not whether the individual
  // constants are.
  SDValue NotZero = N->getOperand(i);
  if (auto OptAPInt = NotZero->bitcastToAPInt()) {
    unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
    if (OptAPInt->countr_one() < EltSize)
      return false;
  } else
    return false;

  // Okay, we have at least one ~0 value, check to see if the rest match or are
  // undefs. Even with the above element type twiddling, this should be OK, as
  // the same type legalization should have applied to all the elements.
  for (++i; i != e; ++i)
    if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef())
      return false;
  return true;
}

bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
  // Look through a bit convert.
  while (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0).getNode();

  if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
    APInt SplatVal;
    return isConstantSplatVector(N, SplatVal) && SplatVal.isZero();
  }

  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;

  bool IsAllUndef = true;
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    IsAllUndef = false;
    // Do not accept build_vectors that aren't all constants or which have
    // non-0 elements. We have to be a bit careful here, as the type of the
    // constant may not be the same as the type of the vector elements due to
    // type legalization (the elements are promoted to a legal type for the
    // target and a vector of a type may be legal when the base element type
    // is not).
    // We only want to check enough bits to cover the vector elements, because
    // we care if the resultant vector is all zeros, not whether the individual
    // constants are.
    if (auto OptAPInt = Op->bitcastToAPInt()) {
      unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
      if (OptAPInt->countr_zero() < EltSize)
        return false;
    } else
      return false;
  }

  // Do not accept an all-undef vector.
  if (IsAllUndef)
    return false;
  return true;
}

bool ISD::isBuildVectorAllOnes(const SDNode *N) {
  return isConstantSplatVectorAllOnes(N, /*BuildVectorOnly*/ true);
}

bool ISD::isBuildVectorAllZeros(const SDNode *N) {
  return isConstantSplatVectorAllZeros(N, /*BuildVectorOnly*/ true);
}

bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    if (!isa<ConstantSDNode>(Op))
      return false;
  }
  return true;
}

bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    if (!isa<ConstantFPSDNode>(Op))
      return false;
  }
  return true;
}

bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize,
                             bool Signed) {
  assert(N->getValueType(0).isVector() && "Expected a vector!");

  unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
  if (EltSize <= NewEltSize)
    return false;

  if (N->getOpcode() == ISD::ZERO_EXTEND) {
    return (N->getOperand(0).getValueType().getScalarSizeInBits() <=
            NewEltSize) &&
           !Signed;
  }
  if (N->getOpcode() == ISD::SIGN_EXTEND) {
    return (N->getOperand(0).getValueType().getScalarSizeInBits() <=
            NewEltSize) &&
           Signed;
  }
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    if (!isa<ConstantSDNode>(Op))
      return false;

    APInt C = Op->getAsAPIntVal().trunc(EltSize);
    if (Signed && C.trunc(NewEltSize).sext(EltSize) != C)
      return false;
    if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C)
      return false;
  }

  return true;
}
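
// Illustrative example: a v4i32 BUILD_VECTOR of {1, -1, 100, 0} is shrinkable
// to NewEltSize == 16 when Signed is true (each value survives the
// trunc-then-sext round trip), but not when Signed is false, since -1 does
// not zero-extend back to its original i32 bits.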

bool ISD::allOperandsUndef(const SDNode *N) {
  // Return false if the node has no operands.
  // This is "logically inconsistent" with the definition of "all" but
  // is probably the desired behavior.
  if (N->getNumOperands() == 0)
    return false;
  return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });
}

bool ISD::isFreezeUndef(const SDNode *N) {
  return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef();
}

template <typename ConstNodeType>
bool ISD::matchUnaryPredicateImpl(SDValue Op,
                                  std::function<bool(ConstNodeType *)> Match,
                                  bool AllowUndefs, bool AllowTruncation) {
  // FIXME: Add support for scalar UNDEF cases?
  if (auto *C = dyn_cast<ConstNodeType>(Op))
    return Match(C);

  // FIXME: Add support for vector UNDEF cases?
  if (ISD::BUILD_VECTOR != Op.getOpcode() &&
      ISD::SPLAT_VECTOR != Op.getOpcode())
    return false;

  EVT SVT = Op.getValueType().getScalarType();
  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
    if (AllowUndefs && Op.getOperand(i).isUndef()) {
      if (!Match(nullptr))
        return false;
      continue;
    }

    auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i));
    if (!Cst || (!AllowTruncation && Cst->getValueType(0) != SVT) ||
        !Match(Cst))
      return false;
  }
  return true;
}
// Build used template types.
template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>(
    SDValue, std::function<bool(ConstantSDNode *)>, bool, bool);
template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>(
    SDValue, std::function<bool(ConstantFPSDNode *)>, bool, bool);
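
// Illustrative usage (via the ISD::matchUnaryPredicate wrapper declared in
// SelectionDAGNodes.h): test that every constant element of a splat or
// build_vector is a power of two:
//   bool AllPow2 = ISD::matchUnaryPredicate(Op, [](ConstantSDNode *C) {
//     return C->getAPIntValue().isPowerOf2();
//   });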

bool ISD::matchBinaryPredicate(
    SDValue LHS, SDValue RHS,
    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
    bool AllowUndefs, bool AllowTypeMismatch) {
  if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType())
    return false;

  // TODO: Add support for scalar UNDEF cases?
  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
    if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
      return Match(LHSCst, RHSCst);

  // TODO: Add support for vector UNDEF cases?
  if (LHS.getOpcode() != RHS.getOpcode() ||
      (LHS.getOpcode() != ISD::BUILD_VECTOR &&
       LHS.getOpcode() != ISD::SPLAT_VECTOR))
    return false;

  EVT SVT = LHS.getValueType().getScalarType();
  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
    SDValue LHSOp = LHS.getOperand(i);
    SDValue RHSOp = RHS.getOperand(i);
    bool LHSUndef = AllowUndefs && LHSOp.isUndef();
    bool RHSUndef = AllowUndefs && RHSOp.isUndef();
    auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp);
    auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);
    if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))
      return false;
    if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT ||
                               LHSOp.getValueType() != RHSOp.getValueType()))
      return false;
    if (!Match(LHSCst, RHSCst))
      return false;
  }
  return true;
}
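
// Illustrative usage: element-wise comparison of two constant vectors, e.g.
// checking that each LHS element is strictly less than its RHS counterpart:
//   bool AllLT = ISD::matchBinaryPredicate(
//       LHS, RHS, [](ConstantSDNode *L, ConstantSDNode *R) {
//         return L->getAPIntValue().ult(R->getAPIntValue());
//       });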

ISD::NodeType ISD::getInverseMinMaxOpcode(unsigned MinMaxOpc) {
  switch (MinMaxOpc) {
  default:
    llvm_unreachable("unrecognized opcode");
  case ISD::UMIN:
    return ISD::UMAX;
  case ISD::UMAX:
    return ISD::UMIN;
  case ISD::SMIN:
    return ISD::SMAX;
  case ISD::SMAX:
    return ISD::SMIN;
  }
}

ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
  switch (VecReduceOpcode) {
  default:
    llvm_unreachable("Expected VECREDUCE opcode");
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_SEQ_FADD:
    return ISD::FADD;
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_SEQ_FMUL:
  case ISD::VP_REDUCE_FMUL:
  case ISD::VP_REDUCE_SEQ_FMUL:
    return ISD::FMUL;
  case ISD::VECREDUCE_ADD:
  case ISD::VP_REDUCE_ADD:
    return ISD::ADD;
  case ISD::VECREDUCE_MUL:
  case ISD::VP_REDUCE_MUL:
    return ISD::MUL;
  case ISD::VECREDUCE_AND:
  case ISD::VP_REDUCE_AND:
    return ISD::AND;
  case ISD::VECREDUCE_OR:
  case ISD::VP_REDUCE_OR:
    return ISD::OR;
  case ISD::VECREDUCE_XOR:
  case ISD::VP_REDUCE_XOR:
    return ISD::XOR;
  case ISD::VECREDUCE_SMAX:
  case ISD::VP_REDUCE_SMAX:
    return ISD::SMAX;
  case ISD::VECREDUCE_SMIN:
  case ISD::VP_REDUCE_SMIN:
    return ISD::SMIN;
  case ISD::VECREDUCE_UMAX:
  case ISD::VP_REDUCE_UMAX:
    return ISD::UMAX;
  case ISD::VECREDUCE_UMIN:
  case ISD::VP_REDUCE_UMIN:
    return ISD::UMIN;
  case ISD::VECREDUCE_FMAX:
  case ISD::VP_REDUCE_FMAX:
    return ISD::FMAXNUM;
  case ISD::VECREDUCE_FMIN:
  case ISD::VP_REDUCE_FMIN:
    return ISD::FMINNUM;
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VP_REDUCE_FMAXIMUM:
    return ISD::FMAXIMUM;
  case ISD::VECREDUCE_FMINIMUM:
  case ISD::VP_REDUCE_FMINIMUM:
    return ISD::FMINIMUM;
  }
}

bool ISD::isVPOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...)                                    \
  case ISD::VPSD:                                                              \
    return true;
#include "llvm/IR/VPIntrinsics.def"
  }
}

bool ISD::isVPBinaryOp(unsigned Opcode) {
  switch (Opcode) {
  default:
    break;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD:
#define VP_PROPERTY_BINARYOP return true;
#define END_REGISTER_VP_SDNODE(VPSD) break;
#include "llvm/IR/VPIntrinsics.def"
  }
  return false;
}

bool ISD::isVPReduction(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_MUL:
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_UMIN:
  case ISD::VP_REDUCE_FMAX:
  case ISD::VP_REDUCE_FMIN:
  case ISD::VP_REDUCE_FMAXIMUM:
  case ISD::VP_REDUCE_FMINIMUM:
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_FMUL:
  case ISD::VP_REDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_SEQ_FMUL:
    return true;
  }
}

/// The operand position of the vector mask.
std::optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
  switch (Opcode) {
  default:
    return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...)         \
  case ISD::VPSD:                                                              \
    return MASKPOS;
#include "llvm/IR/VPIntrinsics.def"
  }
}

/// The operand position of the explicit vector length parameter.
std::optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
  switch (Opcode) {
  default:
    return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS)      \
  case ISD::VPSD:                                                              \
    return EVLPOS;
#include "llvm/IR/VPIntrinsics.def"
  }
}

std::optional<unsigned> ISD::getBaseOpcodeForVP(unsigned VPOpcode,
                                                bool hasFPExcept) {
  // FIXME: Return strict opcodes in case of fp exceptions.
  switch (VPOpcode) {
  default:
    return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) case ISD::VPOPC:
#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) return ISD::SDOPC;
#define END_REGISTER_VP_SDNODE(VPOPC) break;
#include "llvm/IR/VPIntrinsics.def"
  }
  return std::nullopt;
}

std::optional<unsigned> ISD::getVPForBaseOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) break;
#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) case ISD::SDOPC:
#define END_REGISTER_VP_SDNODE(VPOPC) return ISD::VPOPC;
#include "llvm/IR/VPIntrinsics.def"
  }
}

ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
  switch (ExtType) {
  case ISD::EXTLOAD:
    return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
  case ISD::SEXTLOAD:
    return ISD::SIGN_EXTEND;
  case ISD::ZEXTLOAD:
    return ISD::ZERO_EXTEND;
  default:
    break;
  }

  llvm_unreachable("Invalid LoadExtType");
}

ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
  // To perform this operation, we just need to swap the L and G bits of the
  // operation.
  unsigned OldL = (Operation >> 2) & 1;
  unsigned OldG = (Operation >> 1) & 1;
  return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
                       (OldL << 1) |      // New G bit
                       (OldG << 2));      // New L bit.
}
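
// Illustrative example of the bit swap above: ISD::SETLT has only the L bit
// of {L, G, E} set and ISD::SETGT only the G bit, so swapping L and G maps
// SETLT to SETGT (i.e. "x < y" becomes "y > x"), while SETEQ and SETNE,
// which do not distinguish operand order, map to themselves.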

static ISD::CondCode getSetCCInverseImpl(ISD::CondCode Op, bool isIntegerLike) {
  unsigned Operation = Op;
  if (isIntegerLike)
    Operation ^= 7;  // Flip L, G, E bits, but not U.
  else
    Operation ^= 15; // Flip all of the condition bits.

  if (Operation > ISD::SETTRUE2)
    Operation &= ~8; // Don't let N and U bits get set.

  return ISD::CondCode(Operation);
}

ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, EVT Type) {
  return getSetCCInverseImpl(Op, Type.isInteger());
}

ISD::CondCode ISD::GlobalISel::getSetCCInverse(ISD::CondCode Op,
                                               bool isIntegerLike) {
  return getSetCCInverseImpl(Op, isIntegerLike);
}

/// For an integer comparison, return 1 if the comparison is a signed operation
/// and 2 if the result is an unsigned comparison. Return zero if the operation
/// does not depend on the sign of the input (setne and seteq).
static int isSignedOp(ISD::CondCode Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Illegal integer setcc operation!");
  case ISD::SETEQ:
  case ISD::SETNE: return 0;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE: return 1;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return 2;
  }
}

ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
                                       EVT Type) {
  bool IsInteger = Type.isInteger();
  if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
    // Cannot fold a signed integer setcc with an unsigned integer setcc.
    return ISD::SETCC_INVALID;

  unsigned Op = Op1 | Op2; // Combine all of the condition bits.

  // If the N and U bits get set, then the resultant comparison DOES suddenly
  // care about orderedness, and it is true when ordered.
  if (Op > ISD::SETTRUE2)
    Op &= ~16; // Clear the U bit if the N bit is set.

  // Canonicalize illegal integer setcc's.
  if (IsInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
    Op = ISD::SETNE;

  return ISD::CondCode(Op);
}
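
// Illustrative example: ORing the condition bits of ISD::SETLT and ISD::SETGT
// sets both the L and G bits, which is exactly ISD::SETNE, matching the fold
// of "(x < y) | (x > y)" into "x != y" for integer comparisons.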

ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
                                        EVT Type) {
  bool IsInteger = Type.isInteger();
  if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
    // Cannot fold a signed setcc with an unsigned setcc.
    return ISD::SETCC_INVALID;

  // Combine all of the condition bits.
  ISD::CondCode Result = ISD::CondCode(Op1 & Op2);

  // Canonicalize illegal integer setcc's.
  if (IsInteger) {
    switch (Result) {
    default: break;
    case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
    case ISD::SETOEQ:                                // SETEQ  & SETU[LG]E
    case ISD::SETUEQ: Result = ISD::SETEQ   ; break; // SETUGE & SETULE
    case ISD::SETOLT: Result = ISD::SETULT  ; break; // SETULT & SETNE
    case ISD::SETOGT: Result = ISD::SETUGT  ; break; // SETUGT & SETNE
    }
  }

  return Result;
}
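
// Illustrative example: ANDing the condition bits of ISD::SETLE and ISD::SETGE
// leaves only the E bit set, yielding ISD::SETEQ, which matches the fold of
// "(x <= y) & (x >= y)" into "x == y".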

//===----------------------------------------------------------------------===//
// SDNode Profile Support
//===----------------------------------------------------------------------===//

/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
  ID.AddInteger(OpC);
}

/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
/// solely with their pointer.
static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
  ID.AddPointer(VTList.VTs);
}

/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
static void AddNodeIDOperands(FoldingSetNodeID &ID,
                              ArrayRef<SDValue> Ops) {
  for (const auto &Op : Ops) {
    ID.AddPointer(Op.getNode());
    ID.AddInteger(Op.getResNo());
  }
}

/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
static void AddNodeIDOperands(FoldingSetNodeID &ID,
                              ArrayRef<SDUse> Ops) {
  for (const auto &Op : Ops) {
    ID.AddPointer(Op.getNode());
    ID.AddInteger(Op.getResNo());
  }
}

static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned OpC,
                          SDVTList VTList, ArrayRef<SDValue> OpList) {
  AddNodeIDOpcode(ID, OpC);
  AddNodeIDValueTypes(ID, VTList);
  AddNodeIDOperands(ID, OpList);
}

/// If this is an SDNode with special info, add this info to the NodeID data.
static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::TargetExternalSymbol:
  case ISD::ExternalSymbol:
  case ISD::MCSymbol:
    llvm_unreachable("Should only be used on nodes with operands");
  default: break; // Normal nodes don't need extra info.
  case ISD::TargetConstant:
  case ISD::Constant: {
    const ConstantSDNode *C = cast<ConstantSDNode>(N);
    ID.AddPointer(C->getConstantIntValue());
    ID.AddBoolean(C->isOpaque());
    break;
  }
  case ISD::TargetConstantFP:
  case ISD::ConstantFP:
    ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
    break;
  case ISD::TargetGlobalAddress:
  case ISD::GlobalAddress:
  case ISD::TargetGlobalTLSAddress:
  case ISD::GlobalTLSAddress: {
    const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
    ID.AddPointer(GA->getGlobal());
    ID.AddInteger(GA->getOffset());
    ID.AddInteger(GA->getTargetFlags());
    break;
  }
  case ISD::BasicBlock:
    ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
    break;
  case ISD::Register:
    ID.AddInteger(cast<RegisterSDNode>(N)->getReg().id());
    break;
  case ISD::RegisterMask:
    ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
    break;
  case ISD::SRCVALUE:
    ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
    break;
  case ISD::FrameIndex:
  case ISD::TargetFrameIndex:
    ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
    break;
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
    if (cast<LifetimeSDNode>(N)->hasOffset()) {
      ID.AddInteger(cast<LifetimeSDNode>(N)->getSize());
      ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset());
    }
    break;
  case ISD::PSEUDO_PROBE:
    ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid());
    ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex());
    ID.AddInteger(cast<PseudoProbeSDNode>(N)->getAttributes());
    break;
  case ISD::JumpTable:
  case ISD::TargetJumpTable:
    ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
    ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
    break;
  case ISD::ConstantPool:
  case ISD::TargetConstantPool: {
    const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
    ID.AddInteger(CP->getAlign().value());
    ID.AddInteger(CP->getOffset());
    if (CP->isMachineConstantPoolEntry())
      CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
    else
      ID.AddPointer(CP->getConstVal());
    ID.AddInteger(CP->getTargetFlags());
    break;
  }
  case ISD::TargetIndex: {
    const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
    ID.AddInteger(TI->getIndex());
    ID.AddInteger(TI->getOffset());
    ID.AddInteger(TI->getTargetFlags());
    break;
  }
  case ISD::LOAD: {
    const LoadSDNode *LD = cast<LoadSDNode>(N);
    ID.AddInteger(LD->getMemoryVT().getRawBits());
    ID.AddInteger(LD->getRawSubclassData());
    ID.AddInteger(LD->getPointerInfo().getAddrSpace());
    ID.AddInteger(LD->getMemOperand()->getFlags());
    break;
  }
  case ISD::STORE: {
    const StoreSDNode *ST = cast<StoreSDNode>(N);
    ID.AddInteger(ST->getMemoryVT().getRawBits());
    ID.AddInteger(ST->getRawSubclassData());
    ID.AddInteger(ST->getPointerInfo().getAddrSpace());
    ID.AddInteger(ST->getMemOperand()->getFlags());
    break;
  }
  case ISD::VP_LOAD: {
    const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N);
    ID.AddInteger(ELD->getMemoryVT().getRawBits());
    ID.AddInteger(ELD->getRawSubclassData());
    ID.AddInteger(ELD->getPointerInfo().getAddrSpace());
    ID.AddInteger(ELD->getMemOperand()->getFlags());
    break;
  }
  case ISD::VP_STORE: {
    const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);
    ID.AddInteger(EST->getMemoryVT().getRawBits());
    ID.AddInteger(EST->getRawSubclassData());
    ID.AddInteger(EST->getPointerInfo().getAddrSpace());
    ID.AddInteger(EST->getMemOperand()->getFlags());
    break;
  }
  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: {
    const VPStridedLoadSDNode *SLD = cast<VPStridedLoadSDNode>(N);
    ID.AddInteger(SLD->getMemoryVT().getRawBits());
    ID.AddInteger(SLD->getRawSubclassData());
    ID.AddInteger(SLD->getPointerInfo().getAddrSpace());
    break;
  }
  case ISD::EXPERIMENTAL_VP_STRIDED_STORE: {
    const VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
    ID.AddInteger(SST->getMemoryVT().getRawBits());
    ID.AddInteger(SST->getRawSubclassData());
    ID.AddInteger(SST->getPointerInfo().getAddrSpace());
    break;
  }
  case ISD::VP_GATHER: {
    const VPGatherSDNode *EG = cast<VPGatherSDNode>(N);
    ID.AddInteger(EG->getMemoryVT().getRawBits());
    ID.AddInteger(EG->getRawSubclassData());
    ID.AddInteger(EG->getPointerInfo().getAddrSpace());
    ID.AddInteger(EG->getMemOperand()->getFlags());
    break;
  }
  case ISD::VP_SCATTER: {
    const VPScatterSDNode *ES = cast<VPScatterSDNode>(N);
    ID.AddInteger(ES->getMemoryVT().getRawBits());
    ID.AddInteger(ES->getRawSubclassData());
    ID.AddInteger(ES->getPointerInfo().getAddrSpace());
    ID.AddInteger(ES->getMemOperand()->getFlags());
    break;
  }
  case ISD::MLOAD: {
    const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
    ID.AddInteger(MLD->getMemoryVT().getRawBits());
    ID.AddInteger(MLD->getRawSubclassData());
    ID.AddInteger(MLD->getPointerInfo().getAddrSpace());
    ID.AddInteger(MLD->getMemOperand()->getFlags());
    break;
  }
  case ISD::MSTORE: {
    const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
    ID.AddInteger(MST->getMemoryVT().getRawBits());
    ID.AddInteger(MST->getRawSubclassData());
    ID.AddInteger(MST->getPointerInfo().getAddrSpace());
    ID.AddInteger(MST->getMemOperand()->getFlags());
    break;
  }
  case ISD::MGATHER: {
    const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N);
    ID.AddInteger(MG->getMemoryVT().getRawBits());
    ID.AddInteger(MG->getRawSubclassData());
    ID.AddInteger(MG->getPointerInfo().getAddrSpace());
    ID.AddInteger(MG->getMemOperand()->getFlags());
    break;
  }
  case ISD::MSCATTER: {
    const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N);
    ID.AddInteger(MS->getMemoryVT().getRawBits());
    ID.AddInteger(MS->getRawSubclassData());
    ID.AddInteger(MS->getPointerInfo().getAddrSpace());
    ID.AddInteger(MS->getMemOperand()->getFlags());
    break;
  }
  case ISD::ATOMIC_CMP_SWAP:
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_SUB:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_CLR:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_NAND:
  case ISD::ATOMIC_LOAD_MIN:
  case ISD::ATOMIC_LOAD_MAX:
  case ISD::ATOMIC_LOAD_UMIN:
  case ISD::ATOMIC_LOAD_UMAX:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    const AtomicSDNode *AT = cast<AtomicSDNode>(N);
    ID.AddInteger(AT->getMemoryVT().getRawBits());
    ID.AddInteger(AT->getRawSubclassData());
    ID.AddInteger(AT->getPointerInfo().getAddrSpace());
    ID.AddInteger(AT->getMemOperand()->getFlags());
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
    for (int M : Mask)
      ID.AddInteger(M);
    break;
  }
  case ISD::ADDRSPACECAST: {
    const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
    ID.AddInteger(ASC->getSrcAddressSpace());
    ID.AddInteger(ASC->getDestAddressSpace());
    break;
  }
  case ISD::TargetBlockAddress:
  case ISD::BlockAddress: {
    const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
    ID.AddPointer(BA->getBlockAddress());
    ID.AddInteger(BA->getOffset());
    ID.AddInteger(BA->getTargetFlags());
    break;
  }
  case ISD::AssertAlign:
    ID.AddInteger(cast<AssertAlignSDNode>(N)->getAlign().value());
    break;
  case ISD::PREFETCH:
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    // Handled by MemIntrinsicSDNode check after the switch.
    break;
  case ISD::MDNODE_SDNODE:
    ID.AddPointer(cast<MDNodeSDNode>(N)->getMD());
    break;
  } // end switch (N->getOpcode())

  // MemIntrinsic nodes could also have subclass data, address spaces, and flags
  // to check.
  if (auto *MN = dyn_cast<MemIntrinsicSDNode>(N)) {
    ID.AddInteger(MN->getRawSubclassData());
    ID.AddInteger(MN->getPointerInfo().getAddrSpace());
    ID.AddInteger(MN->getMemOperand()->getFlags());
    ID.AddInteger(MN->getMemoryVT().getRawBits());
  }
}

/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
/// data.
static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
  AddNodeIDOpcode(ID, N->getOpcode());
  // Add the return value info.
  AddNodeIDValueTypes(ID, N->getVTList());
  // Add the operand info.
  AddNodeIDOperands(ID, N->ops());

  // Handle SDNode leaves with special info.
  AddNodeIDCustom(ID, N);
}
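
// Illustrative sketch of how these profiling routines are consumed: callers
// that create nodes build a FoldingSetNodeID and probe the CSE map before
// allocating, along the lines of:
//   FoldingSetNodeID ID;
//   AddNodeIDNode(ID, Opcode, VTs, Ops);
//   void *IP = nullptr;
//   if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
//     return SDValue(E, 0); // Reuse the structurally identical node.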

//===----------------------------------------------------------------------===//
// SelectionDAG Class
//===----------------------------------------------------------------------===//

/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
  if (N->getValueType(0) == MVT::Glue)
    return true; // Never CSE anything that produces a glue result.

  switch (N->getOpcode()) {
  default: break;
  case ISD::HANDLENODE:
  case ISD::EH_LABEL:
    return true; // Never CSE these nodes.
  }

  // Check that remaining values produced are not flags.
  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
    if (N->getValueType(i) == MVT::Glue)
      return true; // Never CSE anything that produces a glue result.

  return false;
}

/// RemoveDeadNodes - This method deletes all unreachable nodes in the
/// SelectionDAG.
void SelectionDAG::RemoveDeadNodes() {
  // Create a dummy node (which is not added to allnodes) that adds a reference
  // to the root node, preventing it from being deleted.
  HandleSDNode Dummy(getRoot());

  SmallVector<SDNode*, 128> DeadNodes;

  // Add all obviously-dead nodes to the DeadNodes worklist.
  for (SDNode &Node : allnodes())
    if (Node.use_empty())
      DeadNodes.push_back(&Node);

  RemoveDeadNodes(DeadNodes);

  // If the root changed (e.g. it was a dead load), update the root.
  setRoot(Dummy.getValue());
}

/// RemoveDeadNodes - This method deletes the unreachable nodes in the
/// given list, and any nodes that become unreachable as a result.
void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {

  // Process the worklist, deleting the nodes and adding their uses to the
  // worklist.
  while (!DeadNodes.empty()) {
    SDNode *N = DeadNodes.pop_back_val();
    // Skip to the next node if we've already managed to delete this node. This
    // could happen if replacing a node causes a node previously added to the
    // worklist to be deleted.
    if (N->getOpcode() == ISD::DELETED_NODE)
      continue;

    for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
      DUL->NodeDeleted(N, nullptr);

    // Take the node out of the appropriate CSE map.
    RemoveNodeFromCSEMaps(N);

    // Next, brutally remove the operand list. This is safe to do, as there are
    // no cycles in the graph.
    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
      SDUse &Use = *I++;
      SDNode *Operand = Use.getNode();
      Use.set(SDValue());

      // Now that we removed this operand, see if there are no uses of it left.
      if (Operand->use_empty())
        DeadNodes.push_back(Operand);
    }

    DeallocateNode(N);
  }
}
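
// Illustrative cascade: if the sole user of a load is deleted, the load's use
// count drops to zero and it is pushed onto DeadNodes; when it is processed,
// its own chain and pointer operands are dropped, which may in turn expose
// further dead nodes for deletion.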
1075 | |
1076 | void SelectionDAG::RemoveDeadNode(SDNode *N){ |
1077 | SmallVector<SDNode*, 16> DeadNodes(1, N); |
1078 | |
1079 | // Create a dummy node that adds a reference to the root node, preventing |
1080 | // it from being deleted. (This matters if the root is an operand of the |
1081 | // dead node.) |
1082 | HandleSDNode Dummy(getRoot()); |
1083 | |
1084 | RemoveDeadNodes(DeadNodes); |
1085 | } |
1086 | |
1087 | void SelectionDAG::DeleteNode(SDNode *N) { |
1088 | // First take this out of the appropriate CSE map. |
1089 | RemoveNodeFromCSEMaps(N); |
1090 | |
1091 | // Finally, remove uses due to operands of this node, remove from the |
1092 | // AllNodes list, and delete the node. |
1093 | DeleteNodeNotInCSEMaps(N); |
1094 | } |
1095 | |
1096 | void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { |
1097 | assert(N->getIterator() != AllNodes.begin() && |
1098 | "Cannot delete the entry node!"); |
1099 | assert(N->use_empty() && "Cannot delete a node that is not dead!"); |
1100 | |
1101 | // Drop all of the operands and decrement used node's use counts. |
1102 | N->DropOperands(); |
1103 | |
1104 | DeallocateNode(N); |
1105 | } |
1106 | |
1107 | void SDDbgInfo::add(SDDbgValue *V, bool isParameter) { |
1108 | assert(!(V->isVariadic() && isParameter)); |
1109 | if (isParameter) |
1110 | ByvalParmDbgValues.push_back(Elt: V); |
1111 | else |
1112 | DbgValues.push_back(Elt: V); |
1113 | for (const SDNode *Node : V->getSDNodes()) |
1114 | if (Node) |
1115 | DbgValMap[Node].push_back(Elt: V); |
1116 | } |
1117 | |
1118 | void SDDbgInfo::erase(const SDNode *Node) { |
1119 | DbgValMapType::iterator I = DbgValMap.find(Val: Node); |
1120 | if (I == DbgValMap.end()) |
1121 | return; |
1122 | for (auto &Val: I->second) |
1123 | Val->setIsInvalidated(); |
1124 | DbgValMap.erase(I); |
1125 | } |
1126 | |
1127 | void SelectionDAG::DeallocateNode(SDNode *N) { |
1128 | // If we have operands, deallocate them. |
1129 | removeOperands(Node: N); |
1130 | |
1131 | NodeAllocator.Deallocate(E: AllNodes.remove(IT: N)); |
1132 | |
1133 | // Set the opcode to DELETED_NODE to help catch bugs when node |
1134 | // memory is reallocated. |
1135 | // FIXME: There are places in SDag that have grown a dependency on the opcode |
1136 | // value in the released node. |
1137 | __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType)); |
1138 | N->NodeType = ISD::DELETED_NODE; |
1139 | |
1140 | // If any of the SDDbgValue nodes refer to this SDNode, invalidate |
1141 | // them and forget about that node. |
1142 | DbgInfo->erase(Node: N); |
1143 | |
1144 | // Invalidate extra info. |
1145 | SDEI.erase(Val: N); |
1146 | } |
1147 | |
1148 | #ifndef NDEBUG |
1149 | /// VerifySDNode - Check the given SDNode. Aborts if it is invalid. |
1150 | void SelectionDAG::verifyNode(SDNode *N) const { |
1151 | switch (N->getOpcode()) { |
1152 | default: |
1153 | if (N->isTargetOpcode()) |
1154 | getSelectionDAGInfo().verifyTargetNode(DAG: *this, N); |
1155 | break; |
1156 | case ISD::BUILD_PAIR: { |
1157 | EVT VT = N->getValueType(ResNo: 0); |
1158 | assert(N->getNumValues() == 1 && "Too many results!"); |
1159 | assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && |
1160 | "Wrong return type!"); |
1161 | assert(N->getNumOperands() == 2 && "Wrong number of operands!"); |
1162 | assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && |
1163 | "Mismatched operand types!"); |
1164 | assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && |
1165 | "Wrong operand type!"); |
1166 | assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && |
1167 | "Wrong return type size"); |
1168 | break; |
1169 | } |
1170 | case ISD::BUILD_VECTOR: { |
1171 | assert(N->getNumValues() == 1 && "Too many results!"); |
1172 | assert(N->getValueType(0).isVector() && "Wrong return type!"); |
1173 | assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && |
1174 | "Wrong number of operands!"); |
1175 | EVT EltVT = N->getValueType(ResNo: 0).getVectorElementType(); |
1176 | for (const SDUse &Op : N->ops()) { |
1177 | assert((Op.getValueType() == EltVT || |
1178 | (EltVT.isInteger() && Op.getValueType().isInteger() && |
1179 | EltVT.bitsLE(Op.getValueType()))) && |
1180 | "Wrong operand type!"); |
1181 | assert(Op.getValueType() == N->getOperand(0).getValueType() && |
1182 | "Operands must all have the same type"); |
1183 | } |
1184 | break; |
1185 | } |
1186 | } |
1187 | } |
1188 | #endif // NDEBUG |
1189 | |
1190 | /// Insert a newly allocated node into the DAG. |
1191 | /// |
1192 | /// Handles insertion into the all nodes list and CSE map, as well as |
1193 | /// verification and other common operations when a new node is allocated. |
1194 | void SelectionDAG::InsertNode(SDNode *N) { |
1195 | AllNodes.push_back(val: N); |
1196 | #ifndef NDEBUG |
1197 | N->PersistentId = NextPersistentId++; |
1198 | verifyNode(N); |
1199 | #endif |
1200 | for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) |
1201 | DUL->NodeInserted(N); |
1202 | } |
1203 | |
1204 | /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that |
1205 | /// correspond to it. This is useful when we're about to delete or repurpose |
1206 | /// the node. We don't want future request for structurally identical nodes |
1207 | /// to return N anymore. |
1208 | bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { |
1209 | bool Erased = false; |
1210 | switch (N->getOpcode()) { |
1211 | case ISD::HANDLENODE: return false; // noop. |
1212 | case ISD::CONDCODE: |
1213 | assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && |
1214 | "Cond code doesn't exist!"); |
1215 | Erased = CondCodeNodes[cast<CondCodeSDNode>(Val: N)->get()] != nullptr; |
1216 | CondCodeNodes[cast<CondCodeSDNode>(Val: N)->get()] = nullptr; |
1217 | break; |
1218 | case ISD::ExternalSymbol: |
1219 | Erased = ExternalSymbols.erase(Key: cast<ExternalSymbolSDNode>(Val: N)->getSymbol()); |
1220 | break; |
1221 | case ISD::TargetExternalSymbol: { |
1222 | ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(Val: N); |
1223 | Erased = TargetExternalSymbols.erase(x: std::pair<std::string, unsigned>( |
1224 | ESN->getSymbol(), ESN->getTargetFlags())); |
1225 | break; |
1226 | } |
1227 | case ISD::MCSymbol: { |
1228 | auto *MCSN = cast<MCSymbolSDNode>(Val: N); |
1229 | Erased = MCSymbols.erase(Val: MCSN->getMCSymbol()); |
1230 | break; |
1231 | } |
1232 | case ISD::VALUETYPE: { |
1233 | EVT VT = cast<VTSDNode>(Val: N)->getVT(); |
1234 | if (VT.isExtended()) { |
1235 | Erased = ExtendedValueTypeNodes.erase(x: VT); |
1236 | } else { |
1237 | Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr; |
1238 | ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr; |
1239 | } |
1240 | break; |
1241 | } |
1242 | default: |
1243 | // Remove it from the CSE Map. |
1244 | assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); |
1245 | assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!"); |
1246 | Erased = CSEMap.RemoveNode(N); |
1247 | break; |
1248 | } |
1249 | #ifndef NDEBUG |
1250 | // Verify that the node was actually in one of the CSE maps, unless it has a |
1251 | // glue result (which cannot be CSE'd) or is one of the special cases that are |
1252 | // not subject to CSE. |
1253 | if (!Erased && N->getValueType(ResNo: N->getNumValues()-1) != MVT::Glue && |
1254 | !N->isMachineOpcode() && !doNotCSE(N)) { |
1255 | N->dump(G: this); |
1256 | dbgs() << "\n"; |
1257 | llvm_unreachable("Node is not in map!"); |
1258 | } |
1259 | #endif |
1260 | return Erased; |
1261 | } |
1262 | |
1263 | /// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE |
1264 | /// maps and modified in place. Add it back to the CSE maps, unless an identical |
1265 | /// node already exists, in which case transfer all its users to the existing |
1266 | /// node. This transfer can potentially trigger recursive merging. |
1267 | void |
1268 | SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { |
1269 | // For node types that aren't CSE'd, just act as if no identical node |
1270 | // already exists. |
1271 | if (!doNotCSE(N)) { |
1272 | SDNode *Existing = CSEMap.GetOrInsertNode(N); |
1273 | if (Existing != N) { |
1274 | // If there was already an existing matching node, use ReplaceAllUsesWith |
1275 | // to replace the dead one with the existing one. This can cause |
1276 | // recursive merging of other unrelated nodes down the line. |
1277 | Existing->intersectFlagsWith(Flags: N->getFlags()); |
1278 | ReplaceAllUsesWith(From: N, To: Existing); |
1279 | |
1280 | // N is now dead. Inform the listeners and delete it. |
1281 | for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) |
1282 | DUL->NodeDeleted(N, Existing); |
1283 | DeleteNodeNotInCSEMaps(N); |
1284 | return; |
1285 | } |
1286 | } |
1287 | |
1288 | // If the node doesn't already exist, we updated it. Inform listeners. |
1289 | for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) |
1290 | DUL->NodeUpdated(N); |
1291 | } |
1292 | |
1293 | /// FindModifiedNodeSlot - Find a slot for the specified node if its operands |
1294 | /// were replaced with those specified. If this node is never memoized, |
1295 | /// return null, otherwise return a pointer to the slot it would take. If a |
1296 | /// node already exists with these operands, the slot will be non-null. |
1297 | SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, |
1298 | void *&InsertPos) { |
1299 | if (doNotCSE(N)) |
1300 | return nullptr; |
1301 | |
1302 | SDValue Ops[] = { Op }; |
1303 | FoldingSetNodeID ID; |
1304 | AddNodeIDNode(ID, OpC: N->getOpcode(), VTList: N->getVTList(), OpList: Ops); |
1305 | AddNodeIDCustom(ID, N); |
1306 | SDNode *Node = FindNodeOrInsertPos(ID, DL: SDLoc(N), InsertPos); |
1307 | if (Node) |
1308 | Node->intersectFlagsWith(Flags: N->getFlags()); |
1309 | return Node; |
1310 | } |
1311 | |
1312 | /// FindModifiedNodeSlot - Find a slot for the specified node if its operands |
1313 | /// were replaced with those specified. If this node is never memoized, |
1314 | /// return null, otherwise return a pointer to the slot it would take. If a |
1315 | /// node already exists with these operands, the slot will be non-null. |
1316 | SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, |
1317 | SDValue Op1, SDValue Op2, |
1318 | void *&InsertPos) { |
1319 | if (doNotCSE(N)) |
1320 | return nullptr; |
1321 | |
1322 | SDValue Ops[] = { Op1, Op2 }; |
1323 | FoldingSetNodeID ID; |
1324 | AddNodeIDNode(ID, OpC: N->getOpcode(), VTList: N->getVTList(), OpList: Ops); |
1325 | AddNodeIDCustom(ID, N); |
1326 | SDNode *Node = FindNodeOrInsertPos(ID, DL: SDLoc(N), InsertPos); |
1327 | if (Node) |
1328 | Node->intersectFlagsWith(Flags: N->getFlags()); |
1329 | return Node; |
1330 | } |
1331 | |
1332 | /// FindModifiedNodeSlot - Find a slot for the specified node if its operands |
1333 | /// were replaced with those specified. If this node is never memoized, |
1334 | /// return null, otherwise return a pointer to the slot it would take. If a |
1335 | /// node already exists with these operands, the slot will be non-null. |
1336 | SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, |
1337 | void *&InsertPos) { |
1338 | if (doNotCSE(N)) |
1339 | return nullptr; |
1340 | |
1341 | FoldingSetNodeID ID; |
1342 | AddNodeIDNode(ID, OpC: N->getOpcode(), VTList: N->getVTList(), OpList: Ops); |
1343 | AddNodeIDCustom(ID, N); |
1344 | SDNode *Node = FindNodeOrInsertPos(ID, DL: SDLoc(N), InsertPos); |
1345 | if (Node) |
1346 | Node->intersectFlagsWith(Flags: N->getFlags()); |
1347 | return Node; |
1348 | } |
1349 | |
1350 | Align SelectionDAG::getEVTAlign(EVT VT) const { |
1351 | Type *Ty = VT == MVT::iPTR ? PointerType::get(C&: *getContext(), AddressSpace: 0) |
1352 | : VT.getTypeForEVT(Context&: *getContext()); |
1353 | |
1354 | return getDataLayout().getABITypeAlign(Ty); |
1355 | } |
1356 | |
1357 | // EntryNode could meaningfully have debug info if we can find it... |
1358 | SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOptLevel OL) |
1359 | : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), |
1360 | getVTList(MVT::Other, MVT::Glue)), |
1361 | Root(getEntryNode()) { |
1362 | InsertNode(N: &EntryNode); |
1363 | DbgInfo = new SDDbgInfo(); |
1364 | } |
1365 | |
1366 | void SelectionDAG::init(MachineFunction &NewMF, |
1367 | OptimizationRemarkEmitter &NewORE, Pass *PassPtr, |
1368 | const TargetLibraryInfo *LibraryInfo, |
1369 | UniformityInfo *NewUA, ProfileSummaryInfo *PSIin, |
1370 | BlockFrequencyInfo *BFIin, MachineModuleInfo &MMIin, |
1371 | FunctionVarLocs const *VarLocs) { |
1372 | MF = &NewMF; |
1373 | SDAGISelPass = PassPtr; |
1374 | ORE = &NewORE; |
1375 | TLI = getSubtarget().getTargetLowering(); |
1376 | TSI = getSubtarget().getSelectionDAGInfo(); |
1377 | LibInfo = LibraryInfo; |
1378 | Context = &MF->getFunction().getContext(); |
1379 | UA = NewUA; |
1380 | PSI = PSIin; |
1381 | BFI = BFIin; |
1382 | MMI = &MMIin; |
1383 | FnVarLocs = VarLocs; |
1384 | } |
1385 | |
1386 | SelectionDAG::~SelectionDAG() { |
1387 | assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); |
1388 | allnodes_clear(); |
1389 | OperandRecycler.clear(OperandAllocator); |
1390 | delete DbgInfo; |
1391 | } |
1392 | |
1393 | bool SelectionDAG::shouldOptForSize() const { |
1394 | return llvm::shouldOptimizeForSize(BB: FLI->MBB->getBasicBlock(), PSI, BFI); |
1395 | } |
1396 | |
1397 | void SelectionDAG::allnodes_clear() { |
1398 | assert(&*AllNodes.begin() == &EntryNode); |
1399 | AllNodes.remove(IT: AllNodes.begin()); |
1400 | while (!AllNodes.empty()) |
1401 | DeallocateNode(N: &AllNodes.front()); |
1402 | #ifndef NDEBUG |
1403 | NextPersistentId = 0; |
1404 | #endif |
1405 | } |
1406 | |
1407 | SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, |
1408 | void *&InsertPos) { |
1409 | SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); |
1410 | if (N) { |
1411 | switch (N->getOpcode()) { |
1412 | default: break; |
1413 | case ISD::Constant: |
1414 | case ISD::ConstantFP: |
1415 | llvm_unreachable("Querying for Constant and ConstantFP nodes requires " |
1416 | "debug location. Use another overload."); |
1417 | } |
1418 | } |
1419 | return N; |
1420 | } |
1421 | |
1422 | SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, |
1423 | const SDLoc &DL, void *&InsertPos) { |
1424 | SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); |
1425 | if (N) { |
1426 | switch (N->getOpcode()) { |
1427 | case ISD::Constant: |
1428 | case ISD::ConstantFP: |
1429 | // Erase debug location from the node if the node is used at several |
1430 | // different places. Do not propagate one location to all uses as it |
1431 | // will cause a worse single stepping debugging experience. |
1432 | if (N->getDebugLoc() != DL.getDebugLoc()) |
1433 | N->setDebugLoc(DebugLoc()); |
1434 | break; |
1435 | default: |
1436 | // When the node's point of use is located earlier in the instruction |
1437 | // sequence than its prior point of use, update its debug info to the |
1438 | // earlier location. |
1439 | if (DL.getIROrder() && DL.getIROrder() < N->getIROrder()) |
1440 | N->setDebugLoc(DL.getDebugLoc()); |
1441 | break; |
1442 | } |
1443 | } |
1444 | return N; |
1445 | } |
1446 | |
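/// Reset the DAG back to its initial, empty state, leaving only the EntryNode
/// alive.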
1447 | void SelectionDAG::clear() { |
1448 | allnodes_clear(); |
1449 | OperandRecycler.clear(OperandAllocator); |
1450 | OperandAllocator.Reset(); |
1451 | CSEMap.clear(); |
1452 | |
1453 | ExtendedValueTypeNodes.clear(); |
1454 | ExternalSymbols.clear(); |
1455 | TargetExternalSymbols.clear(); |
1456 | MCSymbols.clear(); |
1457 | SDEI.clear(); |
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), nullptr);
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), nullptr);
1460 | |
1461 | EntryNode.UseList = nullptr; |
InsertNode(&EntryNode);
1463 | Root = getEntryNode(); |
1464 | DbgInfo->clear(); |
1465 | } |
1466 | |
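/// Convert \p Op to the floating-point type \p VT, using FP_EXTEND when
/// widening and FP_ROUND when narrowing.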
1467 | SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { |
return VT.bitsGT(Op.getValueType())
? getNode(ISD::FP_EXTEND, DL, VT, Op)
: getNode(ISD::FP_ROUND, DL, VT, Op,
getIntPtrConstant(0, DL, /*isTarget=*/true));
1472 | } |
1473 | |
1474 | std::pair<SDValue, SDValue> |
1475 | SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain, |
1476 | const SDLoc &DL, EVT VT) { |
1477 | assert(!VT.bitsEq(Op.getValueType()) && |
1478 | "Strict no-op FP extend/round not allowed."); |
1479 | SDValue Res = |
1480 | VT.bitsGT(Op.getValueType()) |
1481 | ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op}) |
1482 | : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other}, |
1483 | {Chain, Op, getIntPtrConstant(0, DL, /*isTarget=*/true)}); |
1484 | |
1485 | return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1)); |
1486 | } |
1487 | |
1488 | SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { |
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
1492 | } |
1493 | |
1494 | SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { |
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
1498 | } |
1499 | |
1500 | SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { |
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
1504 | } |
1505 | |
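/// Any-extend or truncate \p Op to the scalar integer type \p VT, first
/// bitcasting the operand to an integer of the same bit width if necessary.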
1506 | SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL, |
1507 | EVT VT) { |
1508 | assert(!VT.isVector()); |
1509 | auto Type = Op.getValueType(); |
1510 | SDValue DestOp; |
1511 | if (Type == VT) |
1512 | return Op; |
1513 | auto Size = Op.getValueSizeInBits(); |
DestOp = getBitcast(EVT::getIntegerVT(*Context, Size), Op);
1515 | if (DestOp.getValueType() == VT) |
1516 | return DestOp; |
1517 | |
return getAnyExtOrTrunc(DestOp, DL, VT);
1519 | } |
1520 | |
1521 | SDValue SelectionDAG::getBitcastedSExtOrTrunc(SDValue Op, const SDLoc &DL, |
1522 | EVT VT) { |
1523 | assert(!VT.isVector()); |
1524 | auto Type = Op.getValueType(); |
1525 | SDValue DestOp; |
1526 | if (Type == VT) |
1527 | return Op; |
1528 | auto Size = Op.getValueSizeInBits(); |
DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
1530 | if (DestOp.getValueType() == VT) |
1531 | return DestOp; |
1532 | |
return getSExtOrTrunc(DestOp, DL, VT);
1534 | } |
1535 | |
1536 | SDValue SelectionDAG::getBitcastedZExtOrTrunc(SDValue Op, const SDLoc &DL, |
1537 | EVT VT) { |
1538 | assert(!VT.isVector()); |
1539 | auto Type = Op.getValueType(); |
1540 | SDValue DestOp; |
1541 | if (Type == VT) |
1542 | return Op; |
1543 | auto Size = Op.getValueSizeInBits(); |
DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
1545 | if (DestOp.getValueType() == VT) |
1546 | return DestOp; |
1547 | |
return getZExtOrTrunc(DestOp, DL, VT);
1549 | } |
1550 | |
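/// Extend or truncate the boolean \p Op to \p VT, picking the extension kind
/// from the target's boolean contents for the original type \p OpVT.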
1551 | SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, |
1552 | EVT OpVT) { |
if (VT.bitsLE(Op.getValueType()))
return getNode(ISD::TRUNCATE, SL, VT, Op);

TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT);
return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
1558 | } |
1559 | |
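/// Clear the bits of \p Op above VT's scalar width by ANDing with a mask of
/// the low bits, which is how a zero-extension "in register" is modelled.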
1560 | SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { |
1561 | EVT OpVT = Op.getValueType(); |
1562 | assert(VT.isInteger() && OpVT.isInteger() && |
1563 | "Cannot getZeroExtendInReg FP types"); |
1564 | assert(VT.isVector() == OpVT.isVector() && |
1565 | "getZeroExtendInReg type should be vector iff the operand " |
1566 | "type is vector!"); |
1567 | assert((!VT.isVector() || |
1568 | VT.getVectorElementCount() == OpVT.getVectorElementCount()) && |
1569 | "Vector element counts must match in getZeroExtendInReg"); |
1570 | assert(VT.bitsLE(OpVT) && "Not extending!"); |
1571 | if (OpVT == VT) |
1572 | return Op; |
APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
VT.getScalarSizeInBits());
return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT));
1576 | } |
1577 | |
1578 | SDValue SelectionDAG::getVPZeroExtendInReg(SDValue Op, SDValue Mask, |
1579 | SDValue EVL, const SDLoc &DL, |
1580 | EVT VT) { |
1581 | EVT OpVT = Op.getValueType(); |
1582 | assert(VT.isInteger() && OpVT.isInteger() && |
1583 | "Cannot getVPZeroExtendInReg FP types"); |
1584 | assert(VT.isVector() && OpVT.isVector() && |
1585 | "getVPZeroExtendInReg type and operand type should be vector!"); |
assert(VT.getVectorElementCount() == OpVT.getVectorElementCount() &&
"Vector element counts must match in getVPZeroExtendInReg");
1588 | assert(VT.bitsLE(OpVT) && "Not extending!"); |
1589 | if (OpVT == VT) |
1590 | return Op; |
APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
VT.getScalarSizeInBits());
return getNode(ISD::VP_AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT), Mask,
EVL);
1595 | } |
1596 | |
1597 | SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { |
1598 | // Only unsigned pointer semantics are supported right now. In the future this |
1599 | // might delegate to TLI to check pointer signedness. |
1600 | return getZExtOrTrunc(Op, DL, VT); |
1601 | } |
1602 | |
1603 | SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { |
1604 | // Only unsigned pointer semantics are supported right now. In the future this |
1605 | // might delegate to TLI to check pointer signedness. |
1606 | return getZeroExtendInReg(Op, DL, VT); |
1607 | } |
1608 | |
1609 | SDValue SelectionDAG::getNegative(SDValue Val, const SDLoc &DL, EVT VT) { |
return getNode(ISD::SUB, DL, VT, getConstant(0, DL, VT), Val);
1611 | } |
1612 | |
1613 | /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). |
1614 | SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { |
return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));
1616 | } |
1617 | |
1618 | SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { |
SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
return getNode(ISD::XOR, DL, VT, Val, TrueValue);
1621 | } |
1622 | |
1623 | SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val, |
1624 | SDValue Mask, SDValue EVL, EVT VT) { |
SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL);
1627 | } |
1628 | |
1629 | SDValue SelectionDAG::getVPPtrExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, |
1630 | SDValue Mask, SDValue EVL) { |
1631 | return getVPZExtOrTrunc(DL, VT, Op, Mask, EVL); |
1632 | } |
1633 | |
1634 | SDValue SelectionDAG::getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, |
1635 | SDValue Mask, SDValue EVL) { |
if (VT.bitsGT(Op.getValueType()))
return getNode(ISD::VP_ZERO_EXTEND, DL, VT, Op, Mask, EVL);
if (VT.bitsLT(Op.getValueType()))
return getNode(ISD::VP_TRUNCATE, DL, VT, Op, Mask, EVL);
1640 | return Op; |
1641 | } |
1642 | |
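/// Return the boolean \p V in the representation the target uses for \p OpVT
/// comparisons: 0/1 or 0/all-ones depending on its boolean contents.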
1643 | SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, |
1644 | EVT OpVT) { |
1645 | if (!V) |
return getConstant(0, DL, VT);

switch (TLI->getBooleanContents(OpVT)) {
case TargetLowering::ZeroOrOneBooleanContent:
case TargetLowering::UndefinedBooleanContent:
return getConstant(1, DL, VT);
1652 | case TargetLowering::ZeroOrNegativeOneBooleanContent: |
1653 | return getAllOnesConstant(DL, VT); |
1654 | } |
1655 | llvm_unreachable("Unexpected boolean content enum!"); |
1656 | } |
1657 | |
1658 | SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT, |
1659 | bool isT, bool isO) { |
return getConstant(APInt(VT.getScalarSizeInBits(), Val, /*isSigned=*/false),
DL, VT, isT, isO);
1662 | } |
1663 | |
1664 | SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT, |
1665 | bool isT, bool isO) { |
return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO);
1667 | } |
1668 | |
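/// Create an integer constant of type \p VT. Illegal element types are
/// promoted, or expanded when new nodes must have legal types; for vector
/// types the resulting scalar constant is splatted.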
1669 | SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, |
1670 | EVT VT, bool isT, bool isO) { |
1671 | assert(VT.isInteger() && "Cannot create FP integer constant!"); |
1672 | |
1673 | EVT EltVT = VT.getScalarType(); |
1674 | const ConstantInt *Elt = &Val; |
1675 | |
1676 | // Vector splats are explicit within the DAG, with ConstantSDNode holding the |
1677 | // to-be-splatted scalar ConstantInt. |
if (isa<VectorType>(Elt->getType()))
Elt = ConstantInt::get(*getContext(), Elt->getValue());
1680 | |
1681 | // In some cases the vector type is legal but the element type is illegal and |
1682 | // needs to be promoted, for example v8i8 on ARM. In this case, promote the |
1683 | // inserted value (the type does not need to match the vector element type). |
1684 | // Any extra bits introduced will be truncated away. |
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypePromoteInteger) {
EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
APInt NewVal;
if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT))
NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits());
else
NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
1694 | } |
1695 | // In other cases the element type is illegal and needs to be expanded, for |
1696 | // example v2i64 on MIPS32. In this case, find the nearest legal type, split |
1697 | // the value into n parts and use a vector type with n-times the elements. |
1698 | // Then bitcast to the type requested. |
1699 | // Legalizing constants too early makes the DAGCombiner's job harder so we |
1700 | // only legalize if the DAG tells us we must produce legal types. |
else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypeExpandInteger) {
const APInt &NewVal = Elt->getValue();
EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
1706 | unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); |
1707 | |
1708 | // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node. |
1709 | if (VT.isScalableVector() || |
TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) {
1711 | assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 && |
1712 | "Can only handle an even split!"); |
1713 | unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits; |
1714 | |
1715 | SmallVector<SDValue, 2> ScalarParts; |
1716 | for (unsigned i = 0; i != Parts; ++i) |
ScalarParts.push_back(getConstant(
NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));

return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts);
1722 | } |
1723 | |
1724 | unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; |
EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
1726 | |
1727 | // Check the temporary vector is the correct size. If this fails then |
1728 | // getTypeToTransformTo() probably returned a type whose size (in bits) |
1729 | // isn't a power-of-2 factor of the requested type size. |
1730 | assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); |
1731 | |
1732 | SmallVector<SDValue, 2> EltParts; |
1733 | for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) |
EltParts.push_back(getConstant(
NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));
1737 | |
1738 | // EltParts is currently in little endian order. If we actually want |
1739 | // big-endian order then reverse it now. |
1740 | if (getDataLayout().isBigEndian()) |
std::reverse(EltParts.begin(), EltParts.end());
1742 | |
1743 | // The elements must be reversed when the element order is different |
1744 | // to the endianness of the elements (because the BITCAST is itself a |
1745 | // vector shuffle in this situation). However, we do not need any code to |
1746 | // perform this reversal because getConstant() is producing a vector |
1747 | // splat. |
1748 | // This situation occurs in MIPS MSA. |
1749 | |
1750 | SmallVector<SDValue, 8> Ops; |
1751 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) |
llvm::append_range(Ops, EltParts);
1753 | |
1754 | SDValue V = |
getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
1756 | return V; |
1757 | } |
1758 | |
1759 | assert(Elt->getBitWidth() == EltVT.getSizeInBits() && |
1760 | "APInt size does not match type size!"); |
1761 | unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; |
SDVTList VTs = getVTList(EltVT);
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, {});
ID.AddPointer(Elt);
ID.AddBoolean(isO);
1767 | void *IP = nullptr; |
1768 | SDNode *N = nullptr; |
if ((N = FindNodeOrInsertPos(ID, DL, IP)))
1770 | if (!VT.isVector()) |
1771 | return SDValue(N, 0); |
1772 | |
1773 | if (!N) { |
N = newSDNode<ConstantSDNode>(isT, isO, Elt, VTs);
CSEMap.InsertNode(N, IP);
InsertNode(N);
NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
1778 | } |
1779 | |
1780 | SDValue Result(N, 0); |
1781 | if (VT.isVector()) |
Result = getSplat(VT, DL, Result);
1783 | return Result; |
1784 | } |
1785 | |
1786 | SDValue SelectionDAG::getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, |
1787 | bool isT, bool isO) { |
1788 | unsigned Size = VT.getScalarSizeInBits(); |
return getConstant(APInt(Size, Val, /*isSigned=*/true), DL, VT, isT, isO);
1790 | } |
1791 | |
1792 | SDValue SelectionDAG::getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget, |
1793 | bool IsOpaque) { |
return getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT,
IsTarget, IsOpaque);
1796 | } |
1797 | |
1798 | SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, |
1799 | bool isTarget) { |
return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
1801 | } |
1802 | |
1803 | SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, |
1804 | const SDLoc &DL) { |
1805 | assert(VT.isInteger() && "Shift amount is not an integer type!"); |
EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout());
return getConstant(Val, DL, ShiftVT);
1808 | } |
1809 | |
1810 | SDValue SelectionDAG::getShiftAmountConstant(const APInt &Val, EVT VT, |
1811 | const SDLoc &DL) { |
1812 | assert(Val.ult(VT.getScalarSizeInBits()) && "Out of range shift"); |
return getShiftAmountConstant(Val.getZExtValue(), VT, DL);
1814 | } |
1815 | |
1816 | SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL, |
1817 | bool isTarget) { |
return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget);
1819 | } |
1820 | |
1821 | SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, |
1822 | bool isTarget) { |
return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
1824 | } |
1825 | |
1826 | SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, |
1827 | EVT VT, bool isTarget) { |
1828 | assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); |
1829 | |
1830 | EVT EltVT = VT.getScalarType(); |
1831 | const ConstantFP *Elt = &V; |
1832 | |
1833 | // Vector splats are explicit within the DAG, with ConstantFPSDNode holding |
1834 | // the to-be-splatted scalar ConstantFP. |
if (isa<VectorType>(Elt->getType()))
Elt = ConstantFP::get(*getContext(), Elt->getValue());
1837 | |
1838 | // Do the map lookup using the actual bit pattern for the floating point |
1839 | // value, so that we don't have problems with 0.0 comparing equal to -0.0, and |
1840 | // we don't have issues with SNANs. |
1841 | unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP; |
SDVTList VTs = getVTList(EltVT);
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, {});
ID.AddPointer(Elt);
1846 | void *IP = nullptr; |
1847 | SDNode *N = nullptr; |
if ((N = FindNodeOrInsertPos(ID, DL, IP)))
1849 | if (!VT.isVector()) |
1850 | return SDValue(N, 0); |
1851 | |
1852 | if (!N) { |
N = newSDNode<ConstantFPSDNode>(isTarget, Elt, VTs);
CSEMap.InsertNode(N, IP);
1855 | InsertNode(N); |
1856 | } |
1857 | |
1858 | SDValue Result(N, 0); |
1859 | if (VT.isVector()) |
Result = getSplat(VT, DL, Result);
NewSDValueDbgMsg(Result, "Creating fp constant: ", this);
1862 | return Result; |
1863 | } |
1864 | |
1865 | SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, |
1866 | bool isTarget) { |
1867 | EVT EltVT = VT.getScalarType(); |
1868 | if (EltVT == MVT::f32) |
return getConstantFP(APFloat((float)Val), DL, VT, isTarget);
if (EltVT == MVT::f64)
return getConstantFP(APFloat(Val), DL, VT, isTarget);
if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
EltVT == MVT::f16 || EltVT == MVT::bf16) {
bool Ignored;
APFloat APF = APFloat(Val);
APF.convert(EltVT.getFltSemantics(), APFloat::rmNearestTiesToEven,
&Ignored);
return getConstantFP(APF, DL, VT, isTarget);
1879 | } |
1880 | llvm_unreachable("Unsupported type in getConstantFP"); |
1881 | } |
1882 | |
1883 | SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, |
1884 | EVT VT, int64_t Offset, bool isTargetGA, |
1885 | unsigned TargetFlags) { |
1886 | assert((TargetFlags == 0 || isTargetGA) && |
1887 | "Cannot set target flags on target-independent globals"); |
1888 | |
1889 | // Truncate (with sign-extension) the offset value to the pointer size. |
1890 | unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); |
1891 | if (BitWidth < 64) |
Offset = SignExtend64(Offset, BitWidth);
1893 | |
1894 | unsigned Opc; |
1895 | if (GV->isThreadLocal()) |
1896 | Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; |
1897 | else |
1898 | Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; |
1899 | |
1900 | SDVTList VTs = getVTList(VT); |
1901 | FoldingSetNodeID ID; |
AddNodeIDNode(ID, Opc, VTs, {});
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);

auto *N = newSDNode<GlobalAddressSDNode>(
Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VTs, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
1913 | InsertNode(N); |
1914 | return SDValue(N, 0); |
1915 | } |
1916 | |
1917 | SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { |
1918 | unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; |
1919 | SDVTList VTs = getVTList(VT); |
1920 | FoldingSetNodeID ID; |
AddNodeIDNode(ID, Opc, VTs, {});
ID.AddInteger(FI);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N = newSDNode<FrameIndexSDNode>(FI, VTs, isTarget);
CSEMap.InsertNode(N, IP);
1929 | InsertNode(N); |
1930 | return SDValue(N, 0); |
1931 | } |
1932 | |
1933 | SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, |
1934 | unsigned TargetFlags) { |
1935 | assert((TargetFlags == 0 || isTarget) && |
1936 | "Cannot set target flags on target-independent jump tables"); |
1937 | unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; |
1938 | SDVTList VTs = getVTList(VT); |
1939 | FoldingSetNodeID ID; |
AddNodeIDNode(ID, Opc, VTs, {});
ID.AddInteger(JTI);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N = newSDNode<JumpTableSDNode>(JTI, VTs, isTarget, TargetFlags);
CSEMap.InsertNode(N, IP);
1949 | InsertNode(N); |
1950 | return SDValue(N, 0); |
1951 | } |
1952 | |
1953 | SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain, |
1954 | const SDLoc &DL) { |
EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout());
return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain,
getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true));
1958 | } |
1959 | |
1960 | SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, |
1961 | MaybeAlign Alignment, int Offset, |
1962 | bool isTarget, unsigned TargetFlags) { |
1963 | assert((TargetFlags == 0 || isTarget) && |
1964 | "Cannot set target flags on target-independent globals"); |
1965 | if (!Alignment) |
Alignment = shouldOptForSize()
? getDataLayout().getABITypeAlign(C->getType())
: getDataLayout().getPrefTypeAlign(C->getType());
1969 | unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; |
1970 | SDVTList VTs = getVTList(VT); |
1971 | FoldingSetNodeID ID; |
AddNodeIDNode(ID, Opc, VTs, {});
ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
ID.AddPointer(C);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VTs, Offset, *Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
1984 | InsertNode(N); |
1985 | SDValue V = SDValue(N, 0); |
NewSDValueDbgMsg(V, "Creating new constant pool: ", this);
1987 | return V; |
1988 | } |
1989 | |
1990 | SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, |
1991 | MaybeAlign Alignment, int Offset, |
1992 | bool isTarget, unsigned TargetFlags) { |
1993 | assert((TargetFlags == 0 || isTarget) && |
1994 | "Cannot set target flags on target-independent globals"); |
1995 | if (!Alignment) |
Alignment = getDataLayout().getPrefTypeAlign(C->getType());
1997 | unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; |
1998 | SDVTList VTs = getVTList(VT); |
1999 | FoldingSetNodeID ID; |
AddNodeIDNode(ID, Opc, VTs, {});
ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
C->addSelectionDAGCSEId(ID);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VTs, Offset, *Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
2012 | InsertNode(N); |
2013 | return SDValue(N, 0); |
2014 | } |
2015 | |
2016 | SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { |
2017 | FoldingSetNodeID ID; |
2018 | AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), {}); |
ID.AddPointer(MBB);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N = newSDNode<BasicBlockSDNode>(MBB);
CSEMap.InsertNode(N, IP);
2026 | InsertNode(N); |
2027 | return SDValue(N, 0); |
2028 | } |
2029 | |
2030 | SDValue SelectionDAG::getValueType(EVT VT) { |
2031 | if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= |
2032 | ValueTypeNodes.size()) |
ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
2034 | |
2035 | SDNode *&N = VT.isExtended() ? |
2036 | ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; |
2037 | |
2038 | if (N) return SDValue(N, 0); |
N = newSDNode<VTSDNode>(VT);
2040 | InsertNode(N); |
2041 | return SDValue(N, 0); |
2042 | } |
2043 | |
2044 | SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { |
2045 | SDNode *&N = ExternalSymbols[Sym]; |
2046 | if (N) return SDValue(N, 0); |
N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, getVTList(VT));
2048 | InsertNode(N); |
2049 | return SDValue(N, 0); |
2050 | } |
2051 | |
2052 | SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { |
2053 | SDNode *&N = MCSymbols[Sym]; |
2054 | if (N) |
2055 | return SDValue(N, 0); |
N = newSDNode<MCSymbolSDNode>(Sym, getVTList(VT));
2057 | InsertNode(N); |
2058 | return SDValue(N, 0); |
2059 | } |
2060 | |
2061 | SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, |
2062 | unsigned TargetFlags) { |
2063 | SDNode *&N = |
2064 | TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)]; |
2065 | if (N) return SDValue(N, 0); |
N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, getVTList(VT));
2067 | InsertNode(N); |
2068 | return SDValue(N, 0); |
2069 | } |
2070 | |
2071 | SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { |
2072 | if ((unsigned)Cond >= CondCodeNodes.size()) |
CondCodeNodes.resize(Cond+1);
2074 | |
2075 | if (!CondCodeNodes[Cond]) { |
auto *N = newSDNode<CondCodeSDNode>(Cond);
2077 | CondCodeNodes[Cond] = N; |
2078 | InsertNode(N); |
2079 | } |
2080 | |
2081 | return SDValue(CondCodeNodes[Cond], 0); |
2082 | } |
2083 | |
2084 | SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm, |
2085 | bool ConstantFold) { |
2086 | assert(MulImm.getBitWidth() == VT.getSizeInBits() && |
2087 | "APInt size does not match type size!"); |
2088 | |
2089 | if (MulImm == 0) |
return getConstant(0, DL, VT);
2091 | |
2092 | if (ConstantFold) { |
2093 | const MachineFunction &MF = getMachineFunction(); |
2094 | const Function &F = MF.getFunction(); |
ConstantRange CR = getVScaleRange(&F, 64);
if (const APInt *C = CR.getSingleElement())
return getConstant(MulImm * C->getZExtValue(), DL, VT);
2098 | } |
2099 | |
return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
2101 | } |
2102 | |
2103 | SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, |
2104 | bool ConstantFold) { |
2105 | if (EC.isScalable()) |
return getVScale(DL, VT,
APInt(VT.getSizeInBits(), EC.getKnownMinValue()));

return getConstant(EC.getKnownMinValue(), DL, VT);
2110 | } |
2111 | |
2112 | SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) { |
2113 | APInt One(ResVT.getScalarSizeInBits(), 1); |
return getStepVector(DL, ResVT, One);
2115 | } |
2116 | |
2117 | SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, |
2118 | const APInt &StepVal) { |
2119 | assert(ResVT.getScalarSizeInBits() == StepVal.getBitWidth()); |
2120 | if (ResVT.isScalableVector()) |
return getNode(
ISD::STEP_VECTOR, DL, ResVT,
getTargetConstant(StepVal, DL, ResVT.getVectorElementType()));

SmallVector<SDValue, 16> OpsStepConstants;
for (uint64_t i = 0; i < ResVT.getVectorNumElements(); i++)
OpsStepConstants.push_back(
getConstant(StepVal * i, DL, ResVT.getVectorElementType()));
return getBuildVector(ResVT, DL, OpsStepConstants);
2130 | } |
2131 | |
2132 | /// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that |
2133 | /// point at N1 to point at N2 and indices that point at N2 to point at N1. |
2134 | static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) { |
std::swap(N1, N2);
ShuffleVectorSDNode::commuteMask(M);
2137 | } |
2138 | |
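/// Create a VECTOR_SHUFFLE of \p N1 and \p N2 with mask \p Mask, after
/// canonicalizing the operands and mask and folding shuffles that reduce to
/// an undef, an identity, or a splat.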
2139 | SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, |
2140 | SDValue N2, ArrayRef<int> Mask) { |
2141 | assert(VT.getVectorNumElements() == Mask.size() && |
2142 | "Must have the same number of vector elements as mask elements!"); |
2143 | assert(VT == N1.getValueType() && VT == N2.getValueType() && |
2144 | "Invalid VECTOR_SHUFFLE"); |
2145 | |
2146 | // Canonicalize shuffle undef, undef -> undef |
2147 | if (N1.isUndef() && N2.isUndef()) |
2148 | return getUNDEF(VT); |
2149 | |
2150 | // Validate that all indices in Mask are within the range of the elements |
2151 | // input to the shuffle. |
2152 | int NElts = Mask.size(); |
2153 | assert(llvm::all_of(Mask, |
2154 | [&](int M) { return M < (NElts * 2) && M >= -1; }) && |
2155 | "Index out of range"); |
2156 | |
2157 | // Copy the mask so we can do any needed cleanup. |
2158 | SmallVector<int, 8> MaskVec(Mask); |
2159 | |
2160 | // Canonicalize shuffle v, v -> v, undef |
2161 | if (N1 == N2) { |
2162 | N2 = getUNDEF(VT); |
2163 | for (int i = 0; i != NElts; ++i) |
2164 | if (MaskVec[i] >= NElts) MaskVec[i] -= NElts; |
2165 | } |
2166 | |
2167 | // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. |
2168 | if (N1.isUndef()) |
commuteShuffle(N1, N2, MaskVec);
2170 | |
2171 | if (TLI->hasVectorBlend()) { |
2172 | // If shuffling a splat, try to blend the splat instead. We do this here so |
2173 | // that even when this arises during lowering we don't have to re-handle it. |
2174 | auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { |
2175 | BitVector UndefElements; |
SDValue Splat = BV->getSplatValue(&UndefElements);
2177 | if (!Splat) |
2178 | return; |
2179 | |
2180 | for (int i = 0; i < NElts; ++i) { |
2181 | if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) |
2182 | continue; |
2183 | |
2184 | // If this input comes from undef, mark it as such. |
2185 | if (UndefElements[MaskVec[i] - Offset]) { |
2186 | MaskVec[i] = -1; |
2187 | continue; |
2188 | } |
2189 | |
2190 | // If we can blend a non-undef lane, use that instead. |
2191 | if (!UndefElements[i]) |
2192 | MaskVec[i] = i + Offset; |
2193 | } |
2194 | }; |
if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
BlendSplat(N1BV, 0);
if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
BlendSplat(N2BV, NElts);
2199 | } |
2200 | |
2201 | // Canonicalize all index into lhs, -> shuffle lhs, undef |
2202 | // Canonicalize all index into rhs, -> shuffle rhs, undef |
2203 | bool AllLHS = true, AllRHS = true; |
2204 | bool N2Undef = N2.isUndef(); |
2205 | for (int i = 0; i != NElts; ++i) { |
2206 | if (MaskVec[i] >= NElts) { |
2207 | if (N2Undef) |
2208 | MaskVec[i] = -1; |
2209 | else |
2210 | AllLHS = false; |
2211 | } else if (MaskVec[i] >= 0) { |
2212 | AllRHS = false; |
2213 | } |
2214 | } |
2215 | if (AllLHS && AllRHS) |
2216 | return getUNDEF(VT); |
2217 | if (AllLHS && !N2Undef) |
2218 | N2 = getUNDEF(VT); |
2219 | if (AllRHS) { |
2220 | N1 = getUNDEF(VT); |
commuteShuffle(N1, N2, MaskVec);
2222 | } |
2223 | // Reset our undef status after accounting for the mask. |
2224 | N2Undef = N2.isUndef(); |
2225 | // Re-check whether both sides ended up undef. |
2226 | if (N1.isUndef() && N2Undef) |
2227 | return getUNDEF(VT); |
2228 | |
2229 | // If Identity shuffle return that node. |
2230 | bool Identity = true, AllSame = true; |
2231 | for (int i = 0; i != NElts; ++i) { |
2232 | if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false; |
2233 | if (MaskVec[i] != MaskVec[0]) AllSame = false; |
2234 | } |
2235 | if (Identity && NElts) |
2236 | return N1; |
2237 | |
2238 | // Shuffling a constant splat doesn't change the result. |
2239 | if (N2Undef) { |
2240 | SDValue V = N1; |
2241 | |
2242 | // Look through any bitcasts. We check that these don't change the number |
2243 | // (and size) of elements and just changes their types. |
2244 | while (V.getOpcode() == ISD::BITCAST) |
V = V->getOperand(0);
2246 | |
2247 | // A splat should always show up as a build vector node. |
if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
BitVector UndefElements;
SDValue Splat = BV->getSplatValue(&UndefElements);
2251 | // If this is a splat of an undef, shuffling it is also undef. |
2252 | if (Splat && Splat.isUndef()) |
2253 | return getUNDEF(VT); |
2254 | |
2255 | bool SameNumElts = |
2256 | V.getValueType().getVectorNumElements() == VT.getVectorNumElements(); |
2257 | |
2258 | // We only have a splat which can skip shuffles if there is a splatted |
2259 | // value and no undef lanes rearranged by the shuffle. |
2260 | if (Splat && UndefElements.none()) { |
2261 | // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the |
2262 | // number of elements match or the value splatted is a zero constant. |
if (SameNumElts || isNullConstant(Splat))
2264 | return N1; |
2265 | } |
2266 | |
2267 | // If the shuffle itself creates a splat, build the vector directly. |
2268 | if (AllSame && SameNumElts) { |
EVT BuildVT = BV->getValueType(0);
const SDValue &Splatted = BV->getOperand(MaskVec[0]);
SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted);
2272 | |
2273 | // We may have jumped through bitcasts, so the type of the |
2274 | // BUILD_VECTOR may not match the type of the shuffle. |
2275 | if (BuildVT != VT) |
NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
2277 | return NewBV; |
2278 | } |
2279 | } |
2280 | } |
2281 | |
2282 | SDVTList VTs = getVTList(VT); |
2283 | FoldingSetNodeID ID; |
2284 | SDValue Ops[2] = { N1, N2 }; |
AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, VTs, Ops);
for (int i = 0; i != NElts; ++i)
ID.AddInteger(MaskVec[i]);
2288 | |
2289 | void* IP = nullptr; |
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
2291 | return SDValue(E, 0); |
2292 | |
2293 | // Allocate the mask array for the node out of the BumpPtrAllocator, since |
2294 | // SDNode doesn't have access to it. This memory will be "leaked" when |
2295 | // the node is deallocated, but recovered when the NodeAllocator is released. |
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
llvm::copy(MaskVec, MaskAlloc);
2298 | |
auto *N = newSDNode<ShuffleVectorSDNode>(VTs, dl.getIROrder(),
dl.getDebugLoc(), MaskAlloc);
createOperands(N, Ops);

CSEMap.InsertNode(N, IP);
2304 | InsertNode(N); |
2305 | SDValue V = SDValue(N, 0); |
NewSDValueDbgMsg(V, "Creating new node: ", this);
2307 | return V; |
2308 | } |
2309 | |
2310 | SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { |
EVT VT = SV.getValueType(0);
SmallVector<int, 8> MaskVec(SV.getMask());
ShuffleVectorSDNode::commuteMask(MaskVec);

SDValue Op0 = SV.getOperand(0);
SDValue Op1 = SV.getOperand(1);
return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec);
2318 | } |
2319 | |
2320 | SDValue SelectionDAG::getRegister(Register Reg, EVT VT) { |
2321 | SDVTList VTs = getVTList(VT); |
2322 | FoldingSetNodeID ID; |
AddNodeIDNode(ID, ISD::Register, VTs, {});
ID.AddInteger(Reg.id());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N = newSDNode<RegisterSDNode>(Reg, VTs);
N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
CSEMap.InsertNode(N, IP);
2332 | InsertNode(N); |
2333 | return SDValue(N, 0); |
2334 | } |
2335 | |
2336 | SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { |
2337 | FoldingSetNodeID ID; |
2338 | AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), {}); |
ID.AddPointer(RegMask);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N = newSDNode<RegisterMaskSDNode>(RegMask);
CSEMap.InsertNode(N, IP);
2346 | InsertNode(N); |
2347 | return SDValue(N, 0); |
2348 | } |
2349 | |
2350 | SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root, |
2351 | MCSymbol *Label) { |
return getLabelNode(ISD::EH_LABEL, dl, Root, Label);
2353 | } |
2354 | |
2355 | SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, |
2356 | SDValue Root, MCSymbol *Label) { |
2357 | FoldingSetNodeID ID; |
2358 | SDValue Ops[] = { Root }; |
2359 | AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops); |
ID.AddPointer(Label);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N =
newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label);
createOperands(N, Ops);

CSEMap.InsertNode(N, IP);
2370 | InsertNode(N); |
2371 | return SDValue(N, 0); |
2372 | } |
2373 | |
2374 | SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, |
2375 | int64_t Offset, bool isTarget, |
2376 | unsigned TargetFlags) { |
2377 | unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; |
2378 | SDVTList VTs = getVTList(VT); |
2379 | |
2380 | FoldingSetNodeID ID; |
AddNodeIDNode(ID, Opc, VTs, {});
ID.AddPointer(BA);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N = newSDNode<BlockAddressSDNode>(Opc, VTs, BA, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
2391 | InsertNode(N); |
2392 | return SDValue(N, 0); |
2393 | } |
2394 | |
2395 | SDValue SelectionDAG::getSrcValue(const Value *V) { |
2396 | FoldingSetNodeID ID; |
2397 | AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), {}); |
ID.AddPointer(V);

void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N = newSDNode<SrcValueSDNode>(V);
CSEMap.InsertNode(N, IP);
2406 | InsertNode(N); |
2407 | return SDValue(N, 0); |
2408 | } |
2409 | |
2410 | SDValue SelectionDAG::getMDNode(const MDNode *MD) { |
2411 | FoldingSetNodeID ID; |
2412 | AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), {}); |
ID.AddPointer(MD);

void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);

auto *N = newSDNode<MDNodeSDNode>(MD);
CSEMap.InsertNode(N, IP);
2421 | InsertNode(N); |
2422 | return SDValue(N, 0); |
2423 | } |
2424 | |
2425 | SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) { |
2426 | if (VT == V.getValueType()) |
2427 | return V; |
2428 | |
return getNode(ISD::BITCAST, SDLoc(V), VT, V);
2430 | } |
2431 | |
2432 | SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, |
2433 | unsigned SrcAS, unsigned DestAS) { |
2434 | SDVTList VTs = getVTList(VT); |
2435 | SDValue Ops[] = {Ptr}; |
2436 | FoldingSetNodeID ID; |
AddNodeIDNode(ID, ISD::ADDRSPACECAST, VTs, Ops);
ID.AddInteger(SrcAS);
ID.AddInteger(DestAS);

void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);

auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VTs, SrcAS, DestAS);
createOperands(N, Ops);

CSEMap.InsertNode(N, IP);
2450 | InsertNode(N); |
2451 | return SDValue(N, 0); |
2452 | } |
2453 | |
2454 | SDValue SelectionDAG::getFreeze(SDValue V) { |
return getNode(ISD::FREEZE, SDLoc(V), V.getValueType(), V);
2456 | } |
2457 | |
2458 | /// getShiftAmountOperand - Return the specified value casted to |
2459 | /// the target's desired shift amount type. |
2460 | SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { |
2461 | EVT OpTy = Op.getValueType(); |
EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout());
2463 | if (OpTy == ShTy || OpTy.isVector()) return Op; |
2464 | |
return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
2466 | } |
2467 | |
2468 | /// Given a store node \p StoreNode, return true if it is safe to fold that node |
2469 | /// into \p FPNode, which expands to a library call with output pointers. |
2470 | static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, |
2471 | SDNode *FPNode) { |
2472 | SmallVector<const SDNode *, 8> Worklist; |
2473 | SmallVector<const SDNode *, 8> DeferredNodes; |
2474 | SmallPtrSet<const SDNode *, 16> Visited; |
2475 | |
2476 | // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). |
2477 | for (SDValue Op : StoreNode->ops()) |
2478 | if (Op.getNode() != FPNode) |
Worklist.push_back(Op.getNode());
2480 | |
2481 | unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); |
2482 | while (!Worklist.empty()) { |
2483 | const SDNode *Node = Worklist.pop_back_val(); |
auto [_, Inserted] = Visited.insert(Node);
2485 | if (!Inserted) |
2486 | continue; |
2487 | |
2488 | if (MaxSteps > 0 && Visited.size() >= MaxSteps) |
2489 | return false; |
2490 | |
2491 | // Reached the FPNode (would result in a cycle). |
2492 | // OR Reached CALLSEQ_START (would result in nested call sequences). |
2493 | if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) |
2494 | return false; |
2495 | |
2496 | if (Node->getOpcode() == ISD::CALLSEQ_END) { |
2497 | // Defer looking into call sequences (so we can check we're outside one). |
2498 | // We still need to look through these for the predecessor check. |
DeferredNodes.push_back(Node);
2500 | continue; |
2501 | } |
2502 | |
2503 | for (SDValue Op : Node->ops()) |
Worklist.push_back(Op.getNode());
2505 | } |
2506 | |
2507 | // True if we're outside a call sequence and don't have the FPNode as a |
2508 | // predecessor. No cycles or nested call sequences possible. |
return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
MaxSteps);
2511 | } |
2512 | |
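/// Expand \p Node as a call to the library function \p LC, which returns its
/// results through output pointers. Stores made by users of the node are
/// reused as destinations where that is safe; otherwise stack temporaries are
/// created and the results loaded back.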
2513 | bool SelectionDAG::expandMultipleResultFPLibCall( |
2514 | RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results, |
2515 | std::optional<unsigned> CallRetResNo) { |
2516 | LLVMContext &Ctx = *getContext(); |
EVT VT = Node->getValueType(0);
2518 | unsigned NumResults = Node->getNumValues(); |
2519 | |
const char *LCName = TLI->getLibcallName(LC);
2521 | if (!LC || !LCName) |
2522 | return false; |
2523 | |
2524 | auto getVecDesc = [&]() -> VecDesc const * { |
2525 | for (bool Masked : {false, true}) { |
if (VecDesc const *VD = getLibInfo().getVectorMappingInfo(
LCName, VT.getVectorElementCount(), Masked)) {
2528 | return VD; |
2529 | } |
2530 | } |
2531 | return nullptr; |
2532 | }; |
2533 | |
2534 | // For vector types, we must find a vector mapping for the libcall. |
2535 | VecDesc const *VD = nullptr; |
2536 | if (VT.isVector() && !(VD = getVecDesc())) |
2537 | return false; |
2538 | |
2539 | // Find users of the node that store the results (and share input chains). The |
2540 | // destination pointers can be used instead of creating stack allocations. |
2541 | SDValue StoresInChain; |
2542 | SmallVector<StoreSDNode *, 2> ResultStores(NumResults); |
2543 | for (SDNode *User : Node->users()) { |
if (!ISD::isNormalStore(User))
continue;
auto *ST = cast<StoreSDNode>(User);
2547 | SDValue StoreValue = ST->getValue(); |
2548 | unsigned ResNo = StoreValue.getResNo(); |
2549 | // Ensure the store corresponds to an output pointer. |
2550 | if (CallRetResNo == ResNo) |
2551 | continue; |
// Ensure the store is to the default address space and is not atomic or
// volatile.
2553 | if (!ST->isSimple() || ST->getAddressSpace() != 0) |
2554 | continue; |
2555 | // Ensure all store chains are the same (so they don't alias). |
2556 | if (StoresInChain && ST->getChain() != StoresInChain) |
2557 | continue; |
2558 | // Ensure the store is properly aligned. |
Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
if (ST->getAlign() <
getDataLayout().getABITypeAlign(StoreType->getScalarType()))
2562 | continue; |
2563 | // Avoid: |
2564 | // 1. Creating cyclic dependencies. |
2565 | // 2. Expanding the node to a call within a call sequence. |
if (!canFoldStoreIntoLibCallOutputPointers(ST, Node))
2567 | continue; |
2568 | ResultStores[ResNo] = ST; |
2569 | StoresInChain = ST->getChain(); |
2570 | } |
2571 | |
2572 | TargetLowering::ArgListTy Args; |
2573 | auto AddArgListEntry = [&](SDValue Node, Type *Ty) { |
2574 | TargetLowering::ArgListEntry Entry{}; |
2575 | Entry.Ty = Ty; |
2576 | Entry.Node = Node; |
Args.push_back(Entry);
2578 | }; |
2579 | |
2580 | // Pass the arguments. |
2581 | for (const SDValue &Op : Node->op_values()) { |
2582 | EVT ArgVT = Op.getValueType(); |
Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
2584 | AddArgListEntry(Op, ArgTy); |
2585 | } |
2586 | |
2587 | // Pass the output pointers. |
2588 | SmallVector<SDValue, 2> ResultPtrs(NumResults); |
Type *PointerTy = PointerType::getUnqual(Ctx);
for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
2591 | if (ResNo == CallRetResNo) |
2592 | continue; |
2593 | EVT ResVT = Node->getValueType(ResNo); |
SDValue ResultPtr = ST ? ST->getBasePtr() : CreateStackTemporary(ResVT);
2595 | ResultPtrs[ResNo] = ResultPtr; |
2596 | AddArgListEntry(ResultPtr, PointerTy); |
2597 | } |
2598 | |
2599 | SDLoc DL(Node); |
2600 | |
2601 | // Pass the vector mask (if required). |
2602 | if (VD && VD->isMasked()) { |
EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT);
SDValue Mask = getBoolConstant(true, DL, MaskVT, VT);
AddArgListEntry(Mask, MaskVT.getTypeForEVT(Ctx));
2606 | } |
2607 | |
Type *RetType = CallRetResNo.has_value()
? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
: Type::getVoidTy(Ctx);
SDValue InChain = StoresInChain ? StoresInChain : getEntryNode();
SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName,
TLI->getPointerTy(getDataLayout()));
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
TLI->getLibcallCallingConv(LC), RetType, Callee, std::move(Args));
2617 | |
2618 | auto [Call, CallChain] = TLI->LowerCallTo(CLI); |
2619 | |
for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
if (ResNo == CallRetResNo) {
Results.push_back(Call);
continue;
}
MachinePointerInfo PtrInfo;
SDValue LoadResult =
getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo);
SDValue OutChain = LoadResult.getValue(1);

if (StoreSDNode *ST = ResultStores[ResNo]) {
// Replace the store with the library call.
ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
PtrInfo = ST->getPointerInfo();
} else {
PtrInfo = MachinePointerInfo::getFixedStack(
getMachineFunction(), cast<FrameIndexSDNode>(ResultPtr)->getIndex());
}

Results.push_back(LoadResult);
2640 | } |
2641 | |
2642 | return true; |
2643 | } |
2644 | |
2645 | SDValue SelectionDAG::expandVAArg(SDNode *Node) { |
2646 | SDLoc dl(Node); |
2647 | const TargetLowering &TLI = getTargetLoweringInfo(); |
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
EVT VT = Node->getValueType(0);
SDValue Tmp1 = Node->getOperand(0);
SDValue Tmp2 = Node->getOperand(1);
const MaybeAlign MA(Node->getConstantOperandVal(3));

SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1,
Tmp2, MachinePointerInfo(V));
SDValue VAList = VAListLoad;

if (MA && *MA > TLI.getMinStackArgumentAlignment()) {
VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
getConstant(MA->value() - 1, dl, VAList.getValueType()));

VAList = getNode(
ISD::AND, dl, VAList.getValueType(), VAList,
getSignedConstant(-(int64_t)MA->value(), dl, VAList.getValueType()));
}

// Increment the pointer, VAList, to the next vaarg.
Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
getConstant(getDataLayout().getTypeAllocSize(
VT.getTypeForEVT(*getContext())),
dl, VAList.getValueType()));
// Store the incremented VAList to the legalized pointer.
Tmp1 =
getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V));
// Load the actual argument out of the pointer VAList.
return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo());
2677 | } |
2678 | |
2679 | SDValue SelectionDAG::expandVACopy(SDNode *Node) { |
2680 | SDLoc dl(Node); |
2681 | const TargetLowering &TLI = getTargetLoweringInfo(); |
2682 | // This defaults to loading a pointer from the input and storing it to the |
2683 | // output, returning the chain. |
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
SDValue Tmp1 =
getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0),
Node->getOperand(2), MachinePointerInfo(VS));
return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
MachinePointerInfo(VD));
2691 | } |
2692 | |
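/// Return the ABI or preferred alignment for the IR type of \p VT, reduced
/// for illegal vector types that will be broken down, and capped by the stack
/// alignment when the stack cannot be realigned.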
2693 | Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) { |
2694 | const DataLayout &DL = getDataLayout(); |
Type *Ty = VT.getTypeForEVT(*getContext());
2696 | Align RedAlign = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty); |
2697 | |
2698 | if (TLI->isTypeLegal(VT) || !VT.isVector()) |
2699 | return RedAlign; |
2700 | |
2701 | const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); |
2702 | const Align StackAlign = TFI->getStackAlign(); |
2703 | |
2704 | // See if we can choose a smaller ABI alignment in cases where it's an |
2705 | // illegal vector type that will get broken down. |
2706 | if (RedAlign > StackAlign) { |
2707 | EVT IntermediateVT; |
2708 | MVT RegisterVT; |
2709 | unsigned NumIntermediates; |
TLI->getVectorTypeBreakdown(*getContext(), VT, IntermediateVT,
NumIntermediates, RegisterVT);
Ty = IntermediateVT.getTypeForEVT(*getContext());
2713 | Align RedAlign2 = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty); |
2714 | if (RedAlign2 < RedAlign) |
2715 | RedAlign = RedAlign2; |
2716 | |
2717 | if (!getMachineFunction().getFrameInfo().isStackRealignable()) |
2718 | // If the stack is not realignable, the alignment should be limited to the |
2719 | // StackAlignment |
RedAlign = std::min(RedAlign, StackAlign);
2721 | } |
2722 | |
2723 | return RedAlign; |
2724 | } |
2725 | |
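/// Create a stack slot of \p Bytes bytes with the given alignment, using the
/// target's stack ID for scalable vectors when the size is scalable.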
2726 | SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) { |
2727 | MachineFrameInfo &MFI = MF->getFrameInfo(); |
2728 | const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); |
2729 | int StackID = 0; |
2730 | if (Bytes.isScalable()) |
2731 | StackID = TFI->getStackIDForScalableVectors(); |
2732 | // The stack id gives an indication of whether the object is scalable or |
2733 | // not, so it's safe to pass in the minimum size here. |
int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinValue(), Alignment,
false, nullptr, StackID);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
2737 | } |
2738 | |
2739 | SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { |
2740 | Type *Ty = VT.getTypeForEVT(Context&: *getContext()); |
2741 | Align StackAlign = |
2742 | std::max(a: getDataLayout().getPrefTypeAlign(Ty), b: Align(minAlign)); |
2743 | return CreateStackTemporary(Bytes: VT.getStoreSize(), Alignment: StackAlign); |
2744 | } |
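| // A minimal CreateStackTemporary usage sketch (hypothetical caller): |
| //   SDValue Slot = DAG.CreateStackTemporary(MVT::v4i32); |
| //   int FI = cast<FrameIndexSDNode>(Slot)->getIndex(); |
| // Legalization code can then spill/reload through Slot using that index. |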
2745 | |
2746 | SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { |
2747 | TypeSize VT1Size = VT1.getStoreSize(); |
2748 | TypeSize VT2Size = VT2.getStoreSize(); |
2749 | assert(VT1Size.isScalable() == VT2Size.isScalable() && |
2750 | "Don't know how to choose the maximum size when creating a stack " |
2751 | "temporary"); |
2752 | TypeSize Bytes = VT1Size.getKnownMinValue() > VT2Size.getKnownMinValue() |
2753 | ? VT1Size |
2754 | : VT2Size; |
2755 | |
2756 | Type *Ty1 = VT1.getTypeForEVT(Context&: *getContext()); |
2757 | Type *Ty2 = VT2.getTypeForEVT(Context&: *getContext()); |
2758 | const DataLayout &DL = getDataLayout(); |
2759 | Align Align = std::max(a: DL.getPrefTypeAlign(Ty: Ty1), b: DL.getPrefTypeAlign(Ty: Ty2)); |
2760 | return CreateStackTemporary(Bytes, Alignment: Align); |
2761 | } |
2762 | |
2763 | SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, |
2764 | ISD::CondCode Cond, const SDLoc &dl) { |
2765 | EVT OpVT = N1.getValueType(); |
2766 | |
2767 | auto GetUndefBooleanConstant = [&]() { |
2768 | if (VT.getScalarType() == MVT::i1 || |
2769 | TLI->getBooleanContents(Type: OpVT) == |
2770 | TargetLowering::UndefinedBooleanContent) |
2771 | return getUNDEF(VT); |
2772 | // ZeroOrOne / ZeroOrNegativeOne contents require specific values for the |
2773 | // high bits, so we cannot use getUNDEF(). Return zero instead. |
2774 | return getConstant(Val: 0, DL: dl, VT); |
2775 | }; |
2776 | |
2777 | // These setcc operations always fold. |
2778 | switch (Cond) { |
2779 | default: break; |
2780 | case ISD::SETFALSE: |
2781 | case ISD::SETFALSE2: return getBoolConstant(V: false, DL: dl, VT, OpVT); |
2782 | case ISD::SETTRUE: |
2783 | case ISD::SETTRUE2: return getBoolConstant(V: true, DL: dl, VT, OpVT); |
2784 | |
2785 | case ISD::SETOEQ: |
2786 | case ISD::SETOGT: |
2787 | case ISD::SETOGE: |
2788 | case ISD::SETOLT: |
2789 | case ISD::SETOLE: |
2790 | case ISD::SETONE: |
2791 | case ISD::SETO: |
2792 | case ISD::SETUO: |
2793 | case ISD::SETUEQ: |
2794 | case ISD::SETUNE: |
2795 | assert(!OpVT.isInteger() && "Illegal setcc for integer!"); |
2796 | break; |
2797 | } |
2798 | |
2799 | if (OpVT.isInteger()) { |
2800 | // For EQ and NE, we can always pick a value for the undef to make the |
2801 | // predicate pass or fail, so we can return undef. |
2802 | // Matches behavior in llvm::ConstantFoldCompareInstruction. |
2803 | // icmp eq/ne X, undef -> undef. |
2804 | if ((N1.isUndef() || N2.isUndef()) && |
2805 | (Cond == ISD::SETEQ || Cond == ISD::SETNE)) |
2806 | return GetUndefBooleanConstant(); |
2807 | |
2808 | // If both operands are undef, we can return undef for int comparison. |
2809 | // icmp undef, undef -> undef. |
2810 | if (N1.isUndef() && N2.isUndef()) |
2811 | return GetUndefBooleanConstant(); |
2812 | |
2813 | // icmp X, X -> true/false |
2814 | // icmp X, undef -> true/false because undef could be X. |
2815 | if (N1.isUndef() || N2.isUndef() || N1 == N2) |
2816 | return getBoolConstant(V: ISD::isTrueWhenEqual(Cond), DL: dl, VT, OpVT); |
2817 | } |
2818 | |
2819 | if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Val&: N2)) { |
2820 | const APInt &C2 = N2C->getAPIntValue(); |
2821 | if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Val&: N1)) { |
2822 | const APInt &C1 = N1C->getAPIntValue(); |
2823 | |
2824 | return getBoolConstant(V: ICmpInst::compare(LHS: C1, RHS: C2, Pred: getICmpCondCode(Pred: Cond)), |
2825 | DL: dl, VT, OpVT); |
2826 | } |
2827 | } |
2828 | |
2829 | auto *N1CFP = dyn_cast<ConstantFPSDNode>(Val&: N1); |
2830 | auto *N2CFP = dyn_cast<ConstantFPSDNode>(Val&: N2); |
2831 | |
2832 | if (N1CFP && N2CFP) { |
2833 | APFloat::cmpResult R = N1CFP->getValueAPF().compare(RHS: N2CFP->getValueAPF()); |
2834 | switch (Cond) { |
2835 | default: break; |
2836 | case ISD::SETEQ: if (R==APFloat::cmpUnordered) |
2837 | return GetUndefBooleanConstant(); |
2838 | [[fallthrough]]; |
2839 | case ISD::SETOEQ: return getBoolConstant(V: R==APFloat::cmpEqual, DL: dl, VT, |
2840 | OpVT); |
2841 | case ISD::SETNE: if (R==APFloat::cmpUnordered) |
2842 | return GetUndefBooleanConstant(); |
2843 | [[fallthrough]]; |
2844 | case ISD::SETONE: return getBoolConstant(V: R==APFloat::cmpGreaterThan || |
2845 | R==APFloat::cmpLessThan, DL: dl, VT, |
2846 | OpVT); |
2847 | case ISD::SETLT: if (R==APFloat::cmpUnordered) |
2848 | return GetUndefBooleanConstant(); |
2849 | [[fallthrough]]; |
2850 | case ISD::SETOLT: return getBoolConstant(V: R==APFloat::cmpLessThan, DL: dl, VT, |
2851 | OpVT); |
2852 | case ISD::SETGT: if (R==APFloat::cmpUnordered) |
2853 | return GetUndefBooleanConstant(); |
2854 | [[fallthrough]]; |
2855 | case ISD::SETOGT: return getBoolConstant(V: R==APFloat::cmpGreaterThan, DL: dl, |
2856 | VT, OpVT); |
2857 | case ISD::SETLE: if (R==APFloat::cmpUnordered) |
2858 | return GetUndefBooleanConstant(); |
2859 | [[fallthrough]]; |
2860 | case ISD::SETOLE: return getBoolConstant(V: R==APFloat::cmpLessThan || |
2861 | R==APFloat::cmpEqual, DL: dl, VT, |
2862 | OpVT); |
2863 | case ISD::SETGE: if (R==APFloat::cmpUnordered) |
2864 | return GetUndefBooleanConstant(); |
2865 | [[fallthrough]]; |
2866 | case ISD::SETOGE: return getBoolConstant(V: R==APFloat::cmpGreaterThan || |
2867 | R==APFloat::cmpEqual, DL: dl, VT, OpVT); |
2868 | case ISD::SETO: return getBoolConstant(V: R!=APFloat::cmpUnordered, DL: dl, VT, |
2869 | OpVT); |
2870 | case ISD::SETUO: return getBoolConstant(V: R==APFloat::cmpUnordered, DL: dl, VT, |
2871 | OpVT); |
2872 | case ISD::SETUEQ: return getBoolConstant(V: R==APFloat::cmpUnordered || |
2873 | R==APFloat::cmpEqual, DL: dl, VT, |
2874 | OpVT); |
2875 | case ISD::SETUNE: return getBoolConstant(V: R!=APFloat::cmpEqual, DL: dl, VT, |
2876 | OpVT); |
2877 | case ISD::SETULT: return getBoolConstant(V: R==APFloat::cmpUnordered || |
2878 | R==APFloat::cmpLessThan, DL: dl, VT, |
2879 | OpVT); |
2880 | case ISD::SETUGT: return getBoolConstant(V: R==APFloat::cmpGreaterThan || |
2881 | R==APFloat::cmpUnordered, DL: dl, VT, |
2882 | OpVT); |
2883 | case ISD::SETULE: return getBoolConstant(V: R!=APFloat::cmpGreaterThan, DL: dl, |
2884 | VT, OpVT); |
2885 | case ISD::SETUGE: return getBoolConstant(V: R!=APFloat::cmpLessThan, DL: dl, VT, |
2886 | OpVT); |
2887 | } |
2888 | } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) { |
2889 | // Ensure that the constant occurs on the RHS. |
2890 | ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Operation: Cond); |
2891 | if (!TLI->isCondCodeLegal(CC: SwappedCond, VT: OpVT.getSimpleVT())) |
2892 | return SDValue(); |
2893 | return getSetCC(DL: dl, VT, LHS: N2, RHS: N1, Cond: SwappedCond); |
2894 | } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) || |
2895 | (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) { |
2896 | // If an operand is known to be a nan (or undef that could be a nan), we can |
2897 | // fold it. |
2898 | // Choosing NaN for the undef will always make unordered comparisons succeed |
2899 | // and ordered comparisons fail. |
2900 | // Matches behavior in llvm::ConstantFoldCompareInstruction. |
2901 | switch (ISD::getUnorderedFlavor(Cond)) { |
2902 | default: |
2903 | llvm_unreachable("Unknown flavor!"); |
2904 | case 0: // Known false. |
2905 | return getBoolConstant(V: false, DL: dl, VT, OpVT); |
2906 | case 1: // Known true. |
2907 | return getBoolConstant(V: true, DL: dl, VT, OpVT); |
2908 | case 2: // Undefined. |
2909 | return GetUndefBooleanConstant(); |
2910 | } |
2911 | } |
2912 | |
2913 | // Could not fold it. |
2914 | return SDValue(); |
2915 | } |
2916 | |
2917 | /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We |
2918 | /// use this predicate to simplify operations downstream. |
2919 | bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { |
2920 | unsigned BitWidth = Op.getScalarValueSizeInBits(); |
2921 | return MaskedValueIsZero(Op, Mask: APInt::getSignMask(BitWidth), Depth); |
2922 | } |
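| // SignBitIsZero, illustrated: if Op is (and X, 0x7f) on i8 values, |
| // computeKnownBits proves bit 7 zero, so this returns true and callers may, |
| // for example, replace an sra by an srl. |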
2923 | |
2924 | /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero, i.e. |
2925 | /// every bit set in Mask is a bit that V is known not to have set. We use |
2926 | /// this predicate to simplify operations downstream. |
2927 | bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, |
2928 | unsigned Depth) const { |
2929 | return Mask.isSubsetOf(RHS: computeKnownBits(Op: V, Depth).Zero); |
2930 | } |
2931 | |
2932 | /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in the |
2933 | /// DemandedElts elements, i.e. Mask only covers bits that V is known not to |
2934 | /// have set. We use this predicate to simplify operations downstream. |
2935 | bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, |
2936 | const APInt &DemandedElts, |
2937 | unsigned Depth) const { |
2938 | return Mask.isSubsetOf(RHS: computeKnownBits(Op: V, DemandedElts, Depth).Zero); |
2939 | } |
2940 | |
2941 | /// MaskedVectorIsZero - Return true if 'V' is known to be zero in |
2942 | /// DemandedElts. We use this predicate to simplify operations downstream. |
2943 | bool SelectionDAG::MaskedVectorIsZero(SDValue V, const APInt &DemandedElts, |
2944 | unsigned Depth /* = 0 */) const { |
2945 | return computeKnownBits(Op: V, DemandedElts, Depth).isZero(); |
2946 | } |
2947 | |
2948 | /// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'. |
2949 | bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, |
2950 | unsigned Depth) const { |
2951 | return Mask.isSubsetOf(RHS: computeKnownBits(Op: V, Depth).One); |
2952 | } |
2953 | |
2954 | APInt SelectionDAG::computeVectorKnownZeroElements(SDValue Op, |
2955 | const APInt &DemandedElts, |
2956 | unsigned Depth) const { |
2957 | EVT VT = Op.getValueType(); |
2958 | assert(VT.isVector() && !VT.isScalableVector() && "Only for fixed vectors!"); |
2959 | |
2960 | unsigned NumElts = VT.getVectorNumElements(); |
2961 | assert(DemandedElts.getBitWidth() == NumElts && "Unexpected demanded mask."); |
2962 | |
2963 | APInt KnownZeroElements = APInt::getZero(numBits: NumElts); |
2964 | for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) { |
2965 | if (!DemandedElts[EltIdx]) |
2966 | continue; // Don't query elements that are not demanded. |
2967 | APInt Mask = APInt::getOneBitSet(numBits: NumElts, BitNo: EltIdx); |
2968 | if (MaskedVectorIsZero(V: Op, DemandedElts: Mask, Depth)) |
2969 | KnownZeroElements.setBit(EltIdx); |
2970 | } |
2971 | return KnownZeroElements; |
2972 | } |
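| // computeVectorKnownZeroElements, worked example: for |
| // Op = BUILD_VECTOR <0, X, 0, Y> with all four lanes demanded, lanes 0 and 2 |
| // are provably zero, so the returned mask is 0b0101. |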
2973 | |
2974 | /// isSplatValue - Return true if the vector V has the same value |
2975 | /// across all DemandedElts. For scalable vectors, we don't know the |
2976 | /// number of lanes at compile time. Instead, we use a 1 bit APInt |
2977 | /// to represent a conservative value for all lanes; that is, the |
2978 | /// one-bit value is implicitly splatted across all lanes. |
2979 | bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, |
2980 | APInt &UndefElts, unsigned Depth) const { |
2981 | unsigned Opcode = V.getOpcode(); |
2982 | EVT VT = V.getValueType(); |
2983 | assert(VT.isVector() && "Vector type expected"); |
2984 | assert((!VT.isScalableVector() || DemandedElts.getBitWidth() == 1) && |
2985 | "scalable demanded bits are ignored"); |
2986 | |
2987 | if (!DemandedElts) |
2988 | return false; // No demanded elts, better to assume we don't know anything. |
2989 | |
2990 | if (Depth >= MaxRecursionDepth) |
2991 | return false; // Limit search depth. |
2992 | |
2993 | // Deal with some common cases here that work for both fixed and scalable |
2994 | // vector types. |
2995 | switch (Opcode) { |
2996 | case ISD::SPLAT_VECTOR: |
2997 | UndefElts = V.getOperand(i: 0).isUndef() |
2998 | ? APInt::getAllOnes(numBits: DemandedElts.getBitWidth()) |
2999 | : APInt(DemandedElts.getBitWidth(), 0); |
3000 | return true; |
3001 | case ISD::ADD: |
3002 | case ISD::SUB: |
3003 | case ISD::AND: |
3004 | case ISD::XOR: |
3005 | case ISD::OR: { |
3006 | APInt UndefLHS, UndefRHS; |
3007 | SDValue LHS = V.getOperand(i: 0); |
3008 | SDValue RHS = V.getOperand(i: 1); |
3009 | // Only recognize splats with the same demanded undef elements for both |
3010 | // operands; otherwise we might miss binop-specific simplifications of |
3011 | // undef. |
3012 | // e.g. (and undef, 0) -> 0 etc. |
3013 | if (isSplatValue(V: LHS, DemandedElts, UndefElts&: UndefLHS, Depth: Depth + 1) && |
3014 | isSplatValue(V: RHS, DemandedElts, UndefElts&: UndefRHS, Depth: Depth + 1) && |
3015 | (DemandedElts & UndefLHS) == (DemandedElts & UndefRHS)) { |
3016 | UndefElts = UndefLHS | UndefRHS; |
3017 | return true; |
3018 | } |
3019 | return false; |
3020 | } |
3021 | case ISD::ABS: |
3022 | case ISD::TRUNCATE: |
3023 | case ISD::SIGN_EXTEND: |
3024 | case ISD::ZERO_EXTEND: |
3025 | return isSplatValue(V: V.getOperand(i: 0), DemandedElts, UndefElts, Depth: Depth + 1); |
3026 | default: |
3027 | if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || |
3028 | Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) |
3029 | return TLI->isSplatValueForTargetNode(Op: V, DemandedElts, UndefElts, DAG: *this, |
3030 | Depth); |
3031 | break; |
3032 | } |
3033 | |
3034 | // We don't support other cases than those above for scalable vectors at |
3035 | // the moment. |
3036 | if (VT.isScalableVector()) |
3037 | return false; |
3038 | |
3039 | unsigned NumElts = VT.getVectorNumElements(); |
3040 | assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); |
3041 | UndefElts = APInt::getZero(numBits: NumElts); |
3042 | |
3043 | switch (Opcode) { |
3044 | case ISD::BUILD_VECTOR: { |
3045 | SDValue Scl; |
3046 | for (unsigned i = 0; i != NumElts; ++i) { |
3047 | SDValue Op = V.getOperand(i); |
3048 | if (Op.isUndef()) { |
3049 | UndefElts.setBit(i); |
3050 | continue; |
3051 | } |
3052 | if (!DemandedElts[i]) |
3053 | continue; |
3054 | if (Scl && Scl != Op) |
3055 | return false; |
3056 | Scl = Op; |
3057 | } |
3058 | return true; |
3059 | } |
3060 | case ISD::VECTOR_SHUFFLE: { |
3061 | // Check if this is a shuffle node doing a splat or a shuffle of a splat. |
3062 | APInt DemandedLHS = APInt::getZero(numBits: NumElts); |
3063 | APInt DemandedRHS = APInt::getZero(numBits: NumElts); |
3064 | ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Val&: V)->getMask(); |
3065 | for (int i = 0; i != (int)NumElts; ++i) { |
3066 | int M = Mask[i]; |
3067 | if (M < 0) { |
3068 | UndefElts.setBit(i); |
3069 | continue; |
3070 | } |
3071 | if (!DemandedElts[i]) |
3072 | continue; |
3073 | if (M < (int)NumElts) |
3074 | DemandedLHS.setBit(M); |
3075 | else |
3076 | DemandedRHS.setBit(M - NumElts); |
3077 | } |
3078 | |
3079 | // If we aren't demanding either op, assume there's no splat. |
3080 | // If we are demanding both ops, assume there's no splat. |
3081 | if ((DemandedLHS.isZero() && DemandedRHS.isZero()) || |
3082 | (!DemandedLHS.isZero() && !DemandedRHS.isZero())) |
3083 | return false; |
3084 | |
3085 | // See if the demanded elts of the source op form a splat, or we only demand |
3086 | // one element, which is trivially a splat. |
3087 | // TODO: Handle source op splats with undefs. |
3088 | auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) { |
3089 | APInt SrcUndefs; |
3090 | return (SrcElts.popcount() == 1) || |
3091 | (isSplatValue(V: Src, DemandedElts: SrcElts, UndefElts&: SrcUndefs, Depth: Depth + 1) && |
3092 | (SrcElts & SrcUndefs).isZero()); |
3093 | }; |
3094 | if (!DemandedLHS.isZero()) |
3095 | return CheckSplatSrc(V.getOperand(i: 0), DemandedLHS); |
3096 | return CheckSplatSrc(V.getOperand(i: 1), DemandedRHS); |
3097 | } |
3098 | case ISD::EXTRACT_SUBVECTOR: { |
3099 | // Offset the demanded elts by the subvector index. |
3100 | SDValue Src = V.getOperand(i: 0); |
3101 | // We don't support scalable vectors at the moment. |
3102 | if (Src.getValueType().isScalableVector()) |
3103 | return false; |
3104 | uint64_t Idx = V.getConstantOperandVal(i: 1); |
3105 | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
3106 | APInt UndefSrcElts; |
3107 | APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx); |
3108 | if (isSplatValue(V: Src, DemandedElts: DemandedSrcElts, UndefElts&: UndefSrcElts, Depth: Depth + 1)) { |
3109 | UndefElts = UndefSrcElts.extractBits(numBits: NumElts, bitPosition: Idx); |
3110 | return true; |
3111 | } |
3112 | break; |
3113 | } |
3114 | case ISD::ANY_EXTEND_VECTOR_INREG: |
3115 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
3116 | case ISD::ZERO_EXTEND_VECTOR_INREG: { |
3117 | // Widen the demanded elts by the src element count. |
3118 | SDValue Src = V.getOperand(i: 0); |
3119 | // We don't support scalable vectors at the moment. |
3120 | if (Src.getValueType().isScalableVector()) |
3121 | return false; |
3122 | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
3123 | APInt UndefSrcElts; |
3124 | APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts); |
3125 | if (isSplatValue(V: Src, DemandedElts: DemandedSrcElts, UndefElts&: UndefSrcElts, Depth: Depth + 1)) { |
3126 | UndefElts = UndefSrcElts.trunc(width: NumElts); |
3127 | return true; |
3128 | } |
3129 | break; |
3130 | } |
3131 | case ISD::BITCAST: { |
3132 | SDValue Src = V.getOperand(i: 0); |
3133 | EVT SrcVT = Src.getValueType(); |
3134 | unsigned SrcBitWidth = SrcVT.getScalarSizeInBits(); |
3135 | unsigned BitWidth = VT.getScalarSizeInBits(); |
3136 | |
3137 | // Ignore bitcasts from unsupported types. |
3138 | // TODO: Add fp support? |
3139 | if (!SrcVT.isVector() || !SrcVT.isInteger() || !VT.isInteger()) |
3140 | break; |
3141 | |
3142 | // Bitcast 'small element' vector to 'large element' vector. |
3143 | if ((BitWidth % SrcBitWidth) == 0) { |
3144 | // See if each sub element is a splat. |
3145 | unsigned Scale = BitWidth / SrcBitWidth; |
3146 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
3147 | APInt ScaledDemandedElts = |
3148 | APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts); |
3149 | for (unsigned I = 0; I != Scale; ++I) { |
3150 | APInt SubUndefElts; |
3151 | APInt SubDemandedElt = APInt::getOneBitSet(numBits: Scale, BitNo: I); |
3152 | APInt SubDemandedElts = APInt::getSplat(NewLen: NumSrcElts, V: SubDemandedElt); |
3153 | SubDemandedElts &= ScaledDemandedElts; |
3154 | if (!isSplatValue(V: Src, DemandedElts: SubDemandedElts, UndefElts&: SubUndefElts, Depth: Depth + 1)) |
3155 | return false; |
3156 | // TODO: Add support for merging sub undef elements. |
3157 | if (!SubUndefElts.isZero()) |
3158 | return false; |
3159 | } |
3160 | return true; |
3161 | } |
3162 | break; |
3163 | } |
3164 | } |
3165 | |
3166 | return false; |
3167 | } |
3168 | |
3169 | /// Helper wrapper to main isSplatValue function. |
3170 | bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const { |
3171 | EVT VT = V.getValueType(); |
3172 | assert(VT.isVector() && "Vector type expected"); |
3173 | |
3174 | APInt UndefElts; |
3175 | // Since the number of lanes in a scalable vector is unknown at compile time, |
3176 | // we track one bit which is implicitly broadcast to all lanes. This means |
3177 | // that all lanes in a scalable vector are considered demanded. |
3178 | APInt DemandedElts = APInt::getAllOnes( |
3179 | numBits: VT.isScalableVector() ? 1 : VT.getVectorNumElements()); |
3180 | return isSplatValue(V, DemandedElts, UndefElts) && |
3181 | (AllowUndefs || !UndefElts); |
3182 | } |
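| // Typical isSplatValue use (hypothetical caller): a combine that wants to |
| // scalarize a uniform operation might guard itself with: |
| //   if (DAG.isSplatValue(N->getOperand(0), /*AllowUndefs=*/false)) |
| //     ... // every lane is known to hold the same, non-undef value |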
3183 | |
3184 | SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { |
3185 | V = peekThroughExtractSubvectors(V); |
3186 | |
3187 | EVT VT = V.getValueType(); |
3188 | unsigned Opcode = V.getOpcode(); |
3189 | switch (Opcode) { |
3190 | default: { |
3191 | APInt UndefElts; |
3192 | // Since the number of lanes in a scalable vector is unknown at compile time, |
3193 | // we track one bit which is implicitly broadcast to all lanes. This means |
3194 | // that all lanes in a scalable vector are considered demanded. |
3195 | APInt DemandedElts = APInt::getAllOnes( |
3196 | numBits: VT.isScalableVector() ? 1 : VT.getVectorNumElements()); |
3197 | |
3198 | if (isSplatValue(V, DemandedElts, UndefElts)) { |
3199 | if (VT.isScalableVector()) { |
3200 | // DemandedElts and UndefElts are ignored for scalable vectors, since |
3201 | // the only supported cases are SPLAT_VECTOR nodes. |
3202 | SplatIdx = 0; |
3203 | } else { |
3204 | // Handle case where all demanded elements are UNDEF. |
3205 | if (DemandedElts.isSubsetOf(RHS: UndefElts)) { |
3206 | SplatIdx = 0; |
3207 | return getUNDEF(VT); |
3208 | } |
3209 | SplatIdx = (UndefElts & DemandedElts).countr_one(); |
3210 | } |
3211 | return V; |
3212 | } |
3213 | break; |
3214 | } |
3215 | case ISD::SPLAT_VECTOR: |
3216 | SplatIdx = 0; |
3217 | return V; |
3218 | case ISD::VECTOR_SHUFFLE: { |
3219 | assert(!VT.isScalableVector()); |
3220 | // Check if this is a shuffle node doing a splat. |
3221 | // TODO - remove this and rely purely on SelectionDAG::isSplatValue; |
3222 | // getTargetVShiftNode currently struggles without the splat source. |
3223 | auto *SVN = cast<ShuffleVectorSDNode>(Val&: V); |
3224 | if (!SVN->isSplat()) |
3225 | break; |
3226 | int Idx = SVN->getSplatIndex(); |
3227 | int NumElts = V.getValueType().getVectorNumElements(); |
3228 | SplatIdx = Idx % NumElts; |
3229 | return V.getOperand(i: Idx / NumElts); |
3230 | } |
3231 | } |
3232 | |
3233 | return SDValue(); |
3234 | } |
3235 | |
3236 | SDValue SelectionDAG::getSplatValue(SDValue V, bool LegalTypes) { |
3237 | int SplatIdx; |
3238 | if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) { |
3239 | EVT SVT = SrcVector.getValueType().getScalarType(); |
3240 | EVT LegalSVT = SVT; |
3241 | if (LegalTypes && !TLI->isTypeLegal(VT: SVT)) { |
3242 | if (!SVT.isInteger()) |
3243 | return SDValue(); |
3244 | LegalSVT = TLI->getTypeToTransformTo(Context&: *getContext(), VT: LegalSVT); |
3245 | if (LegalSVT.bitsLT(VT: SVT)) |
3246 | return SDValue(); |
3247 | } |
3248 | return getExtractVectorElt(DL: SDLoc(V), VT: LegalSVT, Vec: SrcVector, Idx: SplatIdx); |
3249 | } |
3250 | return SDValue(); |
3251 | } |
3252 | |
3253 | std::optional<ConstantRange> |
3254 | SelectionDAG::getValidShiftAmountRange(SDValue V, const APInt &DemandedElts, |
3255 | unsigned Depth) const { |
3256 | assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || |
3257 | V.getOpcode() == ISD::SRA) && |
3258 | "Unknown shift node"); |
3259 | // Shifting more than the bitwidth is not valid. |
3260 | unsigned BitWidth = V.getScalarValueSizeInBits(); |
3261 | |
3262 | if (auto *Cst = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1))) { |
3263 | const APInt &ShAmt = Cst->getAPIntValue(); |
3264 | if (ShAmt.uge(RHS: BitWidth)) |
3265 | return std::nullopt; |
3266 | return ConstantRange(ShAmt); |
3267 | } |
3268 | |
3269 | if (auto *BV = dyn_cast<BuildVectorSDNode>(Val: V.getOperand(i: 1))) { |
3270 | const APInt *MinAmt = nullptr, *MaxAmt = nullptr; |
3271 | for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { |
3272 | if (!DemandedElts[i]) |
3273 | continue; |
3274 | auto *SA = dyn_cast<ConstantSDNode>(Val: BV->getOperand(Num: i)); |
3275 | if (!SA) { |
3276 | MinAmt = MaxAmt = nullptr; |
3277 | break; |
3278 | } |
3279 | const APInt &ShAmt = SA->getAPIntValue(); |
3280 | if (ShAmt.uge(RHS: BitWidth)) |
3281 | return std::nullopt; |
3282 | if (!MinAmt || MinAmt->ugt(RHS: ShAmt)) |
3283 | MinAmt = &ShAmt; |
3284 | if (!MaxAmt || MaxAmt->ult(RHS: ShAmt)) |
3285 | MaxAmt = &ShAmt; |
3286 | } |
3287 | assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) && |
3288 | "Failed to find matching min/max shift amounts"); |
3289 | if (MinAmt && MaxAmt) |
3290 | return ConstantRange(*MinAmt, *MaxAmt + 1); |
3291 | } |
3292 | |
3293 | // Use computeKnownBits to find a constant/knownbits hidden by type |
3294 | // legalization, e.g. behind multiple bitcasts/build_vectors/casts. |
3295 | KnownBits KnownAmt = computeKnownBits(Op: V.getOperand(i: 1), DemandedElts, Depth); |
3296 | if (KnownAmt.getMaxValue().ult(RHS: BitWidth)) |
3297 | return ConstantRange::fromKnownBits(Known: KnownAmt, /*IsSigned=*/false); |
3298 | |
3299 | return std::nullopt; |
3300 | } |
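| // getValidShiftAmountRange, worked example: for V = (srl X, <2, 4, 6>) with |
| // all three lanes demanded, MinAmt = 2 and MaxAmt = 6, so the result is the |
| // half-open range [2, 7). A demanded lane shifting by >= the bit width makes |
| // the whole query return std::nullopt instead. |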
3301 | |
3302 | std::optional<uint64_t> |
3303 | SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts, |
3304 | unsigned Depth) const { |
3305 | assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || |
3306 | V.getOpcode() == ISD::SRA) && |
3307 | "Unknown shift node"); |
3308 | if (std::optional<ConstantRange> AmtRange = |
3309 | getValidShiftAmountRange(V, DemandedElts, Depth)) |
3310 | if (const APInt *ShAmt = AmtRange->getSingleElement()) |
3311 | return ShAmt->getZExtValue(); |
3312 | return std::nullopt; |
3313 | } |
3314 | |
3315 | std::optional<uint64_t> |
3316 | SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const { |
3317 | EVT VT = V.getValueType(); |
3318 | APInt DemandedElts = VT.isFixedLengthVector() |
3319 | ? APInt::getAllOnes(numBits: VT.getVectorNumElements()) |
3320 | : APInt(1, 1); |
3321 | return getValidShiftAmount(V, DemandedElts, Depth); |
3322 | } |
3323 | |
3324 | std::optional<uint64_t> |
3325 | SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts, |
3326 | unsigned Depth) const { |
3327 | assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || |
3328 | V.getOpcode() == ISD::SRA) && |
3329 | "Unknown shift node"); |
3330 | if (std::optional<ConstantRange> AmtRange = |
3331 | getValidShiftAmountRange(V, DemandedElts, Depth)) |
3332 | return AmtRange->getUnsignedMin().getZExtValue(); |
3333 | return std::nullopt; |
3334 | } |
3335 | |
3336 | std::optional<uint64_t> |
3337 | SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const { |
3338 | EVT VT = V.getValueType(); |
3339 | APInt DemandedElts = VT.isFixedLengthVector() |
3340 | ? APInt::getAllOnes(numBits: VT.getVectorNumElements()) |
3341 | : APInt(1, 1); |
3342 | return getValidMinimumShiftAmount(V, DemandedElts, Depth); |
3343 | } |
3344 | |
3345 | std::optional<uint64_t> |
3346 | SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, |
3347 | unsigned Depth) const { |
3348 | assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || |
3349 | V.getOpcode() == ISD::SRA) && |
3350 | "Unknown shift node"); |
3351 | if (std::optional<ConstantRange> AmtRange = |
3352 | getValidShiftAmountRange(V, DemandedElts, Depth)) |
3353 | return AmtRange->getUnsignedMax().getZExtValue(); |
3354 | return std::nullopt; |
3355 | } |
3356 | |
3357 | std::optional<uint64_t> |
3358 | SelectionDAG::getValidMaximumShiftAmount(SDValue V, unsigned Depth) const { |
3359 | EVT VT = V.getValueType(); |
3360 | APInt DemandedElts = VT.isFixedLengthVector() |
3361 | ? APInt::getAllOnes(numBits: VT.getVectorNumElements()) |
3362 | : APInt(1, 1); |
3363 | return getValidMaximumShiftAmount(V, DemandedElts, Depth); |
3364 | } |
3365 | |
3366 | /// Determine which bits of Op are known to be either zero or one and return |
3367 | /// them in Known. For vectors, the known bits are those that are shared by |
3368 | /// every vector element. |
3369 | KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { |
3370 | EVT VT = Op.getValueType(); |
3371 | |
3372 | // Since the number of lanes in a scalable vector is unknown at compile time, |
3373 | // we track one bit which is implicitly broadcast to all lanes. This means |
3374 | // that all lanes in a scalable vector are considered demanded. |
3375 | APInt DemandedElts = VT.isFixedLengthVector() |
3376 | ? APInt::getAllOnes(numBits: VT.getVectorNumElements()) |
3377 | : APInt(1, 1); |
3378 | return computeKnownBits(Op, DemandedElts, Depth); |
3379 | } |
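| // A minimal computeKnownBits usage sketch (hypothetical caller): |
| //   KnownBits Known = DAG.computeKnownBits(Val); |
| //   if (Known.isNonNegative()) |
| //     ... // the sign bit of every demanded lane is known zero |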
3380 | |
3381 | /// Determine which bits of Op are known to be either zero or one and return |
3382 | /// them in Known. The DemandedElts argument allows us to only collect the known |
3383 | /// bits that are shared by the requested vector elements. |
3384 | KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, |
3385 | unsigned Depth) const { |
3386 | unsigned BitWidth = Op.getScalarValueSizeInBits(); |
3387 | |
3388 | KnownBits Known(BitWidth); // Don't know anything. |
3389 | |
3390 | if (auto OptAPInt = Op->bitcastToAPInt()) { |
3391 | // We know all of the bits for a constant! |
3392 | return KnownBits::makeConstant(C: *std::move(OptAPInt)); |
3393 | } |
3394 | |
3395 | if (Depth >= MaxRecursionDepth) |
3396 | return Known; // Limit search depth. |
3397 | |
3398 | KnownBits Known2; |
3399 | unsigned NumElts = DemandedElts.getBitWidth(); |
3400 | assert((!Op.getValueType().isFixedLengthVector() || |
3401 | NumElts == Op.getValueType().getVectorNumElements()) && |
3402 | "Unexpected vector size"); |
3403 | |
3404 | if (!DemandedElts) |
3405 | return Known; // No demanded elts, better to assume we don't know anything. |
3406 | |
3407 | unsigned Opcode = Op.getOpcode(); |
3408 | switch (Opcode) { |
3409 | case ISD::MERGE_VALUES: |
3410 | return computeKnownBits(Op: Op.getOperand(i: Op.getResNo()), DemandedElts, |
3411 | Depth: Depth + 1); |
3412 | case ISD::SPLAT_VECTOR: { |
3413 | SDValue SrcOp = Op.getOperand(i: 0); |
3414 | assert(SrcOp.getValueSizeInBits() >= BitWidth && |
3415 | "Expected SPLAT_VECTOR implicit truncation"); |
3416 | // Implicitly truncate the bits to match the official semantics of |
3417 | // SPLAT_VECTOR. |
3418 | Known = computeKnownBits(Op: SrcOp, Depth: Depth + 1).trunc(BitWidth); |
3419 | break; |
3420 | } |
3421 | case ISD::SPLAT_VECTOR_PARTS: { |
3422 | unsigned ScalarSize = Op.getOperand(i: 0).getScalarValueSizeInBits(); |
3423 | assert(ScalarSize * Op.getNumOperands() == BitWidth && |
3424 | "Expected SPLAT_VECTOR_PARTS scalars to cover element width"); |
3425 | for (auto [I, SrcOp] : enumerate(First: Op->ops())) { |
3426 | Known.insertBits(SubBits: computeKnownBits(Op: SrcOp, Depth: Depth + 1), BitPosition: ScalarSize * I); |
3427 | } |
3428 | break; |
3429 | } |
3430 | case ISD::STEP_VECTOR: { |
3431 | const APInt &Step = Op.getConstantOperandAPInt(i: 0); |
3432 | |
3433 | if (Step.isPowerOf2()) |
3434 | Known.Zero.setLowBits(Step.logBase2()); |
3435 | |
3436 | const Function &F = getMachineFunction().getFunction(); |
3437 | |
3438 | if (!isUIntN(N: BitWidth, x: Op.getValueType().getVectorMinNumElements())) |
3439 | break; |
3440 | const APInt MinNumElts = |
3441 | APInt(BitWidth, Op.getValueType().getVectorMinNumElements()); |
3442 | |
3443 | bool Overflow; |
3444 | const APInt MaxNumElts = getVScaleRange(F: &F, BitWidth) |
3445 | .getUnsignedMax() |
3446 | .umul_ov(RHS: MinNumElts, Overflow); |
3447 | if (Overflow) |
3448 | break; |
3449 | |
3450 | const APInt MaxValue = (MaxNumElts - 1).umul_ov(RHS: Step, Overflow); |
3451 | if (Overflow) |
3452 | break; |
3453 | |
3454 | Known.Zero.setHighBits(MaxValue.countl_zero()); |
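| // Illustrative example: for a step of 4 the sequence is <0, 4, 8, ...>, so |
| // the two low bits of every lane are known zero; the vscale bounds above can |
| // additionally prove high bits zero when the largest element is small. |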
3455 | break; |
3456 | } |
3457 | case ISD::BUILD_VECTOR: |
3458 | assert(!Op.getValueType().isScalableVector()); |
3459 | // Collect the known bits that are shared by every demanded vector element. |
3460 | Known.Zero.setAllBits(); Known.One.setAllBits(); |
3461 | for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { |
3462 | if (!DemandedElts[i]) |
3463 | continue; |
3464 | |
3465 | SDValue SrcOp = Op.getOperand(i); |
3466 | Known2 = computeKnownBits(Op: SrcOp, Depth: Depth + 1); |
3467 | |
3468 | // BUILD_VECTOR can implicitly truncate sources; we must handle this. |
3469 | if (SrcOp.getValueSizeInBits() != BitWidth) { |
3470 | assert(SrcOp.getValueSizeInBits() > BitWidth && |
3471 | "Expected BUILD_VECTOR implicit truncation"); |
3472 | Known2 = Known2.trunc(BitWidth); |
3473 | } |
3474 | |
3475 | // Known bits are the values that are shared by every demanded element. |
3476 | Known = Known.intersectWith(RHS: Known2); |
3477 | |
3478 | // If we don't know any bits, early out. |
3479 | if (Known.isUnknown()) |
3480 | break; |
3481 | } |
3482 | break; |
3483 | case ISD::VECTOR_SHUFFLE: { |
3484 | assert(!Op.getValueType().isScalableVector()); |
3485 | // Collect the known bits that are shared by every vector element referenced |
3486 | // by the shuffle. |
3487 | APInt DemandedLHS, DemandedRHS; |
3488 | const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val&: Op); |
3489 | assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); |
3490 | if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: SVN->getMask(), DemandedElts, |
3491 | DemandedLHS, DemandedRHS)) |
3492 | break; |
3493 | |
3494 | // Known bits are the values that are shared by every demanded element. |
3495 | Known.Zero.setAllBits(); Known.One.setAllBits(); |
3496 | if (!!DemandedLHS) { |
3497 | SDValue LHS = Op.getOperand(i: 0); |
3498 | Known2 = computeKnownBits(Op: LHS, DemandedElts: DemandedLHS, Depth: Depth + 1); |
3499 | Known = Known.intersectWith(RHS: Known2); |
3500 | } |
3501 | // If we don't know any bits, early out. |
3502 | if (Known.isUnknown()) |
3503 | break; |
3504 | if (!!DemandedRHS) { |
3505 | SDValue RHS = Op.getOperand(i: 1); |
3506 | Known2 = computeKnownBits(Op: RHS, DemandedElts: DemandedRHS, Depth: Depth + 1); |
3507 | Known = Known.intersectWith(RHS: Known2); |
3508 | } |
3509 | break; |
3510 | } |
3511 | case ISD::VSCALE: { |
3512 | const Function &F = getMachineFunction().getFunction(); |
3513 | const APInt &Multiplier = Op.getConstantOperandAPInt(i: 0); |
3514 | Known = getVScaleRange(F: &F, BitWidth).multiply(Other: Multiplier).toKnownBits(); |
3515 | break; |
3516 | } |
3517 | case ISD::CONCAT_VECTORS: { |
3518 | if (Op.getValueType().isScalableVector()) |
3519 | break; |
3520 | // Split DemandedElts and test each of the demanded subvectors. |
3521 | Known.Zero.setAllBits(); Known.One.setAllBits(); |
3522 | EVT SubVectorVT = Op.getOperand(i: 0).getValueType(); |
3523 | unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); |
3524 | unsigned NumSubVectors = Op.getNumOperands(); |
3525 | for (unsigned i = 0; i != NumSubVectors; ++i) { |
3526 | APInt DemandedSub = |
3527 | DemandedElts.extractBits(numBits: NumSubVectorElts, bitPosition: i * NumSubVectorElts); |
3528 | if (!!DemandedSub) { |
3529 | SDValue Sub = Op.getOperand(i); |
3530 | Known2 = computeKnownBits(Op: Sub, DemandedElts: DemandedSub, Depth: Depth + 1); |
3531 | Known = Known.intersectWith(RHS: Known2); |
3532 | } |
3533 | // If we don't know any bits, early out. |
3534 | if (Known.isUnknown()) |
3535 | break; |
3536 | } |
3537 | break; |
3538 | } |
3539 | case ISD::INSERT_SUBVECTOR: { |
3540 | if (Op.getValueType().isScalableVector()) |
3541 | break; |
3542 | // Demand any elements from the subvector and the remainder from the src |
3543 | // it's inserted into. |
3544 | SDValue Src = Op.getOperand(i: 0); |
3545 | SDValue Sub = Op.getOperand(i: 1); |
3546 | uint64_t Idx = Op.getConstantOperandVal(i: 2); |
3547 | unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); |
3548 | APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx); |
3549 | APInt DemandedSrcElts = DemandedElts; |
3550 | DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts); |
3551 | |
3552 | Known.One.setAllBits(); |
3553 | Known.Zero.setAllBits(); |
3554 | if (!!DemandedSubElts) { |
3555 | Known = computeKnownBits(Op: Sub, DemandedElts: DemandedSubElts, Depth: Depth + 1); |
3556 | if (Known.isUnknown()) |
3557 | break; // early-out. |
3558 | } |
3559 | if (!!DemandedSrcElts) { |
3560 | Known2 = computeKnownBits(Op: Src, DemandedElts: DemandedSrcElts, Depth: Depth + 1); |
3561 | Known = Known.intersectWith(RHS: Known2); |
3562 | } |
3563 | break; |
3564 | } |
3565 | case ISD::EXTRACT_SUBVECTOR: { |
3566 | // Offset the demanded elts by the subvector index. |
3567 | SDValue Src = Op.getOperand(i: 0); |
3568 | // Bail until we can represent demanded elements for scalable vectors. |
3569 | if (Op.getValueType().isScalableVector() || Src.getValueType().isScalableVector()) |
3570 | break; |
3571 | uint64_t Idx = Op.getConstantOperandVal(i: 1); |
3572 | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
3573 | APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx); |
3574 | Known = computeKnownBits(Op: Src, DemandedElts: DemandedSrcElts, Depth: Depth + 1); |
3575 | break; |
3576 | } |
3577 | case ISD::SCALAR_TO_VECTOR: { |
3578 | if (Op.getValueType().isScalableVector()) |
3579 | break; |
3580 | // We know as much about scalar_to_vector as we know about its source, |
3581 | // which becomes the first element of an otherwise unknown vector. |
3582 | if (DemandedElts != 1) |
3583 | break; |
3584 | |
3585 | SDValue N0 = Op.getOperand(i: 0); |
3586 | Known = computeKnownBits(Op: N0, Depth: Depth + 1); |
3587 | if (N0.getValueSizeInBits() != BitWidth) |
3588 | Known = Known.trunc(BitWidth); |
3589 | |
3590 | break; |
3591 | } |
3592 | case ISD::BITCAST: { |
3593 | if (Op.getValueType().isScalableVector()) |
3594 | break; |
3595 | |
3596 | SDValue N0 = Op.getOperand(i: 0); |
3597 | EVT SubVT = N0.getValueType(); |
3598 | unsigned SubBitWidth = SubVT.getScalarSizeInBits(); |
3599 | |
3600 | // Ignore bitcasts from unsupported types. |
3601 | if (!(SubVT.isInteger() || SubVT.isFloatingPoint())) |
3602 | break; |
3603 | |
3604 | // Fast handling of 'identity' bitcasts. |
3605 | if (BitWidth == SubBitWidth) { |
3606 | Known = computeKnownBits(Op: N0, DemandedElts, Depth: Depth + 1); |
3607 | break; |
3608 | } |
3609 | |
3610 | bool IsLE = getDataLayout().isLittleEndian(); |
3611 | |
3612 | // Bitcast 'small element' vector to 'large element' scalar/vector. |
3613 | if ((BitWidth % SubBitWidth) == 0) { |
3614 | assert(N0.getValueType().isVector() && "Expected bitcast from vector"); |
3615 | |
3616 | // Collect known bits for the (larger) output by collecting the known |
3617 | // bits from each set of sub elements and shift these into place. |
3618 | // We need to separately call computeKnownBits for each set of |
3619 | // sub elements, as the known bits for each are likely to be different. |
3620 | unsigned SubScale = BitWidth / SubBitWidth; |
3621 | APInt SubDemandedElts(NumElts * SubScale, 0); |
3622 | for (unsigned i = 0; i != NumElts; ++i) |
3623 | if (DemandedElts[i]) |
3624 | SubDemandedElts.setBit(i * SubScale); |
3625 | |
3626 | for (unsigned i = 0; i != SubScale; ++i) { |
3627 | Known2 = computeKnownBits(Op: N0, DemandedElts: SubDemandedElts.shl(shiftAmt: i), |
3628 | Depth: Depth + 1); |
3629 | unsigned Shifts = IsLE ? i : SubScale - 1 - i; |
3630 | Known.insertBits(SubBits: Known2, BitPosition: SubBitWidth * Shifts); |
3631 | } |
3632 | } |
3633 | |
3634 | // Bitcast 'large element' scalar/vector to 'small element' vector. |
3635 | if ((SubBitWidth % BitWidth) == 0) { |
3636 | assert(Op.getValueType().isVector() && "Expected bitcast to vector"); |
3637 | |
3638 | // Collect known bits for the (smaller) output by collecting the known |
3639 | // bits from the overlapping larger input elements and extracting the |
3640 | // sub sections we actually care about. |
3641 | unsigned SubScale = SubBitWidth / BitWidth; |
3642 | APInt SubDemandedElts = |
3643 | APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumElts / SubScale); |
3644 | Known2 = computeKnownBits(Op: N0, DemandedElts: SubDemandedElts, Depth: Depth + 1); |
3645 | |
3646 | Known.Zero.setAllBits(); Known.One.setAllBits(); |
3647 | for (unsigned i = 0; i != NumElts; ++i) |
3648 | if (DemandedElts[i]) { |
3649 | unsigned Shifts = IsLE ? i : NumElts - 1 - i; |
3650 | unsigned Offset = (Shifts % SubScale) * BitWidth; |
3651 | Known = Known.intersectWith(RHS: Known2.extractBits(NumBits: BitWidth, BitPosition: Offset)); |
3652 | // If we don't know any bits, early out. |
3653 | if (Known.isUnknown()) |
3654 | break; |
3655 | } |
3656 | } |
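| // Illustrative example: bitcasting v2i8 <0x01, 0x02> to i16 on a |
| // little-endian target places element 0 in the low byte, giving the known |
| // constant 0x0201; on big-endian targets the Shifts computation above |
| // reverses the sub-element placement instead. |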
3657 | break; |
3658 | } |
3659 | case ISD::AND: |
3660 | Known = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3661 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3662 | |
3663 | Known &= Known2; |
3664 | break; |
3665 | case ISD::OR: |
3666 | Known = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3667 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3668 | |
3669 | Known |= Known2; |
3670 | break; |
3671 | case ISD::XOR: |
3672 | Known = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3673 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3674 | |
3675 | Known ^= Known2; |
3676 | break; |
3677 | case ISD::MUL: { |
3678 | Known = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3679 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3680 | bool SelfMultiply = Op.getOperand(i: 0) == Op.getOperand(i: 1); |
3681 | // TODO: SelfMultiply can be poison, but not undef. |
3682 | if (SelfMultiply) |
3683 | SelfMultiply &= isGuaranteedNotToBeUndefOrPoison( |
3684 | Op: Op.getOperand(i: 0), DemandedElts, PoisonOnly: false, Depth: Depth + 1); |
3685 | Known = KnownBits::mul(LHS: Known, RHS: Known2, NoUndefSelfMultiply: SelfMultiply); |
3686 | |
3687 | // If the multiplication is known not to overflow, the product of a number |
3688 | // with itself is non-negative. Only do this if we haven't already computed |
3689 | // the opposite value for the sign bit. |
3690 | if (Op->getFlags().hasNoSignedWrap() && |
3691 | Op.getOperand(i: 0) == Op.getOperand(i: 1) && |
3692 | !Known.isNegative()) |
3693 | Known.makeNonNegative(); |
3694 | break; |
3695 | } |
3696 | case ISD::MULHU: { |
3697 | Known = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3698 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3699 | Known = KnownBits::mulhu(LHS: Known, RHS: Known2); |
3700 | break; |
3701 | } |
3702 | case ISD::MULHS: { |
3703 | Known = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3704 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3705 | Known = KnownBits::mulhs(LHS: Known, RHS: Known2); |
3706 | break; |
3707 | } |
3708 | case ISD::ABDU: { |
3709 | Known = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3710 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3711 | Known = KnownBits::abdu(LHS: Known, RHS: Known2); |
3712 | break; |
3713 | } |
3714 | case ISD::ABDS: { |
3715 | Known = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3716 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3717 | Known = KnownBits::abds(LHS: Known, RHS: Known2); |
3718 | unsigned SignBits1 = |
3719 | ComputeNumSignBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3720 | if (SignBits1 == 1) |
3721 | break; |
3722 | unsigned SignBits0 = |
3723 | ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3724 | Known.Zero.setHighBits(std::min(a: SignBits0, b: SignBits1) - 1); |
3725 | break; |
3726 | } |
3727 | case ISD::UMUL_LOHI: { |
3728 | assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); |
3729 | Known = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3730 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3731 | bool SelfMultiply = Op.getOperand(i: 0) == Op.getOperand(i: 1); |
3732 | if (Op.getResNo() == 0) |
3733 | Known = KnownBits::mul(LHS: Known, RHS: Known2, NoUndefSelfMultiply: SelfMultiply); |
3734 | else |
3735 | Known = KnownBits::mulhu(LHS: Known, RHS: Known2); |
3736 | break; |
3737 | } |
3738 | case ISD::SMUL_LOHI: { |
3739 | assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); |
3740 | Known = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3741 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3742 | bool SelfMultiply = Op.getOperand(i: 0) == Op.getOperand(i: 1); |
3743 | if (Op.getResNo() == 0) |
3744 | Known = KnownBits::mul(LHS: Known, RHS: Known2, NoUndefSelfMultiply: SelfMultiply); |
3745 | else |
3746 | Known = KnownBits::mulhs(LHS: Known, RHS: Known2); |
3747 | break; |
3748 | } |
3749 | case ISD::AVGFLOORU: { |
3750 | Known = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3751 | Known2 = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3752 | Known = KnownBits::avgFloorU(LHS: Known, RHS: Known2); |
3753 | break; |
3754 | } |
3755 | case ISD::AVGCEILU: { |
3756 | Known = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3757 | Known2 = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3758 | Known = KnownBits::avgCeilU(LHS: Known, RHS: Known2); |
3759 | break; |
3760 | } |
3761 | case ISD::AVGFLOORS: { |
3762 | Known = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3763 | Known2 = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3764 | Known = KnownBits::avgFloorS(LHS: Known, RHS: Known2); |
3765 | break; |
3766 | } |
3767 | case ISD::AVGCEILS: { |
3768 | Known = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3769 | Known2 = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3770 | Known = KnownBits::avgCeilS(LHS: Known, RHS: Known2); |
3771 | break; |
3772 | } |
3773 | case ISD::SELECT: |
3774 | case ISD::VSELECT: |
3775 | Known = computeKnownBits(Op: Op.getOperand(i: 2), DemandedElts, Depth: Depth+1); |
3776 | // If we don't know any bits, early out. |
3777 | if (Known.isUnknown()) |
3778 | break; |
3779 | Known2 = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth+1); |
3780 | |
3781 | // Only known if known in both the LHS and RHS. |
3782 | Known = Known.intersectWith(RHS: Known2); |
3783 | break; |
3784 | case ISD::SELECT_CC: |
3785 | Known = computeKnownBits(Op: Op.getOperand(i: 3), DemandedElts, Depth: Depth+1); |
3786 | // If we don't know any bits, early out. |
3787 | if (Known.isUnknown()) |
3788 | break; |
3789 | Known2 = computeKnownBits(Op: Op.getOperand(i: 2), DemandedElts, Depth: Depth+1); |
3790 | |
3791 | // Only known if known in both the LHS and RHS. |
3792 | Known = Known.intersectWith(RHS: Known2); |
3793 | break; |
3794 | case ISD::SMULO: |
3795 | case ISD::UMULO: |
3796 | if (Op.getResNo() != 1) |
3797 | break; |
3798 | // The boolean result conforms to getBooleanContents. |
3799 | // If we know the result of a setcc has the top bits zero, use this info. |
3800 | // We know that we have an integer-based boolean since these operations |
3801 | // are only available for integer. |
3802 | if (TLI->getBooleanContents(isVec: Op.getValueType().isVector(), isFloat: false) == |
3803 | TargetLowering::ZeroOrOneBooleanContent && |
3804 | BitWidth > 1) |
3805 | Known.Zero.setBitsFrom(1); |
3806 | break; |
3807 | case ISD::SETCC: |
3808 | case ISD::SETCCCARRY: |
3809 | case ISD::STRICT_FSETCC: |
3810 | case ISD::STRICT_FSETCCS: { |
3811 | unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; |
3812 | // If we know the result of a setcc has the top bits zero, use this info. |
3813 | if (TLI->getBooleanContents(Type: Op.getOperand(i: OpNo).getValueType()) == |
3814 | TargetLowering::ZeroOrOneBooleanContent && |
3815 | BitWidth > 1) |
3816 | Known.Zero.setBitsFrom(1); |
3817 | break; |
3818 | } |
3819 | case ISD::SHL: { |
3820 | Known = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3821 | Known2 = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3822 | |
3823 | bool NUW = Op->getFlags().hasNoUnsignedWrap(); |
3824 | bool NSW = Op->getFlags().hasNoSignedWrap(); |
3825 | |
3826 | bool ShAmtNonZero = Known2.isNonZero(); |
3827 | |
3828 | Known = KnownBits::shl(LHS: Known, RHS: Known2, NUW, NSW, ShAmtNonZero); |
3829 | |
3830 | // Minimum shift low bits are known zero. |
3831 | if (std::optional<uint64_t> ShMinAmt = |
3832 | getValidMinimumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) |
3833 | Known.Zero.setLowBits(*ShMinAmt); |
3834 | break; |
3835 | } |
3836 | case ISD::SRL: |
3837 | Known = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3838 | Known2 = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3839 | Known = KnownBits::lshr(LHS: Known, RHS: Known2, /*ShAmtNonZero=*/false, |
3840 | Exact: Op->getFlags().hasExact()); |
3841 | |
3842 | // Minimum shift high bits are known zero. |
3843 | if (std::optional<uint64_t> ShMinAmt = |
3844 | getValidMinimumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) |
3845 | Known.Zero.setHighBits(*ShMinAmt); |
3846 | break; |
3847 | case ISD::SRA: |
3848 | Known = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3849 | Known2 = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3850 | Known = KnownBits::ashr(LHS: Known, RHS: Known2, /*ShAmtNonZero=*/false, |
3851 | Exact: Op->getFlags().hasExact()); |
3852 | break; |
3853 | case ISD::FSHL: |
3854 | case ISD::FSHR: |
3855 | if (ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: 2), DemandedElts)) { |
3856 | unsigned Amt = C->getAPIntValue().urem(RHS: BitWidth); |
3857 | |
3858 | // For fshl, 0-shift returns the 1st arg. |
3859 | // For fshr, 0-shift returns the 2nd arg. |
3860 | if (Amt == 0) { |
3861 | Known = computeKnownBits(Op: Op.getOperand(i: Opcode == ISD::FSHL ? 0 : 1), |
3862 | DemandedElts, Depth: Depth + 1); |
3863 | break; |
3864 | } |
3865 | |
3866 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) |
3867 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) |
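| // e.g. (illustrative) for i8 operands, fshl X, Y, 3 computes |
| // (X << 3) | (Y >> 5), so the known bits are shifted by 3 and 5 below. |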
3868 | Known = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3869 | Known2 = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3870 | if (Opcode == ISD::FSHL) { |
3871 | Known.One <<= Amt; |
3872 | Known.Zero <<= Amt; |
3873 | Known2.One.lshrInPlace(ShiftAmt: BitWidth - Amt); |
3874 | Known2.Zero.lshrInPlace(ShiftAmt: BitWidth - Amt); |
3875 | } else { |
3876 | Known.One <<= BitWidth - Amt; |
3877 | Known.Zero <<= BitWidth - Amt; |
3878 | Known2.One.lshrInPlace(ShiftAmt: Amt); |
3879 | Known2.Zero.lshrInPlace(ShiftAmt: Amt); |
3880 | } |
3881 | Known = Known.unionWith(RHS: Known2); |
3882 | } |
3883 | break; |
3884 | case ISD::SHL_PARTS: |
3885 | case ISD::SRA_PARTS: |
3886 | case ISD::SRL_PARTS: { |
3887 | assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); |
3888 | |
3889 | // Collect lo/hi source values and concatenate. |
3890 | unsigned LoBits = Op.getOperand(i: 0).getScalarValueSizeInBits(); |
3891 | unsigned HiBits = Op.getOperand(i: 1).getScalarValueSizeInBits(); |
3892 | Known = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3893 | Known2 = computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
3894 | Known = Known2.concat(Lo: Known); |
3895 | |
3896 | // Collect shift amount. |
3897 | Known2 = computeKnownBits(Op: Op.getOperand(i: 2), DemandedElts, Depth: Depth + 1); |
3898 | |
3899 | if (Opcode == ISD::SHL_PARTS) |
3900 | Known = KnownBits::shl(LHS: Known, RHS: Known2); |
3901 | else if (Opcode == ISD::SRA_PARTS) |
3902 | Known = KnownBits::ashr(LHS: Known, RHS: Known2); |
3903 | else // if (Opcode == ISD::SRL_PARTS) |
3904 | Known = KnownBits::lshr(LHS: Known, RHS: Known2); |
3905 | |
3906 | // TODO: Minimum shift low/high bits are known zero. |
3907 | |
3908 | if (Op.getResNo() == 0) |
3909 | Known = Known.extractBits(NumBits: LoBits, BitPosition: 0); |
3910 | else |
3911 | Known = Known.extractBits(NumBits: HiBits, BitPosition: LoBits); |
3912 | break; |
3913 | } |
3914 | case ISD::SIGN_EXTEND_INREG: { |
3915 | Known = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3916 | EVT EVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT(); |
3917 | Known = Known.sextInReg(SrcBitWidth: EVT.getScalarSizeInBits()); |
3918 | break; |
3919 | } |
3920 | case ISD::CTTZ: |
3921 | case ISD::CTTZ_ZERO_UNDEF: { |
3922 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3923 | // If we have a known 1, its position is our upper bound. |
3924 | unsigned PossibleTZ = Known2.countMaxTrailingZeros(); |
3925 | unsigned LowBits = llvm::bit_width(Value: PossibleTZ); |
3926 | Known.Zero.setBitsFrom(LowBits); |
3927 | break; |
3928 | } |
3929 | case ISD::CTLZ: |
3930 | case ISD::CTLZ_ZERO_UNDEF: { |
3931 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3932 | // If we have a known 1, its position is our upper bound. |
3933 | unsigned PossibleLZ = Known2.countMaxLeadingZeros(); |
3934 | unsigned LowBits = llvm::bit_width(Value: PossibleLZ); |
3935 | Known.Zero.setBitsFrom(LowBits); |
3936 | break; |
3937 | } |
3938 | case ISD::CTPOP: { |
3939 | Known2 = computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
3940 | // If we know some of the bits are zero, they can't be one. |
3941 | unsigned PossibleOnes = Known2.countMaxPopulation(); |
3942 | Known.Zero.setBitsFrom(llvm::bit_width(Value: PossibleOnes)); |
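| // Illustrative example: an i32 ctpop is at most 32, so bit_width(32) == 6 |
| // bits always suffice and bits [6, 32) are known zero; known zeros in the |
| // source tighten the bound further. |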
3943 | break; |
3944 | } |
3945 | case ISD::PARITY: { |
3946 | // Parity returns 0 everywhere but the LSB. |
3947 | Known.Zero.setBitsFrom(1); |
3948 | break; |
3949 | } |
3950 | case ISD::MGATHER: |
3951 | case ISD::MLOAD: { |
3952 | ISD::LoadExtType ETy = |
3953 | (Opcode == ISD::MGATHER) |
3954 | ? cast<MaskedGatherSDNode>(Val&: Op)->getExtensionType() |
3955 | : cast<MaskedLoadSDNode>(Val&: Op)->getExtensionType(); |
3956 | if (ETy == ISD::ZEXTLOAD) { |
3957 | EVT MemVT = cast<MemSDNode>(Val&: Op)->getMemoryVT(); |
3958 | KnownBits Known0(MemVT.getScalarSizeInBits()); |
3959 | return Known0.zext(BitWidth); |
3960 | } |
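| // Illustrative example: a zero-extending masked load of i8 lanes into an |
| // i32 vector leaves the top 24 bits of every lane known zero, which is |
| // exactly the Known0.zext(BitWidth) computed in the branch above. |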
3961 | break; |
3962 | } |
3963 | case ISD::LOAD: { |
3964 | LoadSDNode *LD = cast<LoadSDNode>(Val&: Op); |
3965 | const Constant *Cst = TLI->getTargetConstantFromLoad(LD); |
3966 | if (ISD::isNON_EXTLoad(N: LD) && Cst) { |
3967 | // Determine any common known bits from the loaded constant pool value. |
3968 | Type *CstTy = Cst->getType(); |
3969 | if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits() && |
3970 | !Op.getValueType().isScalableVector()) { |
3971 | // If it's a vector splat, then we can (quickly) reuse the scalar path. |
3972 | // NOTE: We assume all elements match and none are UNDEF. |
3973 | if (CstTy->isVectorTy()) { |
3974 | if (const Constant *Splat = Cst->getSplatValue()) { |
3975 | Cst = Splat; |
3976 | CstTy = Cst->getType(); |
3977 | } |
3978 | } |
3979 | // TODO - do we need to handle different bitwidths? |
3980 | if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) { |
3981 | // Iterate across all vector elements finding common known bits. |
3982 | Known.One.setAllBits(); |
3983 | Known.Zero.setAllBits(); |
          for (unsigned i = 0; i != NumElts; ++i) {
            if (!DemandedElts[i])
              continue;
            if (Constant *Elt = Cst->getAggregateElement(i)) {
              if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
                const APInt &Value = CInt->getValue();
                Known.One &= Value;
                Known.Zero &= ~Value;
                continue;
              }
              if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
                APInt Value = CFP->getValueAPF().bitcastToAPInt();
                Known.One &= Value;
                Known.Zero &= ~Value;
                continue;
              }
            }
            Known.One.clearAllBits();
            Known.Zero.clearAllBits();
            break;
          }
        } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
          if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
            Known = KnownBits::makeConstant(CInt->getValue());
          } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
            Known =
                KnownBits::makeConstant(CFP->getValueAPF().bitcastToAPInt());
          }
        }
      }
    } else if (Op.getResNo() == 0) {
      unsigned ScalarMemorySize = LD->getMemoryVT().getScalarSizeInBits();
      KnownBits KnownScalarMemory(ScalarMemorySize);
      if (const MDNode *MD = LD->getRanges())
        computeKnownBitsFromRangeMetadata(*MD, KnownScalarMemory);

      // Extend the Known bits from memory to the size of the scalar result.
      if (ISD::isZEXTLoad(Op.getNode()))
        Known = KnownScalarMemory.zext(BitWidth);
      else if (ISD::isSEXTLoad(Op.getNode()))
        Known = KnownScalarMemory.sext(BitWidth);
      else if (ISD::isEXTLoad(Op.getNode()))
        Known = KnownScalarMemory.anyext(BitWidth);
      else
        Known = KnownScalarMemory;
      assert(Known.getBitWidth() == BitWidth);
      return Known;
    }
    break;
  }
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (Op.getValueType().isScalableVector())
      break;
    EVT InVT = Op.getOperand(0).getValueType();
    APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
    Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
    Known = Known.zext(BitWidth);
    break;
  }
  case ISD::ZERO_EXTEND: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known = Known.zext(BitWidth);
    break;
  }
  case ISD::SIGN_EXTEND_VECTOR_INREG: {
    if (Op.getValueType().isScalableVector())
      break;
    EVT InVT = Op.getOperand(0).getValueType();
    APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
    Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case ISD::SIGN_EXTEND: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG: {
    if (Op.getValueType().isScalableVector())
      break;
    EVT InVT = Op.getOperand(0).getValueType();
    APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
    Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::ANY_EXTEND: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::TRUNCATE: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known = Known.trunc(BitWidth);
    break;
  }
  case ISD::AssertZext: {
    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known.Zero |= (~InMask);
    Known.One &= (~Known.Zero);
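    // e.g. an AssertZext from i8 on an i32 value guarantees bits [31:8] are
    // known zero.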
    break;
  }
  case ISD::AssertAlign: {
    unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign());
    assert(LogOfAlign != 0);

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
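    // e.g. an assertion of 8-byte alignment gives LogOfAlign == 3, so the
    // low three bits are known zero.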
    break;
  }
  case ISD::FGETSIGN:
    // All bits are zero except the low bit.
    Known.Zero.setBitsFrom(1);
    break;
  case ISD::ADD:
  case ISD::SUB: {
    SDNodeFlags Flags = Op.getNode()->getFlags();
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::computeForAddSub(
        Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
        Flags.hasNoUnsignedWrap(), Known, Known2);
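    // e.g. if both operands have their low four bits known zero, the sum or
    // difference also has its low four bits known zero; computeForAddSub can
    // use the NSW/NUW flags to refine the leading bits further.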
    break;
  }
  case ISD::USUBO:
  case ISD::SSUBO:
  case ISD::USUBO_CARRY:
  case ISD::SSUBO_CARRY:
    if (Op.getResNo() == 1) {
      // If we know the result of a setcc has the top bits zero, use this info.
      if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
      break;
    }
    [[fallthrough]];
  case ISD::SUBC: {
    assert(Op.getResNo() == 0 &&
           "We only compute knownbits for the difference here.");

    // With USUBO_CARRY and SSUBO_CARRY a borrow bit may be added in.
    KnownBits Borrow(1);
    if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) {
      Borrow = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
      // Borrow has bit width 1
      Borrow = Borrow.trunc(1);
    } else {
      Borrow.setAllZero();
    }

    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::computeForSubBorrow(Known, Known2, Borrow);
    break;
  }
  case ISD::UADDO:
  case ISD::SADDO:
  case ISD::UADDO_CARRY:
  case ISD::SADDO_CARRY:
    if (Op.getResNo() == 1) {
      // If we know the result of a setcc has the top bits zero, use this info.
      if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
      break;
    }
    [[fallthrough]];
  case ISD::ADDC:
  case ISD::ADDE: {
    assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here.");

    // With ADDE and UADDO_CARRY, a carry bit may be added in.
    KnownBits Carry(1);
    if (Opcode == ISD::ADDE)
      // Can't track carry from glue, set carry to unknown.
      Carry.resetAll();
    else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) {
      Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
      // Carry has bit width 1
      Carry = Carry.trunc(1);
    } else {
      Carry.setAllZero();
    }

    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
    break;
  }
  case ISD::UDIV: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::udiv(Known, Known2, Op->getFlags().hasExact());
    break;
  }
  case ISD::SDIV: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::sdiv(Known, Known2, Op->getFlags().hasExact());
    break;
  }
  case ISD::SREM: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::srem(Known, Known2);
    break;
  }
  case ISD::UREM: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::urem(Known, Known2);
    break;
  }
  case ISD::EXTRACT_ELEMENT: {
    Known = computeKnownBits(Op.getOperand(0), Depth + 1);
    const unsigned Index = Op.getConstantOperandVal(1);
    const unsigned EltBitWidth = Op.getValueSizeInBits();

    // Remove low part of known bits mask
    Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
    Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth);

    // Remove high part of known bit mask
    Known = Known.trunc(EltBitWidth);
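    // e.g. extracting element 1 of an i64 split into i32 pieces keeps only
    // what was known about bits [63:32] of the source.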
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue InVec = Op.getOperand(0);
    SDValue EltNo = Op.getOperand(1);
    EVT VecVT = InVec.getValueType();
    // computeKnownBits not yet implemented for scalable vectors.
    if (VecVT.isScalableVector())
      break;
    const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
    const unsigned NumSrcElts = VecVT.getVectorNumElements();

    // If BitWidth > EltBitWidth the value is any-extended, so we do not know
    // anything about the extended bits.
    if (BitWidth > EltBitWidth)
      Known = Known.trunc(EltBitWidth);

    // If we know the element index, just demand that vector element, else for
    // an unknown element index, ignore DemandedElts and demand them all.
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
    if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
      DemandedSrcElts =
          APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());

    Known = computeKnownBits(InVec, DemandedSrcElts, Depth + 1);
    if (BitWidth > EltBitWidth)
      Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (Op.getValueType().isScalableVector())
      break;

    // If we know the element index, split the demand between the
    // source vector and the inserted element, otherwise assume we need
    // the original demanded vector elements and the value.
    SDValue InVec = Op.getOperand(0);
    SDValue InVal = Op.getOperand(1);
    SDValue EltNo = Op.getOperand(2);
    bool DemandedVal = true;
    APInt DemandedVecElts = DemandedElts;
    auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
    if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
      unsigned EltIdx = CEltNo->getZExtValue();
      DemandedVal = !!DemandedElts[EltIdx];
      DemandedVecElts.clearBit(EltIdx);
    }
    Known.One.setAllBits();
    Known.Zero.setAllBits();
    if (DemandedVal) {
      Known2 = computeKnownBits(InVal, Depth + 1);
      Known = Known.intersectWith(Known2.zextOrTrunc(BitWidth));
    }
    if (!!DemandedVecElts) {
      Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::BITREVERSE: {
    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known = Known2.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known = Known2.byteSwap();
    break;
  }
  case ISD::ABS: {
    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known = Known2.abs();
    Known.Zero.setHighBits(
        ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1) - 1);
    break;
  }
  case ISD::USUBSAT: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::usub_sat(Known, Known2);
    break;
  }
  case ISD::UMIN: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::umin(Known, Known2);
    break;
  }
  case ISD::UMAX: {
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::umax(Known, Known2);
    break;
  }
  case ISD::SMIN:
  case ISD::SMAX: {
    // If we have a clamp pattern, we know that the number of sign bits will be
    // the minimum of the clamp min/max range.
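    // e.g. smax(smin(x, 255), 0) is known to lie in [0, 255], so for an i32
    // the top 24 bits are known zero.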
    bool IsMax = (Opcode == ISD::SMAX);
    ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
    if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
      if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
        CstHigh =
            isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
    if (CstLow && CstHigh) {
      if (!IsMax)
        std::swap(CstLow, CstHigh);

      const APInt &ValueLow = CstLow->getAPIntValue();
      const APInt &ValueHigh = CstHigh->getAPIntValue();
      if (ValueLow.sle(ValueHigh)) {
        unsigned LowSignBits = ValueLow.getNumSignBits();
        unsigned HighSignBits = ValueHigh.getNumSignBits();
        unsigned MinSignBits = std::min(LowSignBits, HighSignBits);
        if (ValueLow.isNegative() && ValueHigh.isNegative()) {
          Known.One.setHighBits(MinSignBits);
          break;
        }
        if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) {
          Known.Zero.setHighBits(MinSignBits);
          break;
        }
      }
    }

    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (IsMax)
      Known = KnownBits::smax(Known, Known2);
    else
      Known = KnownBits::smin(Known, Known2);

    // For SMAX, if CstLow is non-negative we know the result will be
    // non-negative and thus all sign bits are 0.
    // TODO: There's an equivalent of this for smin with negative constant for
    // known ones.
    if (IsMax && CstLow) {
      const APInt &ValueLow = CstLow->getAPIntValue();
      if (ValueLow.isNonNegative()) {
        unsigned SignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
        Known.Zero.setHighBits(std::min(SignBits, ValueLow.getNumSignBits()));
      }
    }

    break;
  }
  case ISD::UINT_TO_FP: {
    Known.makeNonNegative();
    break;
  }
  case ISD::SINT_TO_FP: {
    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Known2.isNonNegative())
      Known.makeNonNegative();
    else if (Known2.isNegative())
      Known.makeNegative();
    break;
  }
  case ISD::FP_TO_UINT_SAT: {
    // FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT.
    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits());
    break;
  }
  case ISD::ATOMIC_LOAD: {
    // If we are looking at the loaded value.
    if (Op.getResNo() == 0) {
      auto *AT = cast<AtomicSDNode>(Op);
      unsigned ScalarMemorySize = AT->getMemoryVT().getScalarSizeInBits();
      KnownBits KnownScalarMemory(ScalarMemorySize);
      if (const MDNode *MD = AT->getRanges())
        computeKnownBitsFromRangeMetadata(*MD, KnownScalarMemory);

      switch (AT->getExtensionType()) {
      case ISD::ZEXTLOAD:
        Known = KnownScalarMemory.zext(BitWidth);
        break;
      case ISD::SEXTLOAD:
        Known = KnownScalarMemory.sext(BitWidth);
        break;
      case ISD::EXTLOAD:
        switch (TLI->getExtendForAtomicOps()) {
        case ISD::ZERO_EXTEND:
          Known = KnownScalarMemory.zext(BitWidth);
          break;
        case ISD::SIGN_EXTEND:
          Known = KnownScalarMemory.sext(BitWidth);
          break;
        default:
          Known = KnownScalarMemory.anyext(BitWidth);
          break;
        }
        break;
      case ISD::NON_EXTLOAD:
        Known = KnownScalarMemory;
        break;
      }
      assert(Known.getBitWidth() == BitWidth);
    }
    break;
  }
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    if (Op.getResNo() == 1) {
      // The boolean result conforms to getBooleanContents.
      // If we know the result of a setcc has the top bits zero, use this info.
      // We know that we have an integer-based boolean since these operations
      // are only available for integer.
      if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
      break;
    }
    [[fallthrough]];
  case ISD::ATOMIC_CMP_SWAP:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_SUB:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_CLR:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_NAND:
  case ISD::ATOMIC_LOAD_MIN:
  case ISD::ATOMIC_LOAD_MAX:
  case ISD::ATOMIC_LOAD_UMIN:
  case ISD::ATOMIC_LOAD_UMAX: {
    // If we are looking at the loaded value.
    if (Op.getResNo() == 0) {
      auto *AT = cast<AtomicSDNode>(Op);
      unsigned MemBits = AT->getMemoryVT().getScalarSizeInBits();

      if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
        Known.Zero.setBitsFrom(MemBits);
    }
    break;
  }
  case ISD::FrameIndex:
  case ISD::TargetFrameIndex:
    TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(),
                                       Known, getMachineFunction());
    break;

  default:
    if (Opcode < ISD::BUILTIN_OP_END)
      break;
    [[fallthrough]];
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_VOID:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (Op.getValueType().isScalableVector())
      break;

    // Allow the target to implement this method for its nodes.
    TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
    break;
  }

  return Known;
}

/// Convert ConstantRange OverflowResult into SelectionDAG::OverflowKind.
static SelectionDAG::OverflowKind
mapOverflowResult(ConstantRange::OverflowResult OR) {
  switch (OR) {
  case ConstantRange::OverflowResult::MayOverflow:
    return SelectionDAG::OFK_Sometime;
  case ConstantRange::OverflowResult::AlwaysOverflowsLow:
  case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
    return SelectionDAG::OFK_Always;
  case ConstantRange::OverflowResult::NeverOverflows:
    return SelectionDAG::OFK_Never;
  }
  llvm_unreachable("Unknown OverflowResult");
}

SelectionDAG::OverflowKind
SelectionDAG::computeOverflowForSignedAdd(SDValue N0, SDValue N1) const {
  // X + 0 never overflows.
  if (isNullConstant(N1))
    return OFK_Never;

  // If both operands each have at least two sign bits, the addition
  // cannot overflow.
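  // (With two sign bits each operand lies in [-2^(w-2), 2^(w-2) - 1], so the
  // sum lies in [-2^(w-1), 2^(w-1) - 2], which fits in w bits.)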
  if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
    return OFK_Never;

  // TODO: Add ConstantRange::signedAddMayOverflow handling.
  return OFK_Sometime;
}

SelectionDAG::OverflowKind
SelectionDAG::computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const {
  // X + 0 never overflows.
  if (isNullConstant(N1))
    return OFK_Never;

  // mulhi + 1 never overflows.
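  // (The high half of an unsigned w-bit multiply is at most 2^w - 2, so
  // adding a value that is at most 1 cannot wrap.)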
  KnownBits N1Known = computeKnownBits(N1);
  if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
      N1Known.getMaxValue().ult(2))
    return OFK_Never;

  KnownBits N0Known = computeKnownBits(N0);
  if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1 &&
      N0Known.getMaxValue().ult(2))
    return OFK_Never;

  // Fallback to ConstantRange::unsignedAddMayOverflow handling.
  ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
  ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
  return mapOverflowResult(N0Range.unsignedAddMayOverflow(N1Range));
}

SelectionDAG::OverflowKind
SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const {
  // X - 0 never overflows.
  if (isNullConstant(N1))
    return OFK_Never;

  // If both operands each have at least two sign bits, the subtraction
  // cannot overflow.
  if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
    return OFK_Never;

  KnownBits N0Known = computeKnownBits(N0);
  KnownBits N1Known = computeKnownBits(N1);
  ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true);
  ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true);
  return mapOverflowResult(N0Range.signedSubMayOverflow(N1Range));
}

SelectionDAG::OverflowKind
SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
  // X - 0 never overflows.
  if (isNullConstant(N1))
    return OFK_Never;

  KnownBits N0Known = computeKnownBits(N0);
  KnownBits N1Known = computeKnownBits(N1);
  ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
  ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
  return mapOverflowResult(N0Range.unsignedSubMayOverflow(N1Range));
}

SelectionDAG::OverflowKind
SelectionDAG::computeOverflowForUnsignedMul(SDValue N0, SDValue N1) const {
  // X * 0 and X * 1 never overflow.
  if (isNullConstant(N1) || isOneConstant(N1))
    return OFK_Never;

  KnownBits N0Known = computeKnownBits(N0);
  KnownBits N1Known = computeKnownBits(N1);
  ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
  ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
  return mapOverflowResult(N0Range.unsignedMulMayOverflow(N1Range));
}

SelectionDAG::OverflowKind
SelectionDAG::computeOverflowForSignedMul(SDValue N0, SDValue N1) const {
  // X * 0 and X * 1 never overflow.
  if (isNullConstant(N1) || isOneConstant(N1))
    return OFK_Never;

  // Get the size of the result.
  unsigned BitWidth = N0.getScalarValueSizeInBits();

  // Sum of the sign bits.
  unsigned SignBits = ComputeNumSignBits(N0) + ComputeNumSignBits(N1);

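  // Each operand has (BitWidth - SignBits_i + 1) significant bits, and the
  // product needs at most the sum of those, so SignBits > BitWidth + 1 means
  // the product fits without wrapping.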
  // If we have enough sign bits, then there's no overflow.
  if (SignBits > BitWidth + 1)
    return OFK_Never;

  if (SignBits == BitWidth + 1) {
    // The overflow occurs when the true multiplication of the operands
    // is the minimum negative number.
    KnownBits N0Known = computeKnownBits(N0);
    KnownBits N1Known = computeKnownBits(N1);
    // If one of the operands is non-negative, then there's no
    // overflow.
    if (N0Known.isNonNegative() || N1Known.isNonNegative())
      return OFK_Never;
  }

  return OFK_Sometime;
}

bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
  if (Depth >= MaxRecursionDepth)
    return false; // Limit search depth.

  EVT OpVT = Val.getValueType();
  unsigned BitWidth = OpVT.getScalarSizeInBits();

  // Is the constant a known power of 2?
  if (ISD::matchUnaryPredicate(Val, [BitWidth](ConstantSDNode *C) {
        return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
      }))
    return true;

  // A left-shift of a constant one will have exactly one bit set because
  // shifting the bit off the end is undefined.
  if (Val.getOpcode() == ISD::SHL) {
    auto *C = isConstOrConstSplat(Val.getOperand(0));
    if (C && C->getAPIntValue() == 1)
      return true;
    return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
           isKnownNeverZero(Val, Depth);
  }

  // Similarly, a logical right-shift of a constant sign-bit will have exactly
  // one bit set.
  if (Val.getOpcode() == ISD::SRL) {
    auto *C = isConstOrConstSplat(Val.getOperand(0));
    if (C && C->getAPIntValue().isSignMask())
      return true;
    return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
           isKnownNeverZero(Val, Depth);
  }

  if (Val.getOpcode() == ISD::ROTL || Val.getOpcode() == ISD::ROTR)
    return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);

  // Are all operands of a build vector constant powers of two?
  if (Val.getOpcode() == ISD::BUILD_VECTOR)
    if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
          if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E))
            return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
          return false;
        }))
      return true;

  // Is the operand of a splat vector a constant power of two?
  if (Val.getOpcode() == ISD::SPLAT_VECTOR)
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0)))
      if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())
        return true;

  // vscale(power-of-two) is a power-of-two for some targets.
  if (Val.getOpcode() == ISD::VSCALE &&
      getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() &&
      isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1))
    return true;

  if (Val.getOpcode() == ISD::SMIN || Val.getOpcode() == ISD::SMAX ||
      Val.getOpcode() == ISD::UMIN || Val.getOpcode() == ISD::UMAX)
    return isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1) &&
           isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);

  if (Val.getOpcode() == ISD::SELECT || Val.getOpcode() == ISD::VSELECT)
    return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) &&
           isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1);

  // Looking for `x & -x` pattern:
  // If x == 0:
  //   x & -x -> 0
  // If x != 0:
  //   x & -x -> non-zero pow2
  // so if we find the pattern return whether we know `x` is non-zero.
  SDValue X;
  if (sd_match(Val, m_And(m_Value(X), m_Neg(m_Deferred(X)))))
    return isKnownNeverZero(X, Depth);

  if (Val.getOpcode() == ISD::ZERO_EXTEND)
    return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);

  // More could be done here, though the above checks are enough
  // to handle some common cases.
  return false;
}

bool SelectionDAG::isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth) const {
  if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Val, true))
    return C1->getValueAPF().getExactLog2Abs() >= 0;

  if (Val.getOpcode() == ISD::UINT_TO_FP || Val.getOpcode() == ISD::SINT_TO_FP)
    return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);

  return false;
}

unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ComputeNumSignBits(Op, DemandedElts, Depth);
}

unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
                                          unsigned Depth) const {
  EVT VT = Op.getValueType();
  assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!");
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned Tmp, Tmp2;
  unsigned FirstAnswer = 1;

  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
    const APInt &Val = C->getAPIntValue();
    return Val.getNumSignBits();
  }

  if (Depth >= MaxRecursionDepth)
    return 1; // Limit search depth.

  if (!DemandedElts)
    return 1; // No demanded elts, better to assume we don't know anything.

  unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  default: break;
  case ISD::AssertSext:
    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
    return VTBits - Tmp + 1;
  case ISD::AssertZext:
    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
    return VTBits - Tmp;
  case ISD::MERGE_VALUES:
    return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts,
                              Depth + 1);
  case ISD::SPLAT_VECTOR: {
    // Check if the sign bits of source go down as far as the truncated value.
    unsigned NumSrcBits = Op.getOperand(0).getValueSizeInBits();
    unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - VTBits))
      return NumSrcSignBits - (NumSrcBits - VTBits);
    break;
  }
  case ISD::BUILD_VECTOR:
    assert(!VT.isScalableVector());
    Tmp = VTBits;
    for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
      if (!DemandedElts[i])
        continue;

      SDValue SrcOp = Op.getOperand(i);
      // BUILD_VECTOR can implicitly truncate sources, we handle this specially
      // for constant nodes to ensure we only look at the sign bits.
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SrcOp)) {
        APInt T = C->getAPIntValue().trunc(VTBits);
        Tmp2 = T.getNumSignBits();
      } else {
        Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1);

        if (SrcOp.getValueSizeInBits() != VTBits) {
          assert(SrcOp.getValueSizeInBits() > VTBits &&
                 "Expected BUILD_VECTOR implicit truncation");
          unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
          Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
        }
      }
      Tmp = std::min(Tmp, Tmp2);
    }
    return Tmp;

  case ISD::VECTOR_SHUFFLE: {
    // Collect the minimum number of sign bits that are shared by every vector
    // element referenced by the shuffle.
    APInt DemandedLHS, DemandedRHS;
    const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
    assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
    if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts,
                                DemandedLHS, DemandedRHS))
      return 1;

    Tmp = std::numeric_limits<unsigned>::max();
    if (!!DemandedLHS)
      Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
    if (!!DemandedRHS) {
      Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
      Tmp = std::min(Tmp, Tmp2);
    }
    // If we don't know anything, early out and try computeKnownBits fall-back.
    if (Tmp == 1)
      break;
    assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
    return Tmp;
  }

  case ISD::BITCAST: {
    if (VT.isScalableVector())
      break;
    SDValue N0 = Op.getOperand(0);
    EVT SrcVT = N0.getValueType();
    unsigned SrcBits = SrcVT.getScalarSizeInBits();

    // Ignore bitcasts from unsupported types.
    if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint()))
      break;

    // Fast handling of 'identity' bitcasts.
    if (VTBits == SrcBits)
      return ComputeNumSignBits(N0, DemandedElts, Depth + 1);

    bool IsLE = getDataLayout().isLittleEndian();

    // Bitcast 'large element' scalar/vector to 'small element' vector.
    if ((SrcBits % VTBits) == 0) {
      assert(VT.isVector() && "Expected bitcast to vector");

      unsigned Scale = SrcBits / VTBits;
      APInt SrcDemandedElts =
          APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale);

      // Fast case - sign splat can be simply split across the small elements.
      Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
      if (Tmp == SrcBits)
        return VTBits;

      // Slow case - determine how far the sign extends into each sub-element.
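      // e.g. (little endian) bitcasting an i64 with 40 sign bits to v2i32
      // leaves element 1 (the high half) with all 32 bits as sign bits and
      // element 0 with min(32, 40 - 32) == 8.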
      Tmp2 = VTBits;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned SubOffset = i % Scale;
          SubOffset = (IsLE ? ((Scale - 1) - SubOffset) : SubOffset);
          SubOffset = SubOffset * VTBits;
          if (Tmp <= SubOffset)
            return 1;
          Tmp2 = std::min(Tmp2, Tmp - SubOffset);
        }
      return Tmp2;
    }
    break;
  }

  case ISD::FP_TO_SINT_SAT:
    // FP_TO_SINT_SAT produces a signed value that fits in the saturating VT.
    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
    return VTBits - Tmp + 1;
  case ISD::SIGN_EXTEND:
    Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
    return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1) + Tmp;
  case ISD::SIGN_EXTEND_INREG:
    // Max of the input and what this extends.
    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
    Tmp = VTBits - Tmp + 1;
    Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    return std::max(Tmp, Tmp2);
  case ISD::SIGN_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      break;
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
    Tmp = VTBits - SrcVT.getScalarSizeInBits();
    return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1) + Tmp;
  }
  case ISD::SRA:
    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    // SRA X, C -> adds C sign bits.
    if (std::optional<uint64_t> ShAmt =
            getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1))
      Tmp = std::min<uint64_t>(Tmp + *ShAmt, VTBits);
    return Tmp;
  case ISD::SHL:
    if (std::optional<ConstantRange> ShAmtRange =
            getValidShiftAmountRange(Op, DemandedElts, Depth + 1)) {
      uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
      uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();
      // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are
      // shifted out, then we can compute the number of sign bits for the
      // operand being extended. A future improvement could be to pass along
      // the "shifted left by" information in the recursive calls to
      // ComputeNumSignBits, allowing us to handle this more generically.
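      // e.g. for (shl (sext i8 %x to i32), 24), SizeDifference is 24; with a
      // shift amount of exactly 24 the result has the same number of sign
      // bits as %x itself.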
      if (ISD::isExtOpcode(Op.getOperand(0).getOpcode())) {
        SDValue Ext = Op.getOperand(0);
        EVT ExtVT = Ext.getValueType();
        SDValue Extendee = Ext.getOperand(0);
        EVT ExtendeeVT = Extendee.getValueType();
        uint64_t SizeDifference =
            ExtVT.getScalarSizeInBits() - ExtendeeVT.getScalarSizeInBits();
        if (SizeDifference <= MinShAmt) {
          Tmp = SizeDifference +
                ComputeNumSignBits(Extendee, DemandedElts, Depth + 1);
          if (MaxShAmt < Tmp)
            return Tmp - MaxShAmt;
        }
      }
      // shl destroys sign bits, ensure it doesn't shift out all sign bits.
      Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
      if (MaxShAmt < Tmp)
        return Tmp - MaxShAmt;
    }
    break;
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR: // NOT is handled here.
    // Logical binary ops preserve the number of sign bits at the worst.
    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp != 1) {
      Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
      FirstAnswer = std::min(Tmp, Tmp2);
      // We computed what we know about the sign bits as our first
      // answer. Now proceed to the generic code that uses
      // computeKnownBits, and pick whichever answer is better.
    }
    break;

  case ISD::SELECT:
  case ISD::VSELECT:
    Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1; // Early out.
    Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  case ISD::SELECT_CC:
    Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1; // Early out.
    Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);

  case ISD::SMIN:
  case ISD::SMAX: {
    // If we have a clamp pattern, we know that the number of sign bits will be
    // the minimum of the clamp min/max range.
    bool IsMax = (Opcode == ISD::SMAX);
    ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
    if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
      if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
        CstHigh =
            isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
    if (CstLow && CstHigh) {
      if (!IsMax)
        std::swap(CstLow, CstHigh);
      if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) {
        Tmp = CstLow->getAPIntValue().getNumSignBits();
        Tmp2 = CstHigh->getAPIntValue().getNumSignBits();
        return std::min(Tmp, Tmp2);
      }
    }

    // Fallback - just get the minimum number of sign bits of the operands.
    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp == 1)
      return 1; // Early out.
    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  }
  case ISD::UMIN:
  case ISD::UMAX:
    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp == 1)
      return 1; // Early out.
    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  case ISD::SSUBO_CARRY:
  case ISD::USUBO_CARRY:
    // sub_carry(x,x,c) -> 0/-1 (sext carry)
    if (Op.getResNo() == 0 && Op.getOperand(0) == Op.getOperand(1))
      return VTBits;
    [[fallthrough]];
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SADDO_CARRY:
  case ISD::UADDO_CARRY:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:
    if (Op.getResNo() != 1)
      break;
    // The boolean result conforms to getBooleanContents. Fall through.
    // If setcc returns 0/-1, all bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integer.
    if (TLI->getBooleanContents(VT.isVector(), false) ==
        TargetLowering::ZeroOrNegativeOneBooleanContent)
      return VTBits;
    break;
  case ISD::SETCC:
  case ISD::SETCCCARRY:
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS: {
    unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
    // If setcc returns 0/-1, all bits are sign bits.
    if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) ==
        TargetLowering::ZeroOrNegativeOneBooleanContent)
      return VTBits;
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR:
    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);

    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
    if (Tmp == VTBits)
      return VTBits;

    if (ConstantSDNode *C =
            isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
      unsigned RotAmt = C->getAPIntValue().urem(VTBits);

      // Handle rotate right by N like a rotate left by 32-N.
      if (Opcode == ISD::ROTR)
        RotAmt = (VTBits - RotAmt) % VTBits;

      // If we aren't rotating out all of the known-in sign bits, return the
      // number that are left. This handles rotl(sext(x), 1) for example.
      if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
    }
    break;
  case ISD::ADD:
  case ISD::ADDC:
    // Add can have at most one carry bit. Thus we know that the output
    // is, at worst, one more bit than the inputs.
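    // e.g. if both operands have at least ten sign bits, the sum still has
    // at least nine.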
    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1; // Early out.

    // Special case decrementing a value (ADD X, -1):
    if (ConstantSDNode *CRHS =
            isConstOrConstSplat(Op.getOperand(1), DemandedElts))
      if (CRHS->isAllOnes()) {
        KnownBits Known =
            computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);

        // If the input is known to be 0 or 1, the output is 0/-1, which is all
        // sign bits set.
        if ((Known.Zero | 1).isAllOnes())
          return VTBits;

        // If we are subtracting one from a positive number, there is no carry
        // out of the result.
        if (Known.isNonNegative())
          return Tmp;
      }

    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (Tmp2 == 1) return 1; // Early out.
    return std::min(Tmp, Tmp2) - 1;
  case ISD::SUB:
    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (Tmp2 == 1) return 1; // Early out.

    // Handle NEG.
    if (ConstantSDNode *CLHS =
            isConstOrConstSplat(Op.getOperand(0), DemandedElts))
      if (CLHS->isZero()) {
        KnownBits Known =
            computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
        // If the input is known to be 0 or 1, the output is 0/-1, which is all
        // sign bits set.
        if ((Known.Zero | 1).isAllOnes())
          return VTBits;

        // If the input is known to be positive (the sign bit is known clear),
        // the output of the NEG has the same number of sign bits as the input.
        if (Known.isNonNegative())
          return Tmp2;

        // Otherwise, we treat this like a SUB.
      }

    // Sub can have at most one carry bit. Thus we know that the output
    // is, at worst, one more bit than the inputs.
    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1; // Early out.
    return std::min(Tmp, Tmp2) - 1;
  case ISD::MUL: {
    // The output of the Mul can be at most twice the valid bits in the inputs.
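    // e.g. multiplying two i32 values that each have 20 sign bits (13 valid
    // bits) needs at most 26 valid bits, leaving 32 - 26 + 1 == 7 sign bits.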
    unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
    if (SignBitsOp0 == 1)
      break;
    unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
    if (SignBitsOp1 == 1)
      break;
    unsigned OutValidBits =
        (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
    return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
  }
  case ISD::AVGCEILS:
  case ISD::AVGFLOORS:
    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp == 1)
      return 1; // Early out.
    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  case ISD::SREM:
    // The sign bit is the LHS's sign bit, except when the result of the
    // remainder is zero. The magnitude of the result should be less than or
    // equal to the magnitude of the LHS. Therefore, the result should have
    // at least as many sign bits as the left hand side.
    return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
  case ISD::TRUNCATE: {
    // Check if the sign bits of source go down as far as the truncated value.
    unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
    unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - VTBits))
      return NumSrcSignBits - (NumSrcBits - VTBits);
    break;
  }
  case ISD::EXTRACT_ELEMENT: {
    if (VT.isScalableVector())
      break;
    const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
    const int BitWidth = Op.getValueSizeInBits();
    const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth;

    // Get reverse index (starting from 1), Op1 value indexes elements from
    // little end. Sign starts at big end.
    const int rIndex = Items - 1 - Op.getConstantOperandVal(1);
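    // e.g. extracting the high i32 of an i64 with 40 sign bits gives
    // rIndex == 0 and all 32 bits are sign bits; the low i32 gives
    // rIndex == 1 and 40 - 32 == 8 sign bits.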
5127 | |
5128 | // If the sign portion ends in our element the subtraction gives correct |
5129 | // result. Otherwise it gives either negative or > bitwidth result |
5130 | return std::clamp(val: KnownSign - rIndex * BitWidth, lo: 0, hi: BitWidth); |
5131 | } |
5132 | case ISD::INSERT_VECTOR_ELT: { |
5133 | if (VT.isScalableVector()) |
5134 | break; |
5135 | // If we know the element index, split the demand between the |
5136 | // source vector and the inserted element, otherwise assume we need |
5137 | // the original demanded vector elements and the value. |
5138 | SDValue InVec = Op.getOperand(i: 0); |
5139 | SDValue InVal = Op.getOperand(i: 1); |
5140 | SDValue EltNo = Op.getOperand(i: 2); |
5141 | bool DemandedVal = true; |
5142 | APInt DemandedVecElts = DemandedElts; |
5143 | auto *CEltNo = dyn_cast<ConstantSDNode>(Val&: EltNo); |
5144 | if (CEltNo && CEltNo->getAPIntValue().ult(RHS: NumElts)) { |
5145 | unsigned EltIdx = CEltNo->getZExtValue(); |
5146 | DemandedVal = !!DemandedElts[EltIdx]; |
5147 | DemandedVecElts.clearBit(BitPosition: EltIdx); |
5148 | } |
5149 | Tmp = std::numeric_limits<unsigned>::max(); |
5150 | if (DemandedVal) { |
5151 | // TODO - handle implicit truncation of inserted elements. |
5152 | if (InVal.getScalarValueSizeInBits() != VTBits) |
5153 | break; |
5154 | Tmp2 = ComputeNumSignBits(Op: InVal, Depth: Depth + 1); |
5155 | Tmp = std::min(a: Tmp, b: Tmp2); |
5156 | } |
5157 | if (!!DemandedVecElts) { |
5158 | Tmp2 = ComputeNumSignBits(Op: InVec, DemandedElts: DemandedVecElts, Depth: Depth + 1); |
5159 | Tmp = std::min(a: Tmp, b: Tmp2); |
5160 | } |
5161 | assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); |
5162 | return Tmp; |
5163 | } |
5164 | case ISD::EXTRACT_VECTOR_ELT: { |
5165 | assert(!VT.isScalableVector()); |
5166 | SDValue InVec = Op.getOperand(i: 0); |
5167 | SDValue EltNo = Op.getOperand(i: 1); |
5168 | EVT VecVT = InVec.getValueType(); |
5169 | // ComputeNumSignBits not yet implemented for scalable vectors. |
5170 | if (VecVT.isScalableVector()) |
5171 | break; |
5172 | const unsigned BitWidth = Op.getValueSizeInBits(); |
5173 | const unsigned EltBitWidth = Op.getOperand(i: 0).getScalarValueSizeInBits(); |
5174 | const unsigned NumSrcElts = VecVT.getVectorNumElements(); |
5175 | |
5176 | // If BitWidth > EltBitWidth the value is anyext:ed, and we do not know |
5177 | // anything about sign bits. But if the sizes match we can derive knowledge |
5178 | // about sign bits from the vector operand. |
5179 | if (BitWidth != EltBitWidth) |
5180 | break; |
5181 | |
5182 | // If we know the element index, just demand that vector element, else for |
5183 | // an unknown element index, ignore DemandedElts and demand them all. |
5184 | APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts); |
5185 | auto *ConstEltNo = dyn_cast<ConstantSDNode>(Val&: EltNo); |
5186 | if (ConstEltNo && ConstEltNo->getAPIntValue().ult(RHS: NumSrcElts)) |
5187 | DemandedSrcElts = |
5188 | APInt::getOneBitSet(numBits: NumSrcElts, BitNo: ConstEltNo->getZExtValue()); |
5189 | |
5190 | return ComputeNumSignBits(Op: InVec, DemandedElts: DemandedSrcElts, Depth: Depth + 1); |
5191 | } |
5192 | case ISD::EXTRACT_SUBVECTOR: { |
5193 | // Offset the demanded elts by the subvector index. |
5194 | SDValue Src = Op.getOperand(i: 0); |
5195 | // Bail until we can represent demanded elements for scalable vectors. |
5196 | if (Src.getValueType().isScalableVector()) |
5197 | break; |
5198 | uint64_t Idx = Op.getConstantOperandVal(i: 1); |
5199 | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
5200 | APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx); |
5201 | return ComputeNumSignBits(Op: Src, DemandedElts: DemandedSrcElts, Depth: Depth + 1); |
5202 | } |
5203 | case ISD::CONCAT_VECTORS: { |
5204 | if (VT.isScalableVector()) |
5205 | break; |
5206 | // Determine the minimum number of sign bits across all demanded |
5207 | // elts of the input vectors. Early out if the result is already 1. |
5208 | Tmp = std::numeric_limits<unsigned>::max(); |
5209 | EVT SubVectorVT = Op.getOperand(i: 0).getValueType(); |
5210 | unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); |
5211 | unsigned NumSubVectors = Op.getNumOperands(); |
5212 | for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) { |
5213 | APInt DemandedSub = |
5214 | DemandedElts.extractBits(numBits: NumSubVectorElts, bitPosition: i * NumSubVectorElts); |
5215 | if (!DemandedSub) |
5216 | continue; |
5217 | Tmp2 = ComputeNumSignBits(Op: Op.getOperand(i), DemandedElts: DemandedSub, Depth: Depth + 1); |
5218 | Tmp = std::min(a: Tmp, b: Tmp2); |
5219 | } |
5220 | assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); |
5221 | return Tmp; |
5222 | } |
5223 | case ISD::INSERT_SUBVECTOR: { |
5224 | if (VT.isScalableVector()) |
5225 | break; |
5226 | // Demand any elements from the subvector and the remainder from the src its |
5227 | // inserted into. |
5228 | SDValue Src = Op.getOperand(i: 0); |
5229 | SDValue Sub = Op.getOperand(i: 1); |
5230 | uint64_t Idx = Op.getConstantOperandVal(i: 2); |
5231 | unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); |
5232 | APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx); |
5233 | APInt DemandedSrcElts = DemandedElts; |
5234 | DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts); |
5235 | |
5236 | Tmp = std::numeric_limits<unsigned>::max(); |
5237 | if (!!DemandedSubElts) { |
5238 | Tmp = ComputeNumSignBits(Op: Sub, DemandedElts: DemandedSubElts, Depth: Depth + 1); |
5239 | if (Tmp == 1) |
5240 | return 1; // early-out |
5241 | } |
5242 | if (!!DemandedSrcElts) { |
5243 | Tmp2 = ComputeNumSignBits(Op: Src, DemandedElts: DemandedSrcElts, Depth: Depth + 1); |
5244 | Tmp = std::min(a: Tmp, b: Tmp2); |
5245 | } |
5246 | assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); |
5247 | return Tmp; |
5248 | } |
5249 | case ISD::LOAD: { |
5250 | LoadSDNode *LD = cast<LoadSDNode>(Val&: Op); |
5251 | if (const MDNode *Ranges = LD->getRanges()) { |
5252 | if (DemandedElts != 1) |
5253 | break; |
5254 | |
5255 | ConstantRange CR = getConstantRangeFromMetadata(RangeMD: *Ranges); |
5256 | if (VTBits > CR.getBitWidth()) { |
5257 | switch (LD->getExtensionType()) { |
5258 | case ISD::SEXTLOAD: |
5259 | CR = CR.signExtend(BitWidth: VTBits); |
5260 | break; |
5261 | case ISD::ZEXTLOAD: |
5262 | CR = CR.zeroExtend(BitWidth: VTBits); |
5263 | break; |
5264 | default: |
5265 | break; |
5266 | } |
5267 | } |
5268 | |
5269 | if (VTBits != CR.getBitWidth()) |
5270 | break; |
5271 | return std::min(a: CR.getSignedMin().getNumSignBits(), |
5272 | b: CR.getSignedMax().getNumSignBits()); |
5273 | } |
5274 | |
5275 | break; |
5276 | } |
5277 | case ISD::ATOMIC_CMP_SWAP: |
5278 | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: |
5279 | case ISD::ATOMIC_SWAP: |
5280 | case ISD::ATOMIC_LOAD_ADD: |
5281 | case ISD::ATOMIC_LOAD_SUB: |
5282 | case ISD::ATOMIC_LOAD_AND: |
5283 | case ISD::ATOMIC_LOAD_CLR: |
5284 | case ISD::ATOMIC_LOAD_OR: |
5285 | case ISD::ATOMIC_LOAD_XOR: |
5286 | case ISD::ATOMIC_LOAD_NAND: |
5287 | case ISD::ATOMIC_LOAD_MIN: |
5288 | case ISD::ATOMIC_LOAD_MAX: |
5289 | case ISD::ATOMIC_LOAD_UMIN: |
5290 | case ISD::ATOMIC_LOAD_UMAX: |
5291 | case ISD::ATOMIC_LOAD: { |
5292 | auto *AT = cast<AtomicSDNode>(Val&: Op); |
5293 | // If we are looking at the loaded value. |
5294 | if (Op.getResNo() == 0) { |
5295 | Tmp = AT->getMemoryVT().getScalarSizeInBits(); |
5296 | if (Tmp == VTBits) |
5297 | return 1; // early-out |
5298 | |
5299 | // For atomic_load, prefer to use the extension type. |
5300 | if (Op->getOpcode() == ISD::ATOMIC_LOAD) { |
5301 | switch (AT->getExtensionType()) { |
5302 | default: |
5303 | break; |
5304 | case ISD::SEXTLOAD: |
5305 | return VTBits - Tmp + 1; |
5306 | case ISD::ZEXTLOAD: |
5307 | return VTBits - Tmp; |
5308 | } |
5309 | } |
5310 | |
5311 | if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND) |
5312 | return VTBits - Tmp + 1; |
5313 | if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) |
5314 | return VTBits - Tmp; |
5315 | } |
5316 | break; |
5317 | } |
5318 | } |
5319 | |
5320 | // If we are looking at the loaded value of the SDNode. |
5321 | if (Op.getResNo() == 0) { |
5322 | // Handle LOADX separately here. EXTLOAD case will fall through. |
5323 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val&: Op)) { |
5324 | unsigned ExtType = LD->getExtensionType(); |
5325 | switch (ExtType) { |
5326 | default: break; |
5327 | case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known. |
5328 | Tmp = LD->getMemoryVT().getScalarSizeInBits(); |
5329 | return VTBits - Tmp + 1; |
5330 | case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known. |
5331 | Tmp = LD->getMemoryVT().getScalarSizeInBits(); |
5332 | return VTBits - Tmp; |
5333 | case ISD::NON_EXTLOAD: |
5334 | if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) { |
5335 | // We only need to handle vectors - computeKnownBits should handle |
5336 | // scalar cases. |
5337 | Type *CstTy = Cst->getType(); |
5338 | if (CstTy->isVectorTy() && !VT.isScalableVector() && |
5339 | (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() && |
5340 | VTBits == CstTy->getScalarSizeInBits()) { |
5341 | Tmp = VTBits; |
5342 | for (unsigned i = 0; i != NumElts; ++i) { |
5343 | if (!DemandedElts[i]) |
5344 | continue; |
5345 | if (Constant *Elt = Cst->getAggregateElement(Elt: i)) { |
5346 | if (auto *CInt = dyn_cast<ConstantInt>(Val: Elt)) { |
5347 | const APInt &Value = CInt->getValue(); |
5348 | Tmp = std::min(a: Tmp, b: Value.getNumSignBits()); |
5349 | continue; |
5350 | } |
5351 | if (auto *CFP = dyn_cast<ConstantFP>(Val: Elt)) { |
5352 | APInt Value = CFP->getValueAPF().bitcastToAPInt(); |
5353 | Tmp = std::min(a: Tmp, b: Value.getNumSignBits()); |
5354 | continue; |
5355 | } |
5356 | } |
5357 | // Unknown type. Conservatively assume no bits match sign bit. |
5358 | return 1; |
5359 | } |
5360 | return Tmp; |
5361 | } |
5362 | } |
5363 | break; |
5364 | } |
5365 | } |
5366 | } |
5367 | |
5368 | // Allow the target to implement this method for its nodes. |
5369 | if (Opcode >= ISD::BUILTIN_OP_END || |
5370 | Opcode == ISD::INTRINSIC_WO_CHAIN || |
5371 | Opcode == ISD::INTRINSIC_W_CHAIN || |
5372 | Opcode == ISD::INTRINSIC_VOID) { |
5373 | // TODO: This can probably be removed once target code is audited. This |
5374 | // is here purely to reduce patch size and review complexity. |
5375 | if (!VT.isScalableVector()) { |
5376 | unsigned NumBits = |
5377 | TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, DAG: *this, Depth); |
5378 | if (NumBits > 1) |
5379 | FirstAnswer = std::max(a: FirstAnswer, b: NumBits); |
5380 | } |
5381 | } |
5382 | |
5383 | // Finally, if we can prove that the top bits of the result are 0's or 1's, |
5384 | // use this information. |
5385 | KnownBits Known = computeKnownBits(Op, DemandedElts, Depth); |
5386 | return std::max(a: FirstAnswer, b: Known.countMinSignBits()); |
5387 | } |
5388 | |
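 | // A value with S known sign bits in a W-bit type needs at most W - S + 1 bits |
 | // to represent, e.g. an i32 with 29 sign bits has at most 4 significant bits. |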
5389 | unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op, |
5390 | unsigned Depth) const { |
5391 | unsigned SignBits = ComputeNumSignBits(Op, Depth); |
5392 | return Op.getScalarValueSizeInBits() - SignBits + 1; |
5393 | } |
5394 | |
5395 | unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op, |
5396 | const APInt &DemandedElts, |
5397 | unsigned Depth) const { |
5398 | unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth); |
5399 | return Op.getScalarValueSizeInBits() - SignBits + 1; |
5400 | } |
5401 | |
5402 | bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly, |
5403 | unsigned Depth) const { |
5404 | // Early out for FREEZE. |
5405 | if (Op.getOpcode() == ISD::FREEZE) |
5406 | return true; |
5407 | |
5408 | EVT VT = Op.getValueType(); |
5409 | APInt DemandedElts = VT.isFixedLengthVector() |
5410 | ? APInt::getAllOnes(numBits: VT.getVectorNumElements()) |
5411 | : APInt(1, 1); |
5412 | return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth); |
5413 | } |
5414 | |
5415 | bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, |
5416 | const APInt &DemandedElts, |
5417 | bool PoisonOnly, |
5418 | unsigned Depth) const { |
5419 | unsigned Opcode = Op.getOpcode(); |
5420 | |
5421 | // Early out for FREEZE. |
5422 | if (Opcode == ISD::FREEZE) |
5423 | return true; |
5424 | |
5425 | if (Depth >= MaxRecursionDepth) |
5426 | return false; // Limit search depth. |
5427 | |
5428 | if (isIntOrFPConstant(V: Op)) |
5429 | return true; |
5430 | |
5431 | switch (Opcode) { |
5432 | case ISD::CONDCODE: |
5433 | case ISD::VALUETYPE: |
5434 | case ISD::FrameIndex: |
5435 | case ISD::TargetFrameIndex: |
5436 | case ISD::CopyFromReg: |
5437 | return true; |
5438 | |
5439 | case ISD::POISON: |
5440 | return false; |
5441 | |
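 | // UNDEF is undef but never poison, so it is acceptable when only poison |
 | // matters. |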
5442 | case ISD::UNDEF: |
5443 | return PoisonOnly; |
5444 | |
5445 | case ISD::BUILD_VECTOR: |
5446 | // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements - |
5447 | // this shouldn't affect the result. |
5448 | for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) { |
5449 | if (!DemandedElts[i]) |
5450 | continue; |
5451 | if (!isGuaranteedNotToBeUndefOrPoison(Op: Op.getOperand(i), PoisonOnly, |
5452 | Depth: Depth + 1)) |
5453 | return false; |
5454 | } |
5455 | return true; |
5456 | |
5457 | case ISD::SPLAT_VECTOR: |
5458 | return isGuaranteedNotToBeUndefOrPoison(Op: Op.getOperand(i: 0), PoisonOnly, |
5459 | Depth: Depth + 1); |
5460 | |
5461 | case ISD::VECTOR_SHUFFLE: { |
5462 | APInt DemandedLHS, DemandedRHS; |
5463 | auto *SVN = cast<ShuffleVectorSDNode>(Val&: Op); |
5464 | if (!getShuffleDemandedElts(SrcWidth: DemandedElts.getBitWidth(), Mask: SVN->getMask(), |
5465 | DemandedElts, DemandedLHS, DemandedRHS, |
5466 | /*AllowUndefElts=*/false)) |
5467 | return false; |
5468 | if (!DemandedLHS.isZero() && |
5469 | !isGuaranteedNotToBeUndefOrPoison(Op: Op.getOperand(i: 0), DemandedElts: DemandedLHS, |
5470 | PoisonOnly, Depth: Depth + 1)) |
5471 | return false; |
5472 | if (!DemandedRHS.isZero() && |
5473 | !isGuaranteedNotToBeUndefOrPoison(Op: Op.getOperand(i: 1), DemandedElts: DemandedRHS, |
5474 | PoisonOnly, Depth: Depth + 1)) |
5475 | return false; |
5476 | return true; |
5477 | } |
5478 | |
5479 | // TODO: Search for noundef attributes from library functions. |
5480 | |
5481 | // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef. |
5482 | |
5483 | default: |
5484 | // Allow the target to implement this method for its nodes. |
5485 | if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || |
5486 | Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) |
5487 | return TLI->isGuaranteedNotToBeUndefOrPoisonForTargetNode( |
5488 | Op, DemandedElts, DAG: *this, PoisonOnly, Depth); |
5489 | break; |
5490 | } |
5491 | |
5492 | // If Op can't create undef/poison and none of its operands are undef/poison |
5493 | // then Op is never undef/poison. |
5494 | // NOTE: TargetNodes can handle this themselves in |
5495 | // isGuaranteedNotToBeUndefOrPoisonForTargetNode, or let |
5496 | // TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode handle it. |
5497 | return !canCreateUndefOrPoison(Op, PoisonOnly, /*ConsiderFlags*/ true, |
5498 | Depth) && |
5499 | all_of(Range: Op->ops(), P: [&](SDValue V) { |
5500 | return isGuaranteedNotToBeUndefOrPoison(Op: V, PoisonOnly, Depth: Depth + 1); |
5501 | }); |
5502 | } |
5503 | |
5504 | bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly, |
5505 | bool ConsiderFlags, |
5506 | unsigned Depth) const { |
5507 | EVT VT = Op.getValueType(); |
5508 | APInt DemandedElts = VT.isFixedLengthVector() |
5509 | ? APInt::getAllOnes(numBits: VT.getVectorNumElements()) |
5510 | : APInt(1, 1); |
5511 | return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags, |
5512 | Depth); |
5513 | } |
5514 | |
5515 | bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, |
5516 | bool PoisonOnly, bool ConsiderFlags, |
5517 | unsigned Depth) const { |
5518 | if (ConsiderFlags && Op->hasPoisonGeneratingFlags()) |
5519 | return true; |
5520 | |
5521 | unsigned Opcode = Op.getOpcode(); |
5522 | switch (Opcode) { |
5523 | case ISD::FREEZE: |
5524 | case ISD::CONCAT_VECTORS: |
5525 | case ISD::INSERT_SUBVECTOR: |
5526 | case ISD::EXTRACT_SUBVECTOR: |
5527 | case ISD::SADDSAT: |
5528 | case ISD::UADDSAT: |
5529 | case ISD::SSUBSAT: |
5530 | case ISD::USUBSAT: |
5531 | case ISD::MULHU: |
5532 | case ISD::MULHS: |
5533 | case ISD::SMIN: |
5534 | case ISD::SMAX: |
5535 | case ISD::UMIN: |
5536 | case ISD::UMAX: |
5537 | case ISD::AND: |
5538 | case ISD::XOR: |
5539 | case ISD::ROTL: |
5540 | case ISD::ROTR: |
5541 | case ISD::FSHL: |
5542 | case ISD::FSHR: |
5543 | case ISD::BSWAP: |
5544 | case ISD::CTPOP: |
5545 | case ISD::BITREVERSE: |
5546 | case ISD::PARITY: |
5547 | case ISD::SIGN_EXTEND: |
5548 | case ISD::TRUNCATE: |
5549 | case ISD::SIGN_EXTEND_INREG: |
5550 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
5551 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
5552 | case ISD::BITCAST: |
5553 | case ISD::BUILD_VECTOR: |
5554 | case ISD::BUILD_PAIR: |
5555 | case ISD::SPLAT_VECTOR: |
5556 | case ISD::VSELECT: |
5557 | return false; |
5558 | |
5559 | case ISD::SELECT_CC: |
5560 | case ISD::SETCC: { |
5561 | // Integer setcc cannot create undef or poison. |
5562 | if (Op.getOperand(i: 0).getValueType().isInteger()) |
5563 | return false; |
5564 | |
5565 | // FP compares are more complicated. They can create poison for NaN/infinity |
5566 | // based on options and flags. The options and flags also cause special |
5567 | // no-NaN condition codes to be used; those condition codes may be preserved |
5568 | // even if the no-NaN flag is dropped somewhere. |
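 | // Bit 0x10 marks the "don't care" condition codes (SETEQ and friends); used |
 | // on FP operands they assume no NaNs and so may yield poison when one occurs. |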
5569 | unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4; |
5570 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Op.getOperand(i: CCOp))->get(); |
5571 | if (((unsigned)CCCode & 0x10U)) |
5572 | return true; |
5573 | |
5574 | const TargetOptions &Options = getTarget().Options; |
5575 | return Options.NoNaNsFPMath || Options.NoInfsFPMath; |
5576 | } |
5577 | |
5578 | case ISD::OR: |
5579 | case ISD::ZERO_EXTEND: |
5580 | case ISD::ADD: |
5581 | case ISD::SUB: |
5582 | case ISD::MUL: |
5583 | case ISD::FNEG: |
5584 | case ISD::FADD: |
5585 | case ISD::FSUB: |
5586 | case ISD::FMUL: |
5587 | case ISD::FDIV: |
5588 | case ISD::FREM: |
5589 | // No poison except from flags (which is handled above). |
5590 | return false; |
5591 | |
5592 | case ISD::SHL: |
5593 | case ISD::SRL: |
5594 | case ISD::SRA: |
5595 | // If the max shift amount isn't in range, then the shift can |
5596 | // create poison. |
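 | // e.g. (shl i8 X, 8) is poison, and a poison shift amount likewise taints |
 | // the result, hence the extra check on operand 1 below. |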
5597 | return !isGuaranteedNotToBeUndefOrPoison(Op: Op.getOperand(i: 1), DemandedElts, |
5598 | PoisonOnly, Depth: Depth + 1) || |
5599 | !getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1); |
5600 | |
5601 | case ISD::SCALAR_TO_VECTOR: |
5602 | // Check if we demand any upper (undef) elements. |
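 | // DemandedElts.ugt(1) holds exactly when some lane other than lane 0 is |
 | // demanded; only lane 0 of a SCALAR_TO_VECTOR is defined. |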
5603 | return !PoisonOnly && DemandedElts.ugt(RHS: 1); |
5604 | |
5605 | case ISD::INSERT_VECTOR_ELT: |
5606 | case ISD::EXTRACT_VECTOR_ELT: { |
5607 | // Ensure that the element index is in bounds. |
5608 | EVT VecVT = Op.getOperand(i: 0).getValueType(); |
5609 | SDValue Idx = Op.getOperand(i: Opcode == ISD::INSERT_VECTOR_ELT ? 2 : 1); |
5610 | if (isGuaranteedNotToBeUndefOrPoison(Op: Idx, DemandedElts, PoisonOnly, |
5611 | Depth: Depth + 1)) { |
5612 | KnownBits KnownIdx = computeKnownBits(Op: Idx, Depth: Depth + 1); |
5613 | return KnownIdx.getMaxValue().uge(RHS: VecVT.getVectorMinNumElements()); |
5614 | } |
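 | // Either the index may itself be undef/poison or it may be out of range; |
 | // conservatively report that poison is possible. |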
5615 | return true; |
5616 | } |
5617 | |
5618 | case ISD::VECTOR_SHUFFLE: { |
5619 | // Check for any demanded shuffle element that is undef. |
5620 | auto *SVN = cast<ShuffleVectorSDNode>(Val&: Op); |
5621 | for (auto [Idx, Elt] : enumerate(First: SVN->getMask())) |
5622 | if (Elt < 0 && DemandedElts[Idx]) |
5623 | return true; |
5624 | return false; |
5625 | } |
5626 | |
5627 | default: |
5628 | // Allow the target to implement this method for its nodes. |
5629 | if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || |
5630 | Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) |
5631 | return TLI->canCreateUndefOrPoisonForTargetNode( |
5632 | Op, DemandedElts, DAG: *this, PoisonOnly, ConsiderFlags, Depth); |
5633 | break; |
5634 | } |
5635 | |
5636 | // Be conservative and return true. |
5637 | return true; |
5638 | } |
5639 | |
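 | // A disjoint OR adds with no carries. XOR with the minimum signed constant |
 | // flips only the sign bit, which equals adding INT_MIN modulo 2^n, so it is |
 | // only ADD-like when wrapping is permitted. |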
5640 | bool SelectionDAG::isADDLike(SDValue Op, bool NoWrap) const { |
5641 | unsigned Opcode = Op.getOpcode(); |
5642 | if (Opcode == ISD::OR) |
5643 | return Op->getFlags().hasDisjoint() || |
5644 | haveNoCommonBitsSet(A: Op.getOperand(i: 0), B: Op.getOperand(i: 1)); |
5645 | if (Opcode == ISD::XOR) |
5646 | return !NoWrap && isMinSignedConstant(V: Op.getOperand(i: 1)); |
5647 | return false; |
5648 | } |
5649 | |
5650 | bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { |
5651 | return Op.getNumOperands() == 2 && isa<ConstantSDNode>(Val: Op.getOperand(i: 1)) && |
5652 | (Op.isAnyAdd() || isADDLike(Op)); |
5653 | } |
5654 | |
5655 | bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, |
5656 | unsigned Depth) const { |
5657 | EVT VT = Op.getValueType(); |
5658 | |
5659 | // Since the number of lanes in a scalable vector is unknown at compile time, |
5660 | // we track one bit which is implicitly broadcast to all lanes. This means |
5661 | // that all lanes in a scalable vector are considered demanded. |
5662 | APInt DemandedElts = VT.isFixedLengthVector() |
5663 | ? APInt::getAllOnes(numBits: VT.getVectorNumElements()) |
5664 | : APInt(1, 1); |
5665 | |
5666 | return isKnownNeverNaN(Op, DemandedElts, SNaN, Depth); |
5667 | } |
5668 | |
5669 | bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, |
5670 | bool SNaN, unsigned Depth) const { |
5671 | assert(!DemandedElts.isZero() && "No demanded elements"); |
5672 | |
5673 | // If we're told that NaNs won't happen, assume they won't. |
5674 | if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) |
5675 | return true; |
5676 | |
5677 | if (Depth >= MaxRecursionDepth) |
5678 | return false; // Limit search depth. |
5679 | |
5680 | // If the value is a constant, we can obviously see if it is a NaN or not. |
5681 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op)) { |
5682 | return !C->getValueAPF().isNaN() || |
5683 | (SNaN && !C->getValueAPF().isSignaling()); |
5684 | } |
5685 | |
5686 | unsigned Opcode = Op.getOpcode(); |
5687 | switch (Opcode) { |
5688 | case ISD::FADD: |
5689 | case ISD::FSUB: |
5690 | case ISD::FMUL: |
5691 | case ISD::FDIV: |
5692 | case ISD::FREM: |
5693 | case ISD::FSIN: |
5694 | case ISD::FCOS: |
5695 | case ISD::FTAN: |
5696 | case ISD::FASIN: |
5697 | case ISD::FACOS: |
5698 | case ISD::FATAN: |
5699 | case ISD::FATAN2: |
5700 | case ISD::FSINH: |
5701 | case ISD::FCOSH: |
5702 | case ISD::FTANH: |
5703 | case ISD::FMA: |
5704 | case ISD::FMAD: { |
5705 | if (SNaN) |
5706 | return true; |
5707 | // TODO: Need isKnownNeverInfinity |
5708 | return false; |
5709 | } |
5710 | case ISD::FCANONICALIZE: |
5711 | case ISD::FEXP: |
5712 | case ISD::FEXP2: |
5713 | case ISD::FEXP10: |
5714 | case ISD::FTRUNC: |
5715 | case ISD::FFLOOR: |
5716 | case ISD::FCEIL: |
5717 | case ISD::FROUND: |
5718 | case ISD::FROUNDEVEN: |
5719 | case ISD::LROUND: |
5720 | case ISD::LLROUND: |
5721 | case ISD::FRINT: |
5722 | case ISD::LRINT: |
5723 | case ISD::LLRINT: |
5724 | case ISD::FNEARBYINT: |
5725 | case ISD::FLDEXP: { |
5726 | if (SNaN) |
5727 | return true; |
5728 | return isKnownNeverNaN(Op: Op.getOperand(i: 0), DemandedElts, SNaN, Depth: Depth + 1); |
5729 | } |
5730 | case ISD::FABS: |
5731 | case ISD::FNEG: |
5732 | case ISD::FCOPYSIGN: { |
5733 | return isKnownNeverNaN(Op: Op.getOperand(i: 0), DemandedElts, SNaN, Depth: Depth + 1); |
5734 | } |
5735 | case ISD::SELECT: |
5736 | return isKnownNeverNaN(Op: Op.getOperand(i: 1), DemandedElts, SNaN, Depth: Depth + 1) && |
5737 | isKnownNeverNaN(Op: Op.getOperand(i: 2), DemandedElts, SNaN, Depth: Depth + 1); |
5738 | case ISD::FP_EXTEND: |
5739 | case ISD::FP_ROUND: { |
5740 | if (SNaN) |
5741 | return true; |
5742 | return isKnownNeverNaN(Op: Op.getOperand(i: 0), DemandedElts, SNaN, Depth: Depth + 1); |
5743 | } |
5744 | case ISD::SINT_TO_FP: |
5745 | case ISD::UINT_TO_FP: |
5746 | return true; |
5747 | case ISD::FSQRT: // Needs the operand to be known positive |
5748 | case ISD::FLOG: |
5749 | case ISD::FLOG2: |
5750 | case ISD::FLOG10: |
5751 | case ISD::FPOWI: |
5752 | case ISD::FPOW: { |
5753 | if (SNaN) |
5754 | return true; |
5755 | // TODO: Refine on operand |
5756 | return false; |
5757 | } |
5758 | case ISD::FMINNUM: |
5759 | case ISD::FMAXNUM: |
5760 | case ISD::FMINIMUMNUM: |
5761 | case ISD::FMAXIMUMNUM: { |
5762 | // Only one needs to be known not-nan, since it will be returned if the |
5763 | // other ends up being one. |
5764 | return isKnownNeverNaN(Op: Op.getOperand(i: 0), DemandedElts, SNaN, Depth: Depth + 1) || |
5765 | isKnownNeverNaN(Op: Op.getOperand(i: 1), DemandedElts, SNaN, Depth: Depth + 1); |
5766 | } |
5767 | case ISD::FMINNUM_IEEE: |
5768 | case ISD::FMAXNUM_IEEE: { |
5769 | if (SNaN) |
5770 | return true; |
5771 | // This can return a NaN if either operand is an sNaN, or if both operands |
5772 | // are NaN. |
5773 | return (isKnownNeverNaN(Op: Op.getOperand(i: 0), DemandedElts, SNaN: false, Depth: Depth + 1) && |
5774 | isKnownNeverSNaN(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1)) || |
5775 | (isKnownNeverNaN(Op: Op.getOperand(i: 1), DemandedElts, SNaN: false, Depth: Depth + 1) && |
5776 | isKnownNeverSNaN(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1)); |
5777 | } |
5778 | case ISD::FMINIMUM: |
5779 | case ISD::FMAXIMUM: { |
5780 | // TODO: Does this quiet or return the original NaN as-is? |
5781 | return isKnownNeverNaN(Op: Op.getOperand(i: 0), DemandedElts, SNaN, Depth: Depth + 1) && |
5782 | isKnownNeverNaN(Op: Op.getOperand(i: 1), DemandedElts, SNaN, Depth: Depth + 1); |
5783 | } |
5784 | case ISD::EXTRACT_VECTOR_ELT: { |
5785 | SDValue Src = Op.getOperand(i: 0); |
5786 | auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1)); |
5787 | EVT SrcVT = Src.getValueType(); |
5788 | if (SrcVT.isFixedLengthVector() && Idx && |
5789 | Idx->getAPIntValue().ult(RHS: SrcVT.getVectorNumElements())) { |
5790 | APInt DemandedSrcElts = APInt::getOneBitSet(numBits: SrcVT.getVectorNumElements(), |
5791 | BitNo: Idx->getZExtValue()); |
5792 | return isKnownNeverNaN(Op: Src, DemandedElts: DemandedSrcElts, SNaN, Depth: Depth + 1); |
5793 | } |
5794 | return isKnownNeverNaN(Op: Src, SNaN, Depth: Depth + 1); |
5795 | } |
5796 | case ISD::EXTRACT_SUBVECTOR: { |
5797 | SDValue Src = Op.getOperand(i: 0); |
5798 | if (Src.getValueType().isFixedLengthVector()) { |
5799 | unsigned Idx = Op.getConstantOperandVal(i: 1); |
5800 | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
5801 | APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx); |
5802 | return isKnownNeverNaN(Op: Src, DemandedElts: DemandedSrcElts, SNaN, Depth: Depth + 1); |
5803 | } |
5804 | return isKnownNeverNaN(Op: Src, SNaN, Depth: Depth + 1); |
5805 | } |
5806 | case ISD::INSERT_SUBVECTOR: { |
5807 | SDValue BaseVector = Op.getOperand(i: 0); |
5808 | SDValue SubVector = Op.getOperand(i: 1); |
5809 | EVT BaseVectorVT = BaseVector.getValueType(); |
5810 | if (BaseVectorVT.isFixedLengthVector()) { |
5811 | unsigned Idx = Op.getConstantOperandVal(i: 2); |
5812 | unsigned NumBaseElts = BaseVectorVT.getVectorNumElements(); |
5813 | unsigned NumSubElts = SubVector.getValueType().getVectorNumElements(); |
5814 | |
5815 | // Clear/Extract the bits at the position where the subvector will be |
5816 | // inserted. |
5817 | APInt DemandedMask = |
5818 | APInt::getBitsSet(numBits: NumBaseElts, loBit: Idx, hiBit: Idx + NumSubElts); |
5819 | APInt DemandedSrcElts = DemandedElts & ~DemandedMask; |
5820 | APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx); |
5821 | |
5822 | bool NeverNaN = true; |
5823 | if (!DemandedSrcElts.isZero()) |
5824 | NeverNaN &= |
5825 | isKnownNeverNaN(Op: BaseVector, DemandedElts: DemandedSrcElts, SNaN, Depth: Depth + 1); |
5826 | if (NeverNaN && !DemandedSubElts.isZero()) |
5827 | NeverNaN &= |
5828 | isKnownNeverNaN(Op: SubVector, DemandedElts: DemandedSubElts, SNaN, Depth: Depth + 1); |
5829 | return NeverNaN; |
5830 | } |
5831 | return isKnownNeverNaN(Op: BaseVector, SNaN, Depth: Depth + 1) && |
5832 | isKnownNeverNaN(Op: SubVector, SNaN, Depth: Depth + 1); |
5833 | } |
5834 | case ISD::BUILD_VECTOR: { |
5835 | unsigned NumElts = Op.getNumOperands(); |
5836 | for (unsigned I = 0; I != NumElts; ++I) |
5837 | if (DemandedElts[I] && |
5838 | !isKnownNeverNaN(Op: Op.getOperand(i: I), SNaN, Depth: Depth + 1)) |
5839 | return false; |
5840 | return true; |
5841 | } |
5842 | case ISD::AssertNoFPClass: { |
5843 | FPClassTest NoFPClass = |
5844 | static_cast<FPClassTest>(Op.getConstantOperandVal(i: 1)); |
5845 | if ((NoFPClass & fcNan) == fcNan) |
5846 | return true; |
5847 | if (SNaN && (NoFPClass & fcSNan) == fcSNan) |
5848 | return true; |
5849 | return isKnownNeverNaN(Op: Op.getOperand(i: 0), DemandedElts, SNaN, Depth: Depth + 1); |
5850 | } |
5851 | default: |
5852 | if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || |
5853 | Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) { |
5854 | return TLI->isKnownNeverNaNForTargetNode(Op, DemandedElts, DAG: *this, SNaN, |
5855 | Depth); |
5856 | } |
5857 | |
5858 | return false; |
5859 | } |
5860 | } |
5861 | |
5862 | bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { |
5863 | assert(Op.getValueType().isFloatingPoint() && |
5864 | "Floating point type expected"); |
5865 | |
5866 | // If the value is a constant, we can obviously see if it is a zero or not. |
5867 | return ISD::matchUnaryFpPredicate( |
5868 | Op, Match: [](ConstantFPSDNode *C) { return !C->isZero(); }); |
5869 | } |
5870 | |
5871 | bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const { |
5872 | if (Depth >= MaxRecursionDepth) |
5873 | return false; // Limit search depth. |
5874 | |
5875 | assert(!Op.getValueType().isFloatingPoint() && |
5876 | "Floating point types unsupported - use isKnownNeverZeroFloat"); |
5877 | |
5878 | // If the value is a constant, we can obviously see if it is a zero or not. |
5879 | if (ISD::matchUnaryPredicate(Op, |
5880 | Match: [](ConstantSDNode *C) { return !C->isZero(); })) |
5881 | return true; |
5882 | |
5883 | // TODO: Recognize more cases here. Most of the cases are also incomplete to |
5884 | // some degree. |
5885 | switch (Op.getOpcode()) { |
5886 | default: |
5887 | break; |
5888 | |
5889 | case ISD::OR: |
5890 | return isKnownNeverZero(Op: Op.getOperand(i: 1), Depth: Depth + 1) || |
5891 | isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5892 | |
5893 | case ISD::VSELECT: |
5894 | case ISD::SELECT: |
5895 | return isKnownNeverZero(Op: Op.getOperand(i: 1), Depth: Depth + 1) && |
5896 | isKnownNeverZero(Op: Op.getOperand(i: 2), Depth: Depth + 1); |
5897 | |
5898 | case ISD::SHL: { |
5899 | if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()) |
5900 | return isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5901 | KnownBits ValKnown = computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5902 | // 1 << X is never zero. |
5903 | if (ValKnown.One[0]) |
5904 | return true; |
5905 | // If the known one bits survive even the maximum shift amount, the result is non-zero. |
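 | // e.g. known ones 0b0011 with MaxCnt = 2: 0b0011 << 2 = 0b1100 != 0, so a |
 | // set bit survives every feasible shift amount. |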
5906 | APInt MaxCnt = computeKnownBits(Op: Op.getOperand(i: 1), Depth: Depth + 1).getMaxValue(); |
5907 | if (MaxCnt.ult(RHS: ValKnown.getBitWidth()) && |
5908 | !ValKnown.One.shl(ShiftAmt: MaxCnt).isZero()) |
5909 | return true; |
5910 | break; |
5911 | } |
5912 | case ISD::UADDSAT: |
5913 | case ISD::UMAX: |
5914 | return isKnownNeverZero(Op: Op.getOperand(i: 1), Depth: Depth + 1) || |
5915 | isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5916 | |
5917 | // For smin/smax: If either operand is known negative/positive |
5918 | // respectively, we don't need the other to be known at all. |
5919 | case ISD::SMAX: { |
5920 | KnownBits Op1 = computeKnownBits(Op: Op.getOperand(i: 1), Depth: Depth + 1); |
5921 | if (Op1.isStrictlyPositive()) |
5922 | return true; |
5923 | |
5924 | KnownBits Op0 = computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5925 | if (Op0.isStrictlyPositive()) |
5926 | return true; |
5927 | |
5928 | if (Op1.isNonZero() && Op0.isNonZero()) |
5929 | return true; |
5930 | |
5931 | return isKnownNeverZero(Op: Op.getOperand(i: 1), Depth: Depth + 1) && |
5932 | isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5933 | } |
5934 | case ISD::SMIN: { |
5935 | KnownBits Op1 = computeKnownBits(Op: Op.getOperand(i: 1), Depth: Depth + 1); |
5936 | if (Op1.isNegative()) |
5937 | return true; |
5938 | |
5939 | KnownBits Op0 = computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5940 | if (Op0.isNegative()) |
5941 | return true; |
5942 | |
5943 | if (Op1.isNonZero() && Op0.isNonZero()) |
5944 | return true; |
5945 | |
5946 | return isKnownNeverZero(Op: Op.getOperand(i: 1), Depth: Depth + 1) && |
5947 | isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5948 | } |
5949 | case ISD::UMIN: |
5950 | return isKnownNeverZero(Op: Op.getOperand(i: 1), Depth: Depth + 1) && |
5951 | isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5952 | |
5953 | case ISD::ROTL: |
5954 | case ISD::ROTR: |
5955 | case ISD::BITREVERSE: |
5956 | case ISD::BSWAP: |
5957 | case ISD::CTPOP: |
5958 | case ISD::ABS: |
5959 | return isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5960 | |
5961 | case ISD::SRA: |
5962 | case ISD::SRL: { |
5963 | if (Op->getFlags().hasExact()) |
5964 | return isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5965 | KnownBits ValKnown = computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5966 | if (ValKnown.isNegative()) |
5967 | return true; |
5968 | // If the known one bits survive even the maximum shift amount, the result is non-zero. |
5969 | APInt MaxCnt = computeKnownBits(Op: Op.getOperand(i: 1), Depth: Depth + 1).getMaxValue(); |
5970 | if (MaxCnt.ult(RHS: ValKnown.getBitWidth()) && |
5971 | !ValKnown.One.lshr(ShiftAmt: MaxCnt).isZero()) |
5972 | return true; |
5973 | break; |
5974 | } |
5975 | case ISD::UDIV: |
5976 | case ISD::SDIV: |
5977 | // div exact can only produce a zero if the dividend is zero. |
5978 | // TODO: For udiv this is also true if Op1 u<= Op0 |
5979 | if (Op->getFlags().hasExact()) |
5980 | return isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
5981 | break; |
5982 | |
5983 | case ISD::ADD: |
5984 | if (Op->getFlags().hasNoUnsignedWrap()) |
5985 | if (isKnownNeverZero(Op: Op.getOperand(i: 1), Depth: Depth + 1) || |
5986 | isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1)) |
5987 | return true; |
5988 | // TODO: There are a lot more cases we can prove for add. |
5989 | break; |
5990 | |
5991 | case ISD::SUB: { |
5992 | if (isNullConstant(V: Op.getOperand(i: 0))) |
5993 | return isKnownNeverZero(Op: Op.getOperand(i: 1), Depth: Depth + 1); |
5994 | |
5995 | std::optional<bool> ne = |
5996 | KnownBits::ne(LHS: computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1), |
5997 | RHS: computeKnownBits(Op: Op.getOperand(i: 1), Depth: Depth + 1)); |
5998 | return ne && *ne; |
5999 | } |
6000 | |
6001 | case ISD::MUL: |
6002 | if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()) |
6003 | if (isKnownNeverZero(Op: Op.getOperand(i: 1), Depth: Depth + 1) && |
6004 | isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1)) |
6005 | return true; |
6006 | break; |
6007 | |
6008 | case ISD::ZERO_EXTEND: |
6009 | case ISD::SIGN_EXTEND: |
6010 | return isKnownNeverZero(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
6011 | case ISD::VSCALE: { |
6012 | const Function &F = getMachineFunction().getFunction(); |
6013 | const APInt &Multiplier = Op.getConstantOperandAPInt(i: 0); |
6014 | ConstantRange CR = |
6015 | getVScaleRange(F: &F, BitWidth: Op.getScalarValueSizeInBits()).multiply(Other: Multiplier); |
6016 | if (!CR.contains(Val: APInt(CR.getBitWidth(), 0))) |
6017 | return true; |
6018 | break; |
6019 | } |
6020 | } |
6021 | |
6022 | return computeKnownBits(Op, Depth).isNonZero(); |
6023 | } |
6024 | |
6025 | bool SelectionDAG::cannotBeOrderedNegativeFP(SDValue Op) const { |
6026 | if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(N: Op, AllowUndefs: true)) |
6027 | return !C1->isNegative(); |
6028 | |
6029 | return Op.getOpcode() == ISD::FABS; |
6030 | } |
6031 | |
6032 | bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { |
6033 | // Check the obvious case. |
6034 | if (A == B) return true; |
6035 | |
6036 | // Treat negative zero and positive zero as equal. |
6037 | if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(Val&: A)) |
6038 | if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(Val&: B)) |
6039 | if (CA->isZero() && CB->isZero()) return true; |
6040 | |
6041 | // Otherwise they may not be equal. |
6042 | return false; |
6043 | } |
6044 | |
6045 | // Only bits set in Mask must be negated; other bits may be arbitrary. |
6046 | SDValue llvm::getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs) { |
6047 | if (isBitwiseNot(V, AllowUndefs)) |
6048 | return V.getOperand(i: 0); |
6049 | |
6050 | // Handle any_extend (not (truncate X)) pattern, where Mask only sets |
6051 | // bits in the non-extended part. |
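 | // e.g. V = any_extend(i16 (not (trunc i32 X))) with Mask < 2^16: the low 16 |
 | // bits of X are negated, which is all that Mask requires. |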
6052 | ConstantSDNode *MaskC = isConstOrConstSplat(N: Mask); |
6053 | if (!MaskC || V.getOpcode() != ISD::ANY_EXTEND) |
6054 | return SDValue(); |
6055 | SDValue ExtArg = V.getOperand(i: 0); |
6056 | if (ExtArg.getScalarValueSizeInBits() >= |
6057 | MaskC->getAPIntValue().getActiveBits() && |
6058 | isBitwiseNot(V: ExtArg, AllowUndefs) && |
6059 | ExtArg.getOperand(i: 0).getOpcode() == ISD::TRUNCATE && |
6060 | ExtArg.getOperand(i: 0).getOperand(i: 0).getValueType() == V.getValueType()) |
6061 | return ExtArg.getOperand(i: 0).getOperand(i: 0); |
6062 | return SDValue(); |
6063 | } |
6064 | |
6065 | static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) { |
6066 | // Match masked merge pattern (X & ~M) op (Y & M) |
6067 | // Including degenerate case (X & ~M) op M |
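 | // In both shapes each bit position is claimed by at most one side, so the |
 | // two values can have no set bit in common. |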
6068 | auto MatchNoCommonBitsPattern = [&](SDValue Not, SDValue Mask, |
6069 | SDValue Other) { |
6070 | if (SDValue NotOperand = |
6071 | getBitwiseNotOperand(V: Not, Mask, /* AllowUndefs */ true)) { |
6072 | if (NotOperand->getOpcode() == ISD::ZERO_EXTEND || |
6073 | NotOperand->getOpcode() == ISD::TRUNCATE) |
6074 | NotOperand = NotOperand->getOperand(Num: 0); |
6075 | |
6076 | if (Other == NotOperand) |
6077 | return true; |
6078 | if (Other->getOpcode() == ISD::AND) |
6079 | return NotOperand == Other->getOperand(Num: 0) || |
6080 | NotOperand == Other->getOperand(Num: 1); |
6081 | } |
6082 | return false; |
6083 | }; |
6084 | |
6085 | if (A->getOpcode() == ISD::ZERO_EXTEND || A->getOpcode() == ISD::TRUNCATE) |
6086 | A = A->getOperand(Num: 0); |
6087 | |
6088 | if (B->getOpcode() == ISD::ZERO_EXTEND || B->getOpcode() == ISD::TRUNCATE) |
6089 | B = B->getOperand(Num: 0); |
6090 | |
6091 | if (A->getOpcode() == ISD::AND) |
6092 | return MatchNoCommonBitsPattern(A->getOperand(Num: 0), A->getOperand(Num: 1), B) || |
6093 | MatchNoCommonBitsPattern(A->getOperand(Num: 1), A->getOperand(Num: 0), B); |
6094 | return false; |
6095 | } |
6096 | |
6097 | // FIXME: unify with llvm::haveNoCommonBitsSet. |
6098 | bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { |
6099 | assert(A.getValueType() == B.getValueType() && |
6100 | "Values must have the same type"); |
6101 | if (haveNoCommonBitsSetCommutative(A, B) || |
6102 | haveNoCommonBitsSetCommutative(A: B, B: A)) |
6103 | return true; |
6104 | return KnownBits::haveNoCommonBitsSet(LHS: computeKnownBits(Op: A), |
6105 | RHS: computeKnownBits(Op: B)); |
6106 | } |
6107 | |
6108 | static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step, |
6109 | SelectionDAG &DAG) { |
6110 | if (cast<ConstantSDNode>(Val&: Step)->isZero()) |
6111 | return DAG.getConstant(Val: 0, DL, VT); |
6112 | |
6113 | return SDValue(); |
6114 | } |
6115 | |
6116 | static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, |
6117 | ArrayRef<SDValue> Ops, |
6118 | SelectionDAG &DAG) { |
6119 | int NumOps = Ops.size(); |
6120 | assert(NumOps != 0 && "Can't build an empty vector!"); |
6121 | assert(!VT.isScalableVector() && |
6122 | "BUILD_VECTOR cannot be used with scalable types"); |
6123 | assert(VT.getVectorNumElements() == (unsigned)NumOps && |
6124 | "Incorrect element count in BUILD_VECTOR!"); |
6125 | |
6126 | // BUILD_VECTOR of UNDEFs is UNDEF. |
6127 | if (llvm::all_of(Range&: Ops, P: [](SDValue Op) { return Op.isUndef(); })) |
6128 | return DAG.getUNDEF(VT); |
6129 | |
6130 | // A BUILD_VECTOR of sequential extracts from the same vector and type is the identity. |
6131 | SDValue IdentitySrc; |
6132 | bool IsIdentity = true; |
6133 | for (int i = 0; i != NumOps; ++i) { |
6134 | if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
6135 | Ops[i].getOperand(i: 0).getValueType() != VT || |
6136 | (IdentitySrc && Ops[i].getOperand(i: 0) != IdentitySrc) || |
6137 | !isa<ConstantSDNode>(Val: Ops[i].getOperand(i: 1)) || |
6138 | Ops[i].getConstantOperandAPInt(i: 1) != i) { |
6139 | IsIdentity = false; |
6140 | break; |
6141 | } |
6142 | IdentitySrc = Ops[i].getOperand(i: 0); |
6143 | } |
6144 | if (IsIdentity) |
6145 | return IdentitySrc; |
6146 | |
6147 | return SDValue(); |
6148 | } |
6149 | |
6150 | /// Try to simplify vector concatenation to an input value, undef, or build |
6151 | /// vector. |
6152 | static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, |
6153 | ArrayRef<SDValue> Ops, |
6154 | SelectionDAG &DAG) { |
6155 | assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); |
6156 | assert(llvm::all_of(Ops, |
6157 | [Ops](SDValue Op) { |
6158 | return Ops[0].getValueType() == Op.getValueType(); |
6159 | }) && |
6160 | "Concatenation of vectors with inconsistent value types!"); |
6161 | assert((Ops[0].getValueType().getVectorElementCount() * Ops.size()) == |
6162 | VT.getVectorElementCount() && |
6163 | "Incorrect element count in vector concatenation!"); |
6164 | |
6165 | if (Ops.size() == 1) |
6166 | return Ops[0]; |
6167 | |
6168 | // Concat of UNDEFs is UNDEF. |
6169 | if (llvm::all_of(Range&: Ops, P: [](SDValue Op) { return Op.isUndef(); })) |
6170 | return DAG.getUNDEF(VT); |
6171 | |
6172 | // Scan the operands and look for extract operations from a single source |
6173 | // that correspond to insertion at the same location via this concatenation: |
6174 | // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ... |
6175 | SDValue IdentitySrc; |
6176 | bool IsIdentity = true; |
6177 | for (unsigned i = 0, e = Ops.size(); i != e; ++i) { |
6178 | SDValue Op = Ops[i]; |
6179 | unsigned IdentityIndex = i * Op.getValueType().getVectorMinNumElements(); |
6180 | if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR || |
6181 | Op.getOperand(i: 0).getValueType() != VT || |
6182 | (IdentitySrc && Op.getOperand(i: 0) != IdentitySrc) || |
6183 | Op.getConstantOperandVal(i: 1) != IdentityIndex) { |
6184 | IsIdentity = false; |
6185 | break; |
6186 | } |
6187 | assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) && |
6188 | "Unexpected identity source vector for concat of extracts"); |
6189 | IdentitySrc = Op.getOperand(i: 0); |
6190 | } |
6191 | if (IsIdentity) { |
6192 | assert(IdentitySrc && "Failed to set source vector of extracts"); |
6193 | return IdentitySrc; |
6194 | } |
6195 | |
6196 | // The code below this point is only designed to work for fixed width |
6197 | // vectors, so we bail out for now. |
6198 | if (VT.isScalableVector()) |
6199 | return SDValue(); |
6200 | |
6201 | // A CONCAT_VECTORS with all UNDEF/BUILD_VECTOR operands can be |
6202 | // simplified to one big BUILD_VECTOR. |
6203 | // FIXME: Add support for SCALAR_TO_VECTOR as well. |
6204 | EVT SVT = VT.getScalarType(); |
6205 | SmallVector<SDValue, 16> Elts; |
6206 | for (SDValue Op : Ops) { |
6207 | EVT OpVT = Op.getValueType(); |
6208 | if (Op.isUndef()) |
6209 | Elts.append(NumInputs: OpVT.getVectorNumElements(), Elt: DAG.getUNDEF(VT: SVT)); |
6210 | else if (Op.getOpcode() == ISD::BUILD_VECTOR) |
6211 | Elts.append(in_start: Op->op_begin(), in_end: Op->op_end()); |
6212 | else |
6213 | return SDValue(); |
6214 | } |
6215 | |
6216 | // BUILD_VECTOR requires all inputs to be of the same type; find the |
6217 | // maximum type and extend them all. |
6218 | for (SDValue Op : Elts) |
6219 | SVT = (SVT.bitsLT(VT: Op.getValueType()) ? Op.getValueType() : SVT); |
6220 | |
6221 | if (SVT.bitsGT(VT: VT.getScalarType())) { |
6222 | for (SDValue &Op : Elts) { |
6223 | if (Op.isUndef()) |
6224 | Op = DAG.getUNDEF(VT: SVT); |
6225 | else |
6226 | Op = DAG.getTargetLoweringInfo().isZExtFree(FromTy: Op.getValueType(), ToTy: SVT) |
6227 | ? DAG.getZExtOrTrunc(Op, DL, VT: SVT) |
6228 | : DAG.getSExtOrTrunc(Op, DL, VT: SVT); |
6229 | } |
6230 | } |
6231 | |
6232 | SDValue V = DAG.getBuildVector(VT, DL, Ops: Elts); |
6233 | NewSDValueDbgMsg(V, Msg: "New node fold concat vectors: ", G: &DAG); |
6234 | return V; |
6235 | } |
6236 | |
6237 | /// Gets or creates the specified node. |
6238 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { |
6239 | SDVTList VTs = getVTList(VT); |
6240 | FoldingSetNodeID ID; |
6241 | AddNodeIDNode(ID, OpC: Opcode, VTList: VTs, OpList: {}); |
6242 | void *IP = nullptr; |
6243 | if (SDNode *E = FindNodeOrInsertPos(ID, DL, InsertPos&: IP)) |
6244 | return SDValue(E, 0); |
6245 | |
6246 | auto *N = newSDNode<SDNode>(Args&: Opcode, Args: DL.getIROrder(), Args: DL.getDebugLoc(), Args&: VTs); |
6247 | CSEMap.InsertNode(N, InsertPos: IP); |
6248 | |
6249 | InsertNode(N); |
6250 | SDValue V = SDValue(N, 0); |
6251 | NewSDValueDbgMsg(V, Msg: "Creating new node: ", G: this); |
6252 | return V; |
6253 | } |
6254 | |
6255 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
6256 | SDValue N1) { |
6257 | SDNodeFlags Flags; |
6258 | if (Inserter) |
6259 | Flags = Inserter->getFlags(); |
6260 | return getNode(Opcode, DL, VT, Operand: N1, Flags); |
6261 | } |
6262 | |
6263 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
6264 | SDValue N1, const SDNodeFlags Flags) { |
6265 | assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); |
6266 | |
6267 | // Constant fold unary operations with a vector integer or float operand. |
6268 | switch (Opcode) { |
6269 | default: |
6270 | // FIXME: Entirely reasonable to perform folding of other unary |
6271 | // operations here as the need arises. |
6272 | break; |
6273 | case ISD::FNEG: |
6274 | case ISD::FABS: |
6275 | case ISD::FCEIL: |
6276 | case ISD::FTRUNC: |
6277 | case ISD::FFLOOR: |
6278 | case ISD::FP_EXTEND: |
6279 | case ISD::FP_TO_SINT: |
6280 | case ISD::FP_TO_UINT: |
6281 | case ISD::FP_TO_FP16: |
6282 | case ISD::FP_TO_BF16: |
6283 | case ISD::TRUNCATE: |
6284 | case ISD::ANY_EXTEND: |
6285 | case ISD::ZERO_EXTEND: |
6286 | case ISD::SIGN_EXTEND: |
6287 | case ISD::UINT_TO_FP: |
6288 | case ISD::SINT_TO_FP: |
6289 | case ISD::FP16_TO_FP: |
6290 | case ISD::BF16_TO_FP: |
6291 | case ISD::BITCAST: |
6292 | case ISD::ABS: |
6293 | case ISD::BITREVERSE: |
6294 | case ISD::BSWAP: |
6295 | case ISD::CTLZ: |
6296 | case ISD::CTLZ_ZERO_UNDEF: |
6297 | case ISD::CTTZ: |
6298 | case ISD::CTTZ_ZERO_UNDEF: |
6299 | case ISD::CTPOP: |
6300 | case ISD::STEP_VECTOR: { |
6301 | SDValue Ops = {N1}; |
6302 | if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops)) |
6303 | return Fold; |
6304 | } |
6305 | } |
6306 | |
6307 | unsigned OpOpcode = N1.getNode()->getOpcode(); |
6308 | switch (Opcode) { |
6309 | case ISD::STEP_VECTOR: |
6310 | assert(VT.isScalableVector() && |
6311 | "STEP_VECTOR can only be used with scalable types"); |
6312 | assert(OpOpcode == ISD::TargetConstant && |
6313 | VT.getVectorElementType() == N1.getValueType() && |
6314 | "Unexpected step operand"); |
6315 | break; |
6316 | case ISD::FREEZE: |
6317 | assert(VT == N1.getValueType() && "Unexpected VT!"); |
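 | // freeze(X) is a no-op when X is already known not to be undef or poison. |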
6318 | if (isGuaranteedNotToBeUndefOrPoison(Op: N1, /*PoisonOnly*/ false, |
6319 | /*Depth*/ 1)) |
6320 | return N1; |
6321 | break; |
6322 | case ISD::TokenFactor: |
6323 | case ISD::MERGE_VALUES: |
6324 | case ISD::CONCAT_VECTORS: |
6325 | return N1; // Factor, merge or concat of one node? No need. |
6326 | case ISD::BUILD_VECTOR: { |
6327 | // Attempt to simplify BUILD_VECTOR. |
6328 | SDValue Ops[] = {N1}; |
6329 | if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, DAG&: *this)) |
6330 | return V; |
6331 | break; |
6332 | } |
6333 | case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node"); |
6334 | case ISD::FP_EXTEND: |
6335 | assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && |
6336 | "Invalid FP cast!"); |
6337 | if (N1.getValueType() == VT) return N1; // noop conversion. |
6338 | assert((!VT.isVector() || VT.getVectorElementCount() == |
6339 | N1.getValueType().getVectorElementCount()) && |
6340 | "Vector element count mismatch!"); |
6341 | assert(N1.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!"); |
6342 | if (N1.isUndef()) |
6343 | return getUNDEF(VT); |
6344 | break; |
6345 | case ISD::FP_TO_SINT: |
6346 | case ISD::FP_TO_UINT: |
6347 | if (N1.isUndef()) |
6348 | return getUNDEF(VT); |
6349 | break; |
6350 | case ISD::SINT_TO_FP: |
6351 | case ISD::UINT_TO_FP: |
6352 | // [us]itofp(undef) = 0, because the result value is bounded. |
6353 | if (N1.isUndef()) |
6354 | return getConstantFP(Val: 0.0, DL, VT); |
6355 | break; |
6356 | case ISD::SIGN_EXTEND: |
6357 | assert(VT.isInteger() && N1.getValueType().isInteger() && |
6358 | "Invalid SIGN_EXTEND!"); |
6359 | assert(VT.isVector() == N1.getValueType().isVector() && |
6360 | "SIGN_EXTEND result type should be vector iff the operand " |
6361 | "type is vector!"); |
6362 | if (N1.getValueType() == VT) return N1; // noop extension |
6363 | assert((!VT.isVector() || VT.getVectorElementCount() == |
6364 | N1.getValueType().getVectorElementCount()) && |
6365 | "Vector element count mismatch!"); |
6366 | assert(N1.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!"); |
6367 | if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) { |
6368 | SDNodeFlags Flags; |
6369 | if (OpOpcode == ISD::ZERO_EXTEND) |
6370 | Flags.setNonNeg(N1->getFlags().hasNonNeg()); |
6371 | SDValue NewVal = getNode(Opcode: OpOpcode, DL, VT, N1: N1.getOperand(i: 0), Flags); |
6372 | transferDbgValues(From: N1, To: NewVal); |
6373 | return NewVal; |
6374 | } |
6375 | |
6376 | if (OpOpcode == ISD::POISON) |
6377 | return getPOISON(VT); |
6378 | |
6379 | if (N1.isUndef()) |
6380 | // sext(undef) = 0, because the top bits will all be the same. |
6381 | return getConstant(Val: 0, DL, VT); |
6382 | break; |
6383 | case ISD::ZERO_EXTEND: |
6384 | assert(VT.isInteger() && N1.getValueType().isInteger() && |
6385 | "Invalid ZERO_EXTEND!"); |
6386 | assert(VT.isVector() == N1.getValueType().isVector() && |
6387 | "ZERO_EXTEND result type should be vector iff the operand " |
6388 | "type is vector!"); |
6389 | if (N1.getValueType() == VT) return N1; // noop extension |
6390 | assert((!VT.isVector() || VT.getVectorElementCount() == |
6391 | N1.getValueType().getVectorElementCount()) && |
6392 | "Vector element count mismatch!"); |
6393 | assert(N1.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!"); |
6394 | if (OpOpcode == ISD::ZERO_EXTEND) { // (zext (zext x)) -> (zext x) |
6395 | SDNodeFlags Flags; |
6396 | Flags.setNonNeg(N1->getFlags().hasNonNeg()); |
6397 | SDValue NewVal = |
6398 | getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, N1: N1.getOperand(i: 0), Flags); |
6399 | transferDbgValues(From: N1, To: NewVal); |
6400 | return NewVal; |
6401 | } |
6402 | |
6403 | if (OpOpcode == ISD::POISON) |
6404 | return getPOISON(VT); |
6405 | |
6406 | if (N1.isUndef()) |
6407 | // zext(undef) = 0, because the top bits will be zero. |
6408 | return getConstant(Val: 0, DL, VT); |
6409 | |
6410 | // Skip unnecessary zext_inreg pattern: |
6411 | // (zext (trunc x)) -> x iff the upper bits are known zero. |
6412 | // TODO: Remove (zext (trunc (and x, c))) exception which some targets |
6413 | // use to recognise zext_inreg patterns. |
6414 | if (OpOpcode == ISD::TRUNCATE) { |
6415 | SDValue OpOp = N1.getOperand(i: 0); |
6416 | if (OpOp.getValueType() == VT) { |
6417 | if (OpOp.getOpcode() != ISD::AND) { |
6418 | APInt HiBits = APInt::getBitsSetFrom(numBits: VT.getScalarSizeInBits(), |
6419 | loBit: N1.getScalarValueSizeInBits()); |
6420 | if (MaskedValueIsZero(V: OpOp, Mask: HiBits)) { |
6421 | transferDbgValues(From: N1, To: OpOp); |
6422 | return OpOp; |
6423 | } |
6424 | } |
6425 | } |
6426 | } |
6427 | break; |
6428 | case ISD::ANY_EXTEND: |
6429 | assert(VT.isInteger() && N1.getValueType().isInteger() && |
6430 | "Invalid ANY_EXTEND!"); |
6431 | assert(VT.isVector() == N1.getValueType().isVector() && |
6432 | "ANY_EXTEND result type should be vector iff the operand " |
6433 | "type is vector!"); |
6434 | if (N1.getValueType() == VT) return N1; // noop extension |
6435 | assert((!VT.isVector() || VT.getVectorElementCount() == |
6436 | N1.getValueType().getVectorElementCount()) && |
6437 | "Vector element count mismatch!"); |
6438 | assert(N1.getValueType().bitsLT(VT) && "Invalid anyext node, dst < src!"); |
6439 | |
6440 | if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || |
6441 | OpOpcode == ISD::ANY_EXTEND) { |
6442 | SDNodeFlags Flags; |
6443 | if (OpOpcode == ISD::ZERO_EXTEND) |
6444 | Flags.setNonNeg(N1->getFlags().hasNonNeg()); |
6445 | // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) |
6446 | return getNode(Opcode: OpOpcode, DL, VT, N1: N1.getOperand(i: 0), Flags); |
6447 | } |
6448 | if (N1.isUndef()) |
6449 | return getUNDEF(VT); |
6450 | |
6451 | // (ext (trunc x)) -> x |
6452 | if (OpOpcode == ISD::TRUNCATE) { |
6453 | SDValue OpOp = N1.getOperand(i: 0); |
6454 | if (OpOp.getValueType() == VT) { |
6455 | transferDbgValues(From: N1, To: OpOp); |
6456 | return OpOp; |
6457 | } |
6458 | } |
6459 | break; |
6460 | case ISD::TRUNCATE: |
6461 | assert(VT.isInteger() && N1.getValueType().isInteger() && |
6462 | "Invalid TRUNCATE!"); |
6463 | assert(VT.isVector() == N1.getValueType().isVector() && |
6464 | "TRUNCATE result type should be vector iff the operand " |
6465 | "type is vector!"); |
6466 | if (N1.getValueType() == VT) return N1; // noop truncate |
6467 | assert((!VT.isVector() || VT.getVectorElementCount() == |
6468 | N1.getValueType().getVectorElementCount()) && |
6469 | "Vector element count mismatch!"); |
6470 | assert(N1.getValueType().bitsGT(VT) && "Invalid truncate node, src < dst!"); |
6471 | if (OpOpcode == ISD::TRUNCATE) |
6472 | return getNode(Opcode: ISD::TRUNCATE, DL, VT, N1: N1.getOperand(i: 0)); |
6473 | if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || |
6474 | OpOpcode == ISD::ANY_EXTEND) { |
6475 | // If the source is smaller than the dest we still need an extend; if it is |
 | // larger we need a truncate, and if equal the source itself is returned. |
6476 | if (N1.getOperand(i: 0).getValueType().getScalarType().bitsLT( |
6477 | VT: VT.getScalarType())) |
6478 | return getNode(Opcode: OpOpcode, DL, VT, N1: N1.getOperand(i: 0)); |
6479 | if (N1.getOperand(i: 0).getValueType().bitsGT(VT)) |
6480 | return getNode(Opcode: ISD::TRUNCATE, DL, VT, N1: N1.getOperand(i: 0)); |
6481 | return N1.getOperand(i: 0); |
6482 | } |
6483 | if (N1.isUndef()) |
6484 | return getUNDEF(VT); |
6485 | if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes) |
6486 | return getVScale(DL, VT, |
6487 | MulImm: N1.getConstantOperandAPInt(i: 0).trunc(width: VT.getSizeInBits())); |
6488 | break; |
6489 | case ISD::ANY_EXTEND_VECTOR_INREG: |
6490 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
6491 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
6492 | assert(VT.isVector() && "This DAG node is restricted to vector types."); |
6493 | assert(N1.getValueType().bitsLE(VT) && |
6494 | "The input must be the same size or smaller than the result."); |
6495 | assert(VT.getVectorMinNumElements() < |
6496 | N1.getValueType().getVectorMinNumElements() && |
6497 | "The destination vector type must have fewer lanes than the input."); |
6498 | break; |
6499 | case ISD::ABS: |
6500 | assert(VT.isInteger() && VT == N1.getValueType() && "Invalid ABS!"); |
6501 | if (N1.isUndef()) |
6502 | return getConstant(Val: 0, DL, VT); |
6503 | break; |
6504 | case ISD::BSWAP: |
6505 | assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BSWAP!"); |
6506 | assert((VT.getScalarSizeInBits() % 16 == 0) && |
6507 | "BSWAP types must be a multiple of 16 bits!"); |
6508 | if (N1.isUndef()) |
6509 | return getUNDEF(VT); |
6510 | // bswap(bswap(X)) -> X. |
6511 | if (OpOpcode == ISD::BSWAP) |
6512 | return N1.getOperand(i: 0); |
6513 | break; |
6514 | case ISD::BITREVERSE: |
6515 | assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BITREVERSE!"); |
6516 | if (N1.isUndef()) |
6517 | return getUNDEF(VT); |
6518 | break; |
6519 | case ISD::BITCAST: |
6520 | assert(VT.getSizeInBits() == N1.getValueSizeInBits() && |
6521 | "Cannot BITCAST between types of different sizes!"); |
6522 | if (VT == N1.getValueType()) return N1; // noop conversion. |
6523 | if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x) |
6524 | return getNode(Opcode: ISD::BITCAST, DL, VT, N1: N1.getOperand(i: 0)); |
6525 | if (N1.isUndef()) |
6526 | return getUNDEF(VT); |
6527 | break; |
6528 | case ISD::SCALAR_TO_VECTOR: |
6529 | assert(VT.isVector() && !N1.getValueType().isVector() && |
6530 | (VT.getVectorElementType() == N1.getValueType() || |
6531 | (VT.getVectorElementType().isInteger() && |
6532 | N1.getValueType().isInteger() && |
6533 | VT.getVectorElementType().bitsLE(N1.getValueType()))) && |
6534 | "Illegal SCALAR_TO_VECTOR node!"); |
6535 | if (N1.isUndef()) |
6536 | return getUNDEF(VT); |
6537 | // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined. |
6538 | if (OpOpcode == ISD::EXTRACT_VECTOR_ELT && |
6539 | isa<ConstantSDNode>(Val: N1.getOperand(i: 1)) && |
6540 | N1.getConstantOperandVal(i: 1) == 0 && |
6541 | N1.getOperand(i: 0).getValueType() == VT) |
6542 | return N1.getOperand(i: 0); |
6543 | break; |
6544 | case ISD::FNEG: |
6545 | // Negation of an unknown bag of bits is still completely undefined. |
6546 | if (N1.isUndef()) |
6547 | return getUNDEF(VT); |
6548 | |
6549 | if (OpOpcode == ISD::FNEG) // --X -> X |
6550 | return N1.getOperand(i: 0); |
6551 | break; |
6552 | case ISD::FABS: |
6553 | if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) |
6554 | return getNode(Opcode: ISD::FABS, DL, VT, N1: N1.getOperand(i: 0)); |
6555 | break; |
6556 | case ISD::VSCALE: |
6557 | assert(VT == N1.getValueType() && "Unexpected VT!"); |
6558 | break; |
6559 | case ISD::CTPOP: |
6560 | if (N1.getValueType().getScalarType() == MVT::i1) |
6561 | return N1; |
6562 | break; |
6563 | case ISD::CTLZ: |
6564 | case ISD::CTTZ: |
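 | // For i1: ctlz(0) = cttz(0) = 1 and ctlz(1) = cttz(1) = 0, i.e. NOT. |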
6565 | if (N1.getValueType().getScalarType() == MVT::i1) |
6566 | return getNOT(DL, Val: N1, VT: N1.getValueType()); |
6567 | break; |
6568 | case ISD::VECREDUCE_ADD: |
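 | // i1 addition is XOR, so an i1 add reduction is a parity (XOR) reduction. |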
6569 | if (N1.getValueType().getScalarType() == MVT::i1) |
6570 | return getNode(Opcode: ISD::VECREDUCE_XOR, DL, VT, N1); |
6571 | break; |
6572 | case ISD::VECREDUCE_SMIN: |
6573 | case ISD::VECREDUCE_UMAX: |
6574 | if (N1.getValueType().getScalarType() == MVT::i1) |
6575 | return getNode(Opcode: ISD::VECREDUCE_OR, DL, VT, N1); |
6576 | break; |
6577 | case ISD::VECREDUCE_SMAX: |
6578 | case ISD::VECREDUCE_UMIN: |
6579 | if (N1.getValueType().getScalarType() == MVT::i1) |
6580 | return getNode(Opcode: ISD::VECREDUCE_AND, DL, VT, N1); |
6581 | break; |
6582 | case ISD::SPLAT_VECTOR: |
6583 | assert(VT.isVector() && "Wrong return type!"); |
6584 | // FIXME: Hexagon uses i32 scalar for a floating point zero vector so allow |
6585 | // that for now. |
6586 | assert((VT.getVectorElementType() == N1.getValueType() || |
6587 | (VT.isFloatingPoint() && N1.getValueType() == MVT::i32) || |
6588 | (VT.getVectorElementType().isInteger() && |
6589 | N1.getValueType().isInteger() && |
6590 | VT.getVectorElementType().bitsLE(N1.getValueType()))) && |
6591 | "Wrong operand type!"); |
6592 | break; |
6593 | } |
6594 | |
6595 | SDNode *N; |
6596 | SDVTList VTs = getVTList(VT); |
6597 | SDValue Ops[] = {N1}; |
6598 | if (VT != MVT::Glue) { // Don't CSE glue-producing nodes. |
6599 | FoldingSetNodeID ID; |
6600 | AddNodeIDNode(ID, OpC: Opcode, VTList: VTs, OpList: Ops); |
6601 | void *IP = nullptr; |
6602 | if (SDNode *E = FindNodeOrInsertPos(ID, DL, InsertPos&: IP)) { |
6603 | E->intersectFlagsWith(Flags); |
6604 | return SDValue(E, 0); |
6605 | } |
6606 | |
6607 | N = newSDNode<SDNode>(Args&: Opcode, Args: DL.getIROrder(), Args: DL.getDebugLoc(), Args&: VTs); |
6608 | N->setFlags(Flags); |
6609 | createOperands(Node: N, Vals: Ops); |
6610 | CSEMap.InsertNode(N, InsertPos: IP); |
6611 | } else { |
6612 | N = newSDNode<SDNode>(Args&: Opcode, Args: DL.getIROrder(), Args: DL.getDebugLoc(), Args&: VTs); |
6613 | createOperands(Node: N, Vals: Ops); |
6614 | } |
6615 | |
6616 | InsertNode(N); |
6617 | SDValue V = SDValue(N, 0); |
6618 | NewSDValueDbgMsg(V, Msg: "Creating new node: ", G: this); |
6619 | return V; |
6620 | } |
6621 | |
6622 | static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, |
6623 | const APInt &C2) { |
6624 | switch (Opcode) { |
6625 | case ISD::ADD: return C1 + C2; |
6626 | case ISD::SUB: return C1 - C2; |
6627 | case ISD::MUL: return C1 * C2; |
6628 | case ISD::AND: return C1 & C2; |
6629 | case ISD::OR: return C1 | C2; |
6630 | case ISD::XOR: return C1 ^ C2; |
6631 | case ISD::SHL: return C1 << C2; |
6632 | case ISD::SRL: return C1.lshr(ShiftAmt: C2); |
6633 | case ISD::SRA: return C1.ashr(ShiftAmt: C2); |
6634 | case ISD::ROTL: return C1.rotl(rotateAmt: C2); |
6635 | case ISD::ROTR: return C1.rotr(rotateAmt: C2); |
6636 | case ISD::SMIN: return C1.sle(RHS: C2) ? C1 : C2; |
6637 | case ISD::SMAX: return C1.sge(RHS: C2) ? C1 : C2; |
6638 | case ISD::UMIN: return C1.ule(RHS: C2) ? C1 : C2; |
6639 | case ISD::UMAX: return C1.uge(RHS: C2) ? C1 : C2; |
6640 | case ISD::SADDSAT: return C1.sadd_sat(RHS: C2); |
6641 | case ISD::UADDSAT: return C1.uadd_sat(RHS: C2); |
6642 | case ISD::SSUBSAT: return C1.ssub_sat(RHS: C2); |
6643 | case ISD::USUBSAT: return C1.usub_sat(RHS: C2); |
6644 | case ISD::SSHLSAT: return C1.sshl_sat(RHS: C2); |
6645 | case ISD::USHLSAT: return C1.ushl_sat(RHS: C2); |
6646 | case ISD::UDIV: |
6647 | if (!C2.getBoolValue()) |
6648 | break; |
6649 | return C1.udiv(RHS: C2); |
6650 | case ISD::UREM: |
6651 | if (!C2.getBoolValue()) |
6652 | break; |
6653 | return C1.urem(RHS: C2); |
6654 | case ISD::SDIV: |
6655 | if (!C2.getBoolValue()) |
6656 | break; |
6657 | return C1.sdiv(RHS: C2); |
6658 | case ISD::SREM: |
6659 | if (!C2.getBoolValue()) |
6660 | break; |
6661 | return C1.srem(RHS: C2); |
6662 | case ISD::AVGFLOORS: |
6663 | return APIntOps::avgFloorS(C1, C2); |
6664 | case ISD::AVGFLOORU: |
6665 | return APIntOps::avgFloorU(C1, C2); |
6666 | case ISD::AVGCEILS: |
6667 | return APIntOps::avgCeilS(C1, C2); |
6668 | case ISD::AVGCEILU: |
6669 | return APIntOps::avgCeilU(C1, C2); |
6670 | case ISD::ABDS: |
6671 | return APIntOps::abds(A: C1, B: C2); |
6672 | case ISD::ABDU: |
6673 | return APIntOps::abdu(A: C1, B: C2); |
6674 | case ISD::MULHS: |
6675 | return APIntOps::mulhs(C1, C2); |
6676 | case ISD::MULHU: |
6677 | return APIntOps::mulhu(C1, C2); |
6678 | } |
6679 | return std::nullopt; |
6680 | } |
6681 | // Handle constant folding with UNDEF. |
6682 | // TODO: Handle more cases. |
6683 | static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1, |
6684 | bool IsUndef1, const APInt &C2, |
6685 | bool IsUndef2) { |
6686 | if (!(IsUndef1 || IsUndef2)) |
6687 | return FoldValue(Opcode, C1, C2); |
6688 | |
6689 | // Fold and(x, undef) -> 0 |
6690 | // Fold mul(x, undef) -> 0 |
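 | // (undef may be chosen to be zero, and zero annihilates both AND and MUL) |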
6691 | if (Opcode == ISD::AND || Opcode == ISD::MUL) |
6692 | return APInt::getZero(numBits: C1.getBitWidth()); |
6693 | |
6694 | return std::nullopt; |
6695 | } |
6696 | |
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
                                       const GlobalAddressSDNode *GA,
                                       const SDNode *N2) {
  if (GA->getOpcode() != ISD::GlobalAddress)
    return SDValue();
  if (!TLI->isOffsetFoldingLegal(GA))
    return SDValue();
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C2)
    return SDValue();
  int64_t Offset = C2->getSExtValue();
  switch (Opcode) {
  case ISD::ADD: break;
  case ISD::SUB: Offset = -uint64_t(Offset); break;
  default: return SDValue();
  }
  return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT,
                          GA->getOffset() + uint64_t(Offset));
}

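// Return true if applying Opcode to these operands is known to produce undef.
// Currently this recognizes integer div/rem where the divisor (or any element
// of a constant divisor vector) is zero or undef.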
bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
  switch (Opcode) {
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM: {
    // If a divisor is zero/undef or any element of a divisor vector is
    // zero/undef, the whole op is undef.
    assert(Ops.size() == 2 && "Div/rem should have 2 operands");
    SDValue Divisor = Ops[1];
    if (Divisor.isUndef() || isNullConstant(Divisor))
      return true;

    return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
           llvm::any_of(Divisor->op_values(),
                        [](SDValue V) { return V.isUndef() ||
                                               isNullConstant(V); });
    // TODO: Handle signed overflow.
  }
  // TODO: Handle oversized shifts.
  default:
    return false;
  }
}

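// Fold an arithmetic node with all-constant operands into a constant node,
// handling scalar operands, BUILD_VECTOR/SPLAT_VECTOR operands lane by lane,
// and bitcasted constant vectors. Returns an empty SDValue on failure.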
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
                                             EVT VT, ArrayRef<SDValue> Ops,
                                             SDNodeFlags Flags) {
  // If the opcode is a target-specific ISD node, there's nothing we can
  // do here and the operand rules may not line up with the below, so
  // bail early.
  // We can't create a scalar CONCAT_VECTORS so skip it. It will break
  // for concats involving SPLAT_VECTOR. Concats of BUILD_VECTORS are handled by
  // foldCONCAT_VECTORS in getNode before this is called.
  if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
    return SDValue();

  unsigned NumOps = Ops.size();
  if (NumOps == 0)
    return SDValue();

  if (isUndef(Opcode, Ops))
    return getUNDEF(VT);

  // Handle unary special cases.
  if (NumOps == 1) {
    SDValue N1 = Ops[0];

    // Constant fold unary operations with an integer constant operand. Even
    // opaque constants will be folded, because the folding of unary operations
    // doesn't create new constants with different values. Nevertheless, the
    // opaque flag is preserved during folding to prevent future folding with
    // other constants.
    if (auto *C = dyn_cast<ConstantSDNode>(N1)) {
      const APInt &Val = C->getAPIntValue();
      switch (Opcode) {
      case ISD::SIGN_EXTEND:
        return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
                           C->isTargetOpcode(), C->isOpaque());
      case ISD::TRUNCATE:
        if (C->isOpaque())
          break;
        [[fallthrough]];
      case ISD::ZERO_EXTEND:
        return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
                           C->isTargetOpcode(), C->isOpaque());
      case ISD::ANY_EXTEND:
        // Some targets like RISC-V prefer to sign extend some types.
        if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
          return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
                             C->isTargetOpcode(), C->isOpaque());
        return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
                           C->isTargetOpcode(), C->isOpaque());
      case ISD::ABS:
        return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::BITREVERSE:
        return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::BSWAP:
        return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::CTPOP:
        return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::CTLZ:
      case ISD::CTLZ_ZERO_UNDEF:
        return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::CTTZ:
      case ISD::CTTZ_ZERO_UNDEF:
        return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::UINT_TO_FP:
      case ISD::SINT_TO_FP: {
        APFloat FPV(VT.getFltSemantics(), APInt::getZero(VT.getSizeInBits()));
        (void)FPV.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
                                   APFloat::rmNearestTiesToEven);
        return getConstantFP(FPV, DL, VT);
      }
      case ISD::FP16_TO_FP:
      case ISD::BF16_TO_FP: {
        bool Ignored;
        APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
                                              : APFloat::BFloat(),
                    (Val.getBitWidth() == 16) ? Val : Val.trunc(16));

        // This can return overflow, underflow, or inexact; we don't care.
        // FIXME need to be more flexible about rounding mode.
        (void)FPV.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven,
                          &Ignored);
        return getConstantFP(FPV, DL, VT);
      }
      case ISD::STEP_VECTOR:
        if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
          return V;
        break;
      case ISD::BITCAST:
        if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
          return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
        if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
          return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
        if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
          return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
        if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
          return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
        break;
      }
    }

    // Constant fold unary operations with a floating point constant operand.
    if (auto *C = dyn_cast<ConstantFPSDNode>(N1)) {
      APFloat V = C->getValueAPF(); // make copy
      switch (Opcode) {
      case ISD::FNEG:
        V.changeSign();
        return getConstantFP(V, DL, VT);
      case ISD::FABS:
        V.clearSign();
        return getConstantFP(V, DL, VT);
      case ISD::FCEIL: {
        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
        if (fs == APFloat::opOK || fs == APFloat::opInexact)
          return getConstantFP(V, DL, VT);
        return SDValue();
      }
      case ISD::FTRUNC: {
        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
        if (fs == APFloat::opOK || fs == APFloat::opInexact)
          return getConstantFP(V, DL, VT);
        return SDValue();
      }
      case ISD::FFLOOR: {
        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
        if (fs == APFloat::opOK || fs == APFloat::opInexact)
          return getConstantFP(V, DL, VT);
        return SDValue();
      }
      case ISD::FP_EXTEND: {
        bool ignored;
        // This can return overflow, underflow, or inexact; we don't care.
        // FIXME need to be more flexible about rounding mode.
        (void)V.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven,
                        &ignored);
        return getConstantFP(V, DL, VT);
      }
      case ISD::FP_TO_SINT:
      case ISD::FP_TO_UINT: {
        bool ignored;
        APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
        // FIXME need to be more flexible about rounding mode.
        APFloat::opStatus s =
            V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
        if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
          break;
        return getConstant(IntVal, DL, VT);
      }
      case ISD::FP_TO_FP16:
      case ISD::FP_TO_BF16: {
        bool Ignored;
        // This can return overflow, underflow, or inexact; we don't care.
        // FIXME need to be more flexible about rounding mode.
        (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
                                                  : APFloat::BFloat(),
                        APFloat::rmNearestTiesToEven, &Ignored);
        return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
      }
      case ISD::BITCAST:
        if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
          return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
                             VT);
        if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
          return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
                             VT);
        if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
          return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL,
                             VT);
        if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
          return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
        break;
      }
    }

    // Early-out if we failed to constant fold a bitcast.
    if (Opcode == ISD::BITCAST)
      return SDValue();
  }

  // Handle binops special cases.
  if (NumOps == 2) {
    if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops))
      return CFP;

    if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
      if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
        if (C1->isOpaque() || C2->isOpaque())
          return SDValue();

        std::optional<APInt> FoldAttempt =
            FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
        if (!FoldAttempt)
          return SDValue();

        SDValue Folded = getConstant(*FoldAttempt, DL, VT);
        assert((!Folded || !VT.isVector()) &&
               "Can't fold vector ops with scalar operands");
        return Folded;
      }
    }

    // fold (add Sym, c) -> Sym+c
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0]))
      return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode());
    if (TLI->isCommutativeBinOp(Opcode))
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))
        return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());

    // fold (sext_in_reg c1) -> c2
    if (Opcode == ISD::SIGN_EXTEND_INREG) {
      EVT EVT = cast<VTSDNode>(Ops[1])->getVT();

      auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) {
        unsigned FromBits = EVT.getScalarSizeInBits();
        Val <<= Val.getBitWidth() - FromBits;
        Val.ashrInPlace(Val.getBitWidth() - FromBits);
        return getConstant(Val, DL, ConstantVT);
      };

      if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
        const APInt &Val = C1->getAPIntValue();
        return SignExtendInReg(Val, VT);
      }

      if (ISD::isBuildVectorOfConstantSDNodes(Ops[0].getNode())) {
        SmallVector<SDValue, 8> ScalarOps;
        llvm::EVT OpVT = Ops[0].getOperand(0).getValueType();
        for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) {
          SDValue Op = Ops[0].getOperand(I);
          if (Op.isUndef()) {
            ScalarOps.push_back(getUNDEF(OpVT));
            continue;
          }
          const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
          ScalarOps.push_back(SignExtendInReg(Val, OpVT));
        }
        return getBuildVector(VT, DL, ScalarOps);
      }

      if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR &&
          isa<ConstantSDNode>(Ops[0].getOperand(0)))
        return getNode(ISD::SPLAT_VECTOR, DL, VT,
                       SignExtendInReg(Ops[0].getConstantOperandAPInt(0),
                                       Ops[0].getOperand(0).getValueType()));
    }
  }

  // This is for vector folding only from here on.
  if (!VT.isVector())
    return SDValue();

  ElementCount NumElts = VT.getVectorElementCount();

  // See if we can fold through any bitcasted integer ops.
  if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
      Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
      (Ops[0].getOpcode() == ISD::BITCAST ||
       Ops[1].getOpcode() == ISD::BITCAST)) {
    SDValue N1 = peekThroughBitcasts(Ops[0]);
    SDValue N2 = peekThroughBitcasts(Ops[1]);
    auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
    auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
    if (BV1 && BV2 && N1.getValueType().isInteger() &&
        N2.getValueType().isInteger()) {
      bool IsLE = getDataLayout().isLittleEndian();
      unsigned EltBits = VT.getScalarSizeInBits();
      SmallVector<APInt> RawBits1, RawBits2;
      BitVector UndefElts1, UndefElts2;
      if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
          BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2)) {
        SmallVector<APInt> RawBits;
        for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
          std::optional<APInt> Fold = FoldValueWithUndef(
              Opcode, RawBits1[I], UndefElts1[I], RawBits2[I], UndefElts2[I]);
          if (!Fold)
            break;
          RawBits.push_back(*Fold);
        }
        if (RawBits.size() == NumElts.getFixedValue()) {
          // We have constant folded, but we might need to cast this again back
          // to the original (possibly legalized) type.
          EVT BVVT, BVEltVT;
          if (N1.getValueType() == VT) {
            BVVT = N1.getValueType();
            BVEltVT = BV1->getOperand(0).getValueType();
          } else {
            BVVT = N2.getValueType();
            BVEltVT = BV2->getOperand(0).getValueType();
          }
          unsigned BVEltBits = BVEltVT.getSizeInBits();
          SmallVector<APInt> DstBits;
          BitVector DstUndefs;
          BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),
                                           DstBits, RawBits, DstUndefs,
                                           BitVector(RawBits.size(), false));
          SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));
          for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
            if (DstUndefs[I])
              continue;
            Ops[I] = getConstant(DstBits[I].sext(BVEltBits), DL, BVEltVT);
          }
          return getBitcast(VT, getBuildVector(BVVT, DL, Ops));
        }
      }
    }
  }

  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
  // (shl step_vector(C0), C1) -> (step_vector(C0 << C1))
  if ((Opcode == ISD::MUL || Opcode == ISD::SHL) &&
      Ops[0].getOpcode() == ISD::STEP_VECTOR) {
    APInt RHSVal;
    if (ISD::isConstantSplatVector(Ops[1].getNode(), RHSVal)) {
      APInt NewStep = Opcode == ISD::MUL
                          ? Ops[0].getConstantOperandAPInt(0) * RHSVal
                          : Ops[0].getConstantOperandAPInt(0) << RHSVal;
      return getStepVector(DL, VT, NewStep);
    }
  }

  auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
    return !Op.getValueType().isVector() ||
           Op.getValueType().getVectorElementCount() == NumElts;
  };

  auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
    return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
           Op.getOpcode() == ISD::BUILD_VECTOR ||
           Op.getOpcode() == ISD::SPLAT_VECTOR;
  };

  // All operands must be vector types with the same number of elements as
  // the result type and must be either UNDEF or a build/splat vector
  // or UNDEF scalars.
  if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
      !llvm::all_of(Ops, IsScalarOrSameVectorSize))
    return SDValue();

  // If we are comparing vectors, then the result needs to be an i1 boolean that
  // is then extended back to the legal result type depending on how booleans
  // are represented.
  EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
  ISD::NodeType ExtendCode =
      (Opcode == ISD::SETCC && SVT != VT.getScalarType())
          ? TargetLowering::getExtendForContent(TLI->getBooleanContents(VT))
          : ISD::SIGN_EXTEND;

  // Find legal integer scalar type for constant promotion and
  // ensure that its scalar size is at least as large as source.
  EVT LegalSVT = VT.getScalarType();
  if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
    LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
    if (LegalSVT.bitsLT(VT.getScalarType()))
      return SDValue();
  }

  // For scalable vector types we know we're dealing with SPLAT_VECTORs. We
  // only have one operand to check. For fixed-length vector types we may have
  // a combination of BUILD_VECTOR and SPLAT_VECTOR.
  unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();

  // Constant fold each scalar lane separately.
  SmallVector<SDValue, 4> ScalarResults;
  for (unsigned I = 0; I != NumVectorElts; I++) {
    SmallVector<SDValue, 4> ScalarOps;
    for (SDValue Op : Ops) {
      EVT InSVT = Op.getValueType().getScalarType();
      if (Op.getOpcode() != ISD::BUILD_VECTOR &&
          Op.getOpcode() != ISD::SPLAT_VECTOR) {
        if (Op.isUndef())
          ScalarOps.push_back(getUNDEF(InSVT));
        else
          ScalarOps.push_back(Op);
        continue;
      }

      SDValue ScalarOp =
          Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I);
      EVT ScalarVT = ScalarOp.getValueType();

      // Build vector (integer) scalar operands may need implicit
      // truncation - do this before constant folding.
      if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) {
        // Don't create illegally-typed nodes unless they're constants or undef
        // - if we fail to constant fold we can't guarantee the (dead) nodes
        // we're creating will be cleaned up before being visited for
        // legalization.
        if (NewNodesMustHaveLegalTypes && !ScalarOp.isUndef() &&
            !isa<ConstantSDNode>(ScalarOp) &&
            TLI->getTypeAction(*getContext(), InSVT) !=
                TargetLowering::TypeLegal)
          return SDValue();
        ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
      }

      ScalarOps.push_back(ScalarOp);
    }

    // Constant fold the scalar operands.
    SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);

    // Scalar folding only succeeded if the result is a constant or UNDEF.
    if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
        ScalarResult.getOpcode() != ISD::ConstantFP)
      return SDValue();

    // Legalize the (integer) scalar constant if necessary. We only do
    // this once we know the folding succeeded, since otherwise we would
    // get a node with illegal type which has a user.
    if (LegalSVT != SVT)
      ScalarResult = getNode(ExtendCode, DL, LegalSVT, ScalarResult);

    ScalarResults.push_back(ScalarResult);
  }

  SDValue V = NumElts.isScalable() ? getSplatVector(VT, DL, ScalarResults[0])
                                   : getBuildVector(VT, DL, ScalarResults);
  NewSDValueDbgMsg(V, "New node fold constant vector: ", this);
  return V;
}

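// Constant fold binary floating-point math (and FP_ROUND) on constant or
// constant-splat operands, and fold the UNDEF special cases for FP binops.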
SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
                                         EVT VT, ArrayRef<SDValue> Ops) {
  // TODO: Add support for unary/ternary fp opcodes.
  if (Ops.size() != 2)
    return SDValue();

  // TODO: We don't do any constant folding for strict FP opcodes here, but we
  //       should. That will require dealing with a potentially non-default
  //       rounding mode, checking the "opStatus" return value from the APFloat
  //       math calculations, and possibly other variations.
  SDValue N1 = Ops[0];
  SDValue N2 = Ops[1];
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false);
  ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false);
  if (N1CFP && N2CFP) {
    APFloat C1 = N1CFP->getValueAPF(); // make copy
    const APFloat &C2 = N2CFP->getValueAPF();
    switch (Opcode) {
    case ISD::FADD:
      C1.add(C2, APFloat::rmNearestTiesToEven);
      return getConstantFP(C1, DL, VT);
    case ISD::FSUB:
      C1.subtract(C2, APFloat::rmNearestTiesToEven);
      return getConstantFP(C1, DL, VT);
    case ISD::FMUL:
      C1.multiply(C2, APFloat::rmNearestTiesToEven);
      return getConstantFP(C1, DL, VT);
    case ISD::FDIV:
      C1.divide(C2, APFloat::rmNearestTiesToEven);
      return getConstantFP(C1, DL, VT);
    case ISD::FREM:
      C1.mod(C2);
      return getConstantFP(C1, DL, VT);
    case ISD::FCOPYSIGN:
      C1.copySign(C2);
      return getConstantFP(C1, DL, VT);
    case ISD::FMINNUM:
      return getConstantFP(minnum(C1, C2), DL, VT);
    case ISD::FMAXNUM:
      return getConstantFP(maxnum(C1, C2), DL, VT);
    case ISD::FMINIMUM:
      return getConstantFP(minimum(C1, C2), DL, VT);
    case ISD::FMAXIMUM:
      return getConstantFP(maximum(C1, C2), DL, VT);
    case ISD::FMINIMUMNUM:
      return getConstantFP(minimumnum(C1, C2), DL, VT);
    case ISD::FMAXIMUMNUM:
      return getConstantFP(maximumnum(C1, C2), DL, VT);
    default: break;
    }
  }
  if (N1CFP && Opcode == ISD::FP_ROUND) {
    APFloat C1 = N1CFP->getValueAPF(); // make copy
    bool Unused;
    // This can return overflow, underflow, or inexact; we don't care.
    // FIXME need to be more flexible about rounding mode.
    (void)C1.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven,
                     &Unused);
    return getConstantFP(C1, DL, VT);
  }

  switch (Opcode) {
  case ISD::FSUB:
    // -0.0 - undef --> undef (consistent with "fneg undef")
    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true))
      if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef())
        return getUNDEF(VT);
    [[fallthrough]];

  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
    // If both operands are undef, the result is undef. If 1 operand is undef,
    // the result is NaN. This should match the behavior of the IR optimizer.
    if (N1.isUndef() && N2.isUndef())
      return getUNDEF(VT);
    if (N1.isUndef() || N2.isUndef())
      return getConstantFP(APFloat::getNaN(VT.getFltSemantics()), DL, VT);
  }
  return SDValue();
}

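// Return an AssertAlign node recording that Val is known to be at least
// A-aligned, reusing an existing node from the CSE map when possible.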
SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) {
  assert(Val.getValueType().isInteger() && "Invalid AssertAlign!");

  // There's no need to assert on a byte-aligned pointer. All pointers are at
  // least byte aligned.
  if (A == Align(1))
    return Val;

  SDVTList VTs = getVTList(Val.getValueType());
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::AssertAlign, VTs, {Val});
  ID.AddInteger(A.value());

  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
    return SDValue(E, 0);

  auto *N =
      newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, A);
  createOperands(N, {Val});

  CSEMap.InsertNode(N, IP);
  InsertNode(N);

  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              SDValue N1, SDValue N2) {
  SDNodeFlags Flags;
  if (Inserter)
    Flags = Inserter->getFlags();
  return getNode(Opcode, DL, VT, N1, N2, Flags);
}

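// For commutative binops, move constants (and splats, relative to
// step_vector) to the RHS so that later folds only need to match one operand
// order.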
void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
                                                SDValue &N2) const {
  if (!TLI->isCommutativeBinOp(Opcode))
    return;

  // Canonicalize:
  //   binop(const, nonconst) -> binop(nonconst, const)
  bool N1C = isConstantIntBuildVectorOrConstantInt(N1);
  bool N2C = isConstantIntBuildVectorOrConstantInt(N2);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  bool N2CFP = isConstantFPBuildVectorOrConstantFP(N2);
  if ((N1C && !N2C) || (N1CFP && !N2CFP))
    std::swap(N1, N2);

  // Canonicalize:
  //   binop(splat(x), step_vector) -> binop(step_vector, splat(x))
  else if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
           N2.getOpcode() == ISD::STEP_VECTOR)
    std::swap(N1, N2);
}

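// Two-operand getNode: applies opcode-specific simplifications and constant
// folding, canonicalizes constant/undef operand placement, and otherwise
// creates (or CSE-reuses) the requested node.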
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              SDValue N1, SDValue N2, const SDNodeFlags Flags) {
  assert(N1.getOpcode() != ISD::DELETED_NODE &&
         N2.getOpcode() != ISD::DELETED_NODE &&
         "Operand is DELETED_NODE!");

  canonicalizeCommutativeBinop(Opcode, N1, N2);

  auto *N1C = dyn_cast<ConstantSDNode>(N1);
  auto *N2C = dyn_cast<ConstantSDNode>(N2);

  // Don't allow undefs in vector splats - we might be returning N2 when
  // folding to zero etc.
  ConstantSDNode *N2CV =
      isConstOrConstSplat(N2, /*AllowUndefs*/ false, /*AllowTruncation*/ true);

  switch (Opcode) {
  default: break;
  case ISD::TokenFactor:
    assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
           N2.getValueType() == MVT::Other && "Invalid token factor!");
    // Fold trivial token factors.
    if (N1.getOpcode() == ISD::EntryToken) return N2;
    if (N2.getOpcode() == ISD::EntryToken) return N1;
    if (N1 == N2) return N1;
    break;
  case ISD::BUILD_VECTOR: {
    // Attempt to simplify BUILD_VECTOR.
    SDValue Ops[] = {N1, N2};
    if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
      return V;
    break;
  }
  case ISD::CONCAT_VECTORS: {
    SDValue Ops[] = {N1, N2};
    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
      return V;
    break;
  }
  case ISD::AND:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
    // worth handling here.
    if (N2CV && N2CV->isZero())
      return N2;
    if (N2CV && N2CV->isAllOnes()) // X & -1 -> X
      return N1;
    break;
  case ISD::OR:
  case ISD::XOR:
  case ISD::ADD:
  case ISD::PTRADD:
  case ISD::SUB:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    // The equal operand types requirement is unnecessarily strong for PTRADD.
    // However, the SelectionDAGBuilder does not generate PTRADDs with
    // different operand types, and we'd need to re-implement GEP's
    // non-standard wrapping logic everywhere PTRADDs may be folded or combined
    // to properly support them. If/when we introduce pointer types to the
    // SDAG, we will need to relax this constraint.

    // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
    // it's worth handling here.
    if (N2CV && N2CV->isZero())
      return N1;
    if ((Opcode == ISD::ADD || Opcode == ISD::SUB) &&
        VT.getScalarType() == MVT::i1)
      return getNode(ISD::XOR, DL, VT, N1, N2);
    break;
  case ISD::MUL:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    if (VT.getScalarType() == MVT::i1)
      return getNode(ISD::AND, DL, VT, N1, N2);
    if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
      const APInt &MulImm = N1->getConstantOperandAPInt(0);
      const APInt &N2CImm = N2C->getAPIntValue();
      return getVScale(DL, VT, MulImm * N2CImm);
    }
    break;
  case ISD::UDIV:
  case ISD::UREM:
  case ISD::MULHU:
  case ISD::MULHS:
  case ISD::SDIV:
  case ISD::SREM:
  case ISD::SADDSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::USUBSAT:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    if (VT.getScalarType() == MVT::i1) {
      // fold (add_sat x, y) -> (or x, y) for bool types.
      if (Opcode == ISD::SADDSAT || Opcode == ISD::UADDSAT)
        return getNode(ISD::OR, DL, VT, N1, N2);
      // fold (sub_sat x, y) -> (and x, ~y) for bool types.
      if (Opcode == ISD::SSUBSAT || Opcode == ISD::USUBSAT)
        return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT));
    }
    break;
  case ISD::SCMP:
  case ISD::UCMP:
    assert(N1.getValueType() == N2.getValueType() &&
           "Types of operands of UCMP/SCMP must match");
    assert(N1.getValueType().isVector() == VT.isVector() &&
           "Operands and return type must both be scalars or vectors");
    if (VT.isVector())
      assert(VT.getVectorElementCount() ==
                 N1.getValueType().getVectorElementCount() &&
             "Result and operands must have the same number of elements");
    break;
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    if (VT.getScalarType() == MVT::i1)
      return getNode(ISD::XOR, DL, VT, N1, N2);
    break;
  case ISD::SMIN:
  case ISD::UMAX:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    if (VT.getScalarType() == MVT::i1)
      return getNode(ISD::OR, DL, VT, N1, N2);
    break;
  case ISD::SMAX:
  case ISD::UMIN:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    if (VT.getScalarType() == MVT::i1)
      return getNode(ISD::AND, DL, VT, N1, N2);
    break;
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
    assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    if (SDValue V = simplifyFPBinop(Opcode, N1, N2, Flags))
      return V;
    break;
  case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
    assert(N1.getValueType() == VT &&
           N1.getValueType().isFloatingPoint() &&
           N2.getValueType().isFloatingPoint() &&
           "Invalid FCOPYSIGN!");
    break;
  case ISD::SHL:
    if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
      const APInt &MulImm = N1->getConstantOperandAPInt(0);
      const APInt &ShiftImm = N2C->getAPIntValue();
      return getVScale(DL, VT, MulImm << ShiftImm);
    }
    [[fallthrough]];
  case ISD::SRA:
  case ISD::SRL:
    if (SDValue V = simplifyShift(N1, N2))
      return V;
    [[fallthrough]];
  case ISD::ROTL:
  case ISD::ROTR:
    assert(VT == N1.getValueType() &&
           "Shift operators return type must be the same as their first arg");
    assert(VT.isInteger() && N2.getValueType().isInteger() &&
           "Shifts only work on integers");
    assert((!VT.isVector() || VT == N2.getValueType()) &&
           "Vector shift amounts must have the same type as their first arg");
    // Verify that the shift amount VT is big enough to hold valid shift
    // amounts. This catches things like trying to shift an i1024 value by an
    // i8, which is easy to fall into in generic code that uses
    // TLI.getShiftAmount().
    assert(N2.getValueType().getScalarSizeInBits() >=
               Log2_32_Ceil(VT.getScalarSizeInBits()) &&
           "Invalid use of small shift amount with oversized value!");

    // Always fold shifts of i1 values so the code generator doesn't need to
    // handle them. Since we know the size of the shift has to be less than the
    // size of the value, the shift/rotate count is guaranteed to be zero.
    if (VT == MVT::i1)
      return N1;
    if (N2CV && N2CV->isZero())
      return N1;
    break;
  case ISD::FP_ROUND:
    assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() &&
           VT.bitsLE(N1.getValueType()) && N2C &&
           (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) &&
           N2.getOpcode() == ISD::TargetConstant && "Invalid FP_ROUND!");
    if (N1.getValueType() == VT) return N1; // noop conversion.
    break;
  case ISD::AssertNoFPClass: {
    assert(N1.getValueType().isFloatingPoint() &&
           "AssertNoFPClass is used for a non-floating type");
    assert(isa<ConstantSDNode>(N2) && "NoFPClass is not Constant");
    FPClassTest NoFPClass = static_cast<FPClassTest>(N2->getAsZExtVal());
    assert(llvm::to_underlying(NoFPClass) <=
               BitmaskEnumDetail::Mask<FPClassTest>() &&
           "FPClassTest value too large");
    (void)NoFPClass;
    break;
  }
  case ISD::AssertSext:
  case ISD::AssertZext: {
    EVT EVT = cast<VTSDNode>(N2)->getVT();
    assert(VT == N1.getValueType() && "Not an inreg extend!");
    assert(VT.isInteger() && EVT.isInteger() &&
           "Cannot *_EXTEND_INREG FP types");
    assert(!EVT.isVector() &&
           "AssertSExt/AssertZExt type should be the vector element type "
           "rather than the vector type!");
    assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
    if (VT.getScalarType() == EVT) return N1; // noop assertion.
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    EVT EVT = cast<VTSDNode>(N2)->getVT();
    assert(VT == N1.getValueType() && "Not an inreg extend!");
    assert(VT.isInteger() && EVT.isInteger() &&
           "Cannot *_EXTEND_INREG FP types");
    assert(EVT.isVector() == VT.isVector() &&
           "SIGN_EXTEND_INREG type should be vector iff the operand "
           "type is vector!");
    assert((!EVT.isVector() ||
            EVT.getVectorElementCount() == VT.getVectorElementCount()) &&
           "Vector element counts must match in SIGN_EXTEND_INREG");
    assert(EVT.bitsLE(VT) && "Not extending!");
    if (EVT == VT) return N1; // Not actually extending
    break;
  }
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT: {
    assert(VT.isInteger() && cast<VTSDNode>(N2)->getVT().isInteger() &&
           N1.getValueType().isFloatingPoint() && "Invalid FP_TO_*INT_SAT");
    assert(N1.getValueType().isVector() == VT.isVector() &&
           "FP_TO_*INT_SAT type should be vector iff the operand type is "
           "vector!");
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element counts must match in FP_TO_*INT_SAT");
    assert(!cast<VTSDNode>(N2)->getVT().isVector() &&
           "Type to saturate to must be a scalar.");
    assert(cast<VTSDNode>(N2)->getVT().bitsLE(VT.getScalarType()) &&
           "Not extending!");
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT:
    assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&
           "The result of EXTRACT_VECTOR_ELT must be at least as wide as the "
           "element type of the vector.");

    // Extract from an undefined value or using an undefined index is
    // undefined.
    if (N1.isUndef() || N2.isUndef())
      return getUNDEF(VT);

    // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF for fixed length
    // vectors. For scalable vectors we will provide appropriate support for
    // dealing with arbitrary indices.
    if (N2C && N1.getValueType().isFixedLengthVector() &&
        N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
      return getUNDEF(VT);

    // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
    // expanding copies of large vectors from registers. This only works for
    // fixed length vectors, since we need to know the exact number of
    // elements.
    if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
        N1.getOperand(0).getValueType().isFixedLengthVector()) {
      unsigned Factor = N1.getOperand(0).getValueType().getVectorNumElements();
      return getExtractVectorElt(DL, VT,
                                 N1.getOperand(N2C->getZExtValue() / Factor),
                                 N2C->getZExtValue() % Factor);
    }

    // EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while
    // lowering is expanding large vector constants.
    if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR ||
                N1.getOpcode() == ISD::SPLAT_VECTOR)) {
      assert((N1.getOpcode() != ISD::BUILD_VECTOR ||
              N1.getValueType().isFixedLengthVector()) &&
             "BUILD_VECTOR used for scalable vectors");
      unsigned Index =
          N1.getOpcode() == ISD::BUILD_VECTOR ? N2C->getZExtValue() : 0;
      SDValue Elt = N1.getOperand(Index);

      if (VT != Elt.getValueType())
        // If the vector element type is not legal, the BUILD_VECTOR operands
        // are promoted and implicitly truncated, and the result implicitly
        // extended. Make that explicit here.
        Elt = getAnyExtOrTrunc(Elt, DL, VT);

      return Elt;
    }

    // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
    // operations are lowered to scalars.
    if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
      // If the indices are the same, return the inserted element else
      // if the indices are known different, extract the element from
      // the original vector.
      SDValue N1Op2 = N1.getOperand(2);
      ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2);

      if (N1Op2C && N2C) {
        if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
          if (VT == N1.getOperand(1).getValueType())
            return N1.getOperand(1);
          if (VT.isFloatingPoint()) {
            assert(VT.getSizeInBits() >
                   N1.getOperand(1).getValueType().getSizeInBits());
            return getFPExtendOrRound(N1.getOperand(1), DL, VT);
          }
          return getSExtOrTrunc(N1.getOperand(1), DL, VT);
        }
        return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
      }
    }

    // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
    // when vector types are scalarized and v1iX is legal.
    // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx).
    // Here we are completely ignoring the extract element index (N2),
    // which is fine for fixed width vectors, since any index other than 0
    // is undefined anyway. However, this cannot be ignored for scalable
    // vectors - in theory we could support this, but we don't want to do this
    // without a profitability check.
    if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        N1.getValueType().isFixedLengthVector() &&
        N1.getValueType().getVectorNumElements() == 1) {
      return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
                     N1.getOperand(1));
    }
    break;
  case ISD::EXTRACT_ELEMENT:
    assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
    assert(!N1.getValueType().isVector() && !VT.isVector() &&
           (N1.getValueType().isInteger() == VT.isInteger()) &&
           N1.getValueType() != VT &&
           "Wrong types for EXTRACT_ELEMENT!");

    // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
    // 64-bit integers into 32-bit parts. Instead of building the extract of
    // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
    if (N1.getOpcode() == ISD::BUILD_PAIR)
      return N1.getOperand(N2C->getZExtValue());

    // EXTRACT_ELEMENT of a constant int is also very common.
    if (N1C) {
      unsigned ElementSize = VT.getSizeInBits();
      unsigned Shift = ElementSize * N2C->getZExtValue();
      const APInt &Val = N1C->getAPIntValue();
      return getConstant(Val.extractBits(ElementSize, Shift), DL, VT);
    }
    break;
  case ISD::EXTRACT_SUBVECTOR: {
    EVT N1VT = N1.getValueType();
    assert(VT.isVector() && N1VT.isVector() &&
           "Extract subvector VTs must be vectors!");
    assert(VT.getVectorElementType() == N1VT.getVectorElementType() &&
           "Extract subvector VTs must have the same element type!");
    assert((VT.isFixedLengthVector() || N1VT.isScalableVector()) &&
           "Cannot extract a scalable vector from a fixed length vector!");
    assert((VT.isScalableVector() != N1VT.isScalableVector() ||
            VT.getVectorMinNumElements() <= N1VT.getVectorMinNumElements()) &&
           "Extract subvector must be from larger vector to smaller vector!");
    assert(N2C && "Extract subvector index must be a constant");
    assert((VT.isScalableVector() != N1VT.isScalableVector() ||
            (VT.getVectorMinNumElements() + N2C->getZExtValue()) <=
                N1VT.getVectorMinNumElements()) &&
           "Extract subvector overflow!");
    assert(N2C->getAPIntValue().getBitWidth() ==
               TLI->getVectorIdxWidth(getDataLayout()) &&
           "Constant index for EXTRACT_SUBVECTOR has an invalid size");

    // Trivial extraction.
    if (VT == N1VT)
      return N1;

    // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
    if (N1.isUndef())
      return getUNDEF(VT);

    // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
    // the concat have the same type as the extract.
    if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
        VT == N1.getOperand(0).getValueType()) {
      unsigned Factor = VT.getVectorMinNumElements();
      return N1.getOperand(N2C->getZExtValue() / Factor);
    }

    // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
    // during shuffle legalization.
    if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
        VT == N1.getOperand(1).getValueType())
      return N1.getOperand(1);
    break;
  }
  }

  // Perform trivial constant folding.
  if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
    return SV;

  // Canonicalize an UNDEF to the RHS, even over a constant.
  if (N1.isUndef()) {
    if (TLI->isCommutativeBinOp(Opcode)) {
      std::swap(N1, N2);
    } else {
      switch (Opcode) {
      case ISD::PTRADD:
      case ISD::SUB:
        // fold op(undef, arg2) -> undef, fold op(poison, arg2) -> poison.
        return N1.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT);
      case ISD::SIGN_EXTEND_INREG:
      case ISD::UDIV:
      case ISD::SDIV:
      case ISD::UREM:
      case ISD::SREM:
      case ISD::SSUBSAT:
      case ISD::USUBSAT:
        // fold op(undef, arg2) -> 0, fold op(poison, arg2) -> poison.
        return N1.getOpcode() == ISD::POISON ? getPOISON(VT)
                                             : getConstant(0, DL, VT);
      }
    }
  }

  // Fold a bunch of operators when the RHS is undef.
  if (N2.isUndef()) {
    switch (Opcode) {
    case ISD::XOR:
      if (N1.isUndef())
        // Handle undef ^ undef -> 0 special case. This is a common
        // idiom (misuse).
        return getConstant(0, DL, VT);
      [[fallthrough]];
    case ISD::ADD:
    case ISD::PTRADD:
    case ISD::SUB:
    case ISD::UDIV:
    case ISD::SDIV:
    case ISD::UREM:
    case ISD::SREM:
      // fold op(arg1, undef) -> undef, fold op(arg1, poison) -> poison.
      return N2.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT);
    case ISD::MUL:
    case ISD::AND:
    case ISD::SSUBSAT:
    case ISD::USUBSAT:
      // fold op(arg1, undef) -> 0, fold op(arg1, poison) -> poison.
      return N2.getOpcode() == ISD::POISON ? getPOISON(VT)
                                           : getConstant(0, DL, VT);
    case ISD::OR:
    case ISD::SADDSAT:
    case ISD::UADDSAT:
      // fold op(arg1, undef) -> an all-ones constant, fold op(arg1, poison)
      // -> poison.
      return N2.getOpcode() == ISD::POISON ? getPOISON(VT)
                                           : getAllOnesConstant(DL, VT);
    }
  }

  // Memoize this node if possible.
  SDNode *N;
  SDVTList VTs = getVTList(VT);
  SDValue Ops[] = {N1, N2};
  if (VT != MVT::Glue) {
    FoldingSetNodeID ID;
    AddNodeIDNode(ID, Opcode, VTs, Ops);
    void *IP = nullptr;
    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
      E->intersectFlagsWith(Flags);
      return SDValue(E, 0);
    }

    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
    N->setFlags(Flags);
    createOperands(N, Ops);
    CSEMap.InsertNode(N, IP);
  } else {
    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
    createOperands(N, Ops);
  }

  InsertNode(N);
  SDValue V = SDValue(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              SDValue N1, SDValue N2, SDValue N3) {
  SDNodeFlags Flags;
  if (Inserter)
    Flags = Inserter->getFlags();
  return getNode(Opcode, DL, VT, N1, N2, N3, Flags);
}

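// Three-operand getNode: performs opcode-specific simplifications and
// constant folding before creating (or CSE-reusing) the requested node.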
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              SDValue N1, SDValue N2, SDValue N3,
                              const SDNodeFlags Flags) {
  assert(N1.getOpcode() != ISD::DELETED_NODE &&
         N2.getOpcode() != ISD::DELETED_NODE &&
         N3.getOpcode() != ISD::DELETED_NODE &&
         "Operand is DELETED_NODE!");
  // Perform various simplifications.
  switch (Opcode) {
  case ISD::FMA:
  case ISD::FMAD: {
    assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
    assert(N1.getValueType() == VT && N2.getValueType() == VT &&
           N3.getValueType() == VT && "FMA types must match!");
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
    ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
    ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
    if (N1CFP && N2CFP && N3CFP) {
      APFloat V1 = N1CFP->getValueAPF();
      const APFloat &V2 = N2CFP->getValueAPF();
      const APFloat &V3 = N3CFP->getValueAPF();
      if (Opcode == ISD::FMAD) {
        V1.multiply(V2, APFloat::rmNearestTiesToEven);
        V1.add(V3, APFloat::rmNearestTiesToEven);
      } else
        V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
      return getConstantFP(V1, DL, VT);
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Attempt to simplify BUILD_VECTOR.
    SDValue Ops[] = {N1, N2, N3};
    if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
      return V;
    break;
  }
  case ISD::CONCAT_VECTORS: {
    SDValue Ops[] = {N1, N2, N3};
    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
      return V;
    break;
  }
  case ISD::SETCC: {
    assert(VT.isInteger() && "SETCC result type must be an integer!");
    assert(N1.getValueType() == N2.getValueType() &&
           "SETCC operands must have the same type!");
    assert(VT.isVector() == N1.getValueType().isVector() &&
           "SETCC type should be vector iff the operand type is vector!");
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "SETCC vector element counts must match!");
    // Use FoldSetCC to simplify SETCC's.
    if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
      return V;
    // Vector constant folding.
    SDValue Ops[] = {N1, N2, N3};
    if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {
      NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
      return V;
    }
    break;
  }
  case ISD::SELECT:
  case ISD::VSELECT:
    if (SDValue V = simplifySelect(N1, N2, N3))
      return V;
    break;
  case ISD::VECTOR_SHUFFLE:
    llvm_unreachable("should use getVectorShuffle constructor!");
  case ISD::VECTOR_SPLICE: {
    if (cast<ConstantSDNode>(N3)->isZero())
      return N1;
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    assert(VT.isVector() && VT == N1.getValueType() &&
           "INSERT_VECTOR_ELT vector type mismatch");
    assert(VT.isFloatingPoint() == N2.getValueType().isFloatingPoint() &&
           "INSERT_VECTOR_ELT scalar fp/int mismatch");
    assert((!VT.isFloatingPoint() ||
            VT.getVectorElementType() == N2.getValueType()) &&
           "INSERT_VECTOR_ELT fp scalar type mismatch");
    assert((!VT.isInteger() ||
            VT.getScalarSizeInBits() <= N2.getScalarValueSizeInBits()) &&
           "INSERT_VECTOR_ELT int scalar size mismatch");

    auto *N3C = dyn_cast<ConstantSDNode>(N3);
    // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
    // for scalable vectors where we will generate appropriate code to
    // deal with out-of-bounds cases correctly.
    if (N3C && N1.getValueType().isFixedLengthVector() &&
        N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
      return getUNDEF(VT);

    // Undefined index can be assumed out-of-bounds, so that's UNDEF too.
    if (N3.isUndef())
      return getUNDEF(VT);

    // If the inserted element is an UNDEF, just use the input vector.
    if (N2.isUndef())
      return N1;

    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Inserting undef into undef is still undef.
    if (N1.isUndef() && N2.isUndef())
      return getUNDEF(VT);

    EVT N2VT = N2.getValueType();
    assert(VT == N1.getValueType() &&
           "Dest and insert subvector source types must match!");
    assert(VT.isVector() && N2VT.isVector() &&
           "Insert subvector VTs must be vectors!");
    assert(VT.getVectorElementType() == N2VT.getVectorElementType() &&
           "Insert subvector VTs must have the same element type!");
    assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) &&
           "Cannot insert a scalable vector into a fixed length vector!");
    assert((VT.isScalableVector() != N2VT.isScalableVector() ||
            VT.getVectorMinNumElements() >= N2VT.getVectorMinNumElements()) &&
           "Insert subvector must be from smaller vector to larger vector!");
    assert(isa<ConstantSDNode>(N3) &&
           "Insert subvector index must be constant");
    assert((VT.isScalableVector() != N2VT.isScalableVector() ||
            (N2VT.getVectorMinNumElements() + N3->getAsZExtVal()) <=
                VT.getVectorMinNumElements()) &&
           "Insert subvector overflow!");
    assert(N3->getAsAPIntVal().getBitWidth() ==
               TLI->getVectorIdxWidth(getDataLayout()) &&
           "Constant index for INSERT_SUBVECTOR has an invalid size");

    // Trivial insertion.
    if (VT == N2VT)
      return N2;

    // If this is an insert of an extracted vector into an undef vector, we
    // can just use the input to the extract.
    if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
      return N2.getOperand(0);
    break;
  }
7967 | case ISD::BITCAST: |
7968 | // Fold bit_convert nodes from a type to themselves. |
7969 | if (N1.getValueType() == VT) |
7970 | return N1; |
7971 | break; |
7972 | case ISD::VP_TRUNCATE: |
7973 | case ISD::VP_SIGN_EXTEND: |
7974 | case ISD::VP_ZERO_EXTEND: |
7975 | // Don't create noop casts. |
7976 | if (N1.getValueType() == VT) |
7977 | return N1; |
7978 | break; |
7979 | case ISD::VECTOR_COMPRESS: { |
7980 | [[maybe_unused]] EVT VecVT = N1.getValueType(); |
7981 | [[maybe_unused]] EVT MaskVT = N2.getValueType(); |
7982 | [[maybe_unused]] EVT PassthruVT = N3.getValueType(); |
7983 | assert(VT == VecVT && "Vector and result type don't match."); |
7984 | assert(VecVT.isVector() && MaskVT.isVector() && PassthruVT.isVector() && |
7985 | "All inputs must be vectors."); |
7986 | assert(VecVT == PassthruVT && "Vector and passthru types don't match."); |
7987 | assert(VecVT.getVectorElementCount() == MaskVT.getVectorElementCount() && |
7988 | "Vector and mask must have same number of elements."); |
7989 | |
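// An undef source vector or mask leaves the result unconstrained, so
// folding to the passthru operand is a legal refinement.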
7990 | if (N1.isUndef() || N2.isUndef()) |
7991 | return N3; |
7992 | |
7993 | break; |
7994 | } |
7995 | case ISD::PARTIAL_REDUCE_UMLA: |
7996 | case ISD::PARTIAL_REDUCE_SMLA: |
7997 | case ISD::PARTIAL_REDUCE_SUMLA: { |
7998 | [[maybe_unused]] EVT AccVT = N1.getValueType(); |
7999 | [[maybe_unused]] EVT Input1VT = N2.getValueType(); |
8000 | [[maybe_unused]] EVT Input2VT = N3.getValueType(); |
8001 | assert(Input1VT.isVector() && Input1VT == Input2VT && |
8002 | "Expected the second and third operands of the PARTIAL_REDUCE_MLA " |
8003 | "node to have the same type!"); |
8004 | assert(VT.isVector() && VT == AccVT && |
8005 | "Expected the first operand of the PARTIAL_REDUCE_MLA node to have " |
8006 | "the same type as its result!"); |
8007 | assert(Input1VT.getVectorElementCount().hasKnownScalarFactor( |
8008 | AccVT.getVectorElementCount()) && |
8009 | "Expected the element count of the second and third operands of the " |
8010 | "PARTIAL_REDUCE_MLA node to be a positive integer multiple of the " |
8011 | "element count of the first operand and the result!"); |
8012 | assert(N2.getScalarValueSizeInBits() <= N1.getScalarValueSizeInBits() && |
8013 | "Expected the second and third operands of the PARTIAL_REDUCE_MLA " |
8014 | "node to have an element type which is the same as or smaller than " |
8015 | "the element type of the first operand and result!"); |
8016 | break; |
8017 | } |
8018 | } |
8019 | |
8020 | // Memoize node if it doesn't produce a glue result. |
8021 | SDNode *N; |
8022 | SDVTList VTs = getVTList(VT); |
8023 | SDValue Ops[] = {N1, N2, N3}; |
8024 | if (VT != MVT::Glue) { |
8025 | FoldingSetNodeID ID; |
8026 | AddNodeIDNode(ID, OpC: Opcode, VTList: VTs, OpList: Ops); |
8027 | void *IP = nullptr; |
8028 | if (SDNode *E = FindNodeOrInsertPos(ID, DL, InsertPos&: IP)) { |
8029 | E->intersectFlagsWith(Flags); |
8030 | return SDValue(E, 0); |
8031 | } |
8032 | |
8033 | N = newSDNode<SDNode>(Args&: Opcode, Args: DL.getIROrder(), Args: DL.getDebugLoc(), Args&: VTs); |
8034 | N->setFlags(Flags); |
8035 | createOperands(Node: N, Vals: Ops); |
8036 | CSEMap.InsertNode(N, InsertPos: IP); |
8037 | } else { |
8038 | N = newSDNode<SDNode>(Args&: Opcode, Args: DL.getIROrder(), Args: DL.getDebugLoc(), Args&: VTs); |
8039 | createOperands(Node: N, Vals: Ops); |
8040 | } |
8041 | |
8042 | InsertNode(N); |
8043 | SDValue V = SDValue(N, 0); |
8044 | NewSDValueDbgMsg(V, Msg: "Creating new node: ", G: this); |
8045 | return V; |
8046 | } |
8047 | |
8048 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
8049 | SDValue N1, SDValue N2, SDValue N3, SDValue N4, |
8050 | const SDNodeFlags Flags) { |
8051 | SDValue Ops[] = { N1, N2, N3, N4 }; |
8052 | return getNode(Opcode, DL, VT, Ops, Flags); |
8053 | } |
8054 | |
8055 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
8056 | SDValue N1, SDValue N2, SDValue N3, SDValue N4) { |
8057 | SDNodeFlags Flags; |
8058 | if (Inserter) |
8059 | Flags = Inserter->getFlags(); |
8060 | return getNode(Opcode, DL, VT, N1, N2, N3, N4, Flags); |
8061 | } |
8062 | |
8063 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
8064 | SDValue N1, SDValue N2, SDValue N3, SDValue N4, |
8065 | SDValue N5, const SDNodeFlags Flags) { |
8066 | SDValue Ops[] = { N1, N2, N3, N4, N5 }; |
8067 | return getNode(Opcode, DL, VT, Ops, Flags); |
8068 | } |
8069 | |
8070 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
8071 | SDValue N1, SDValue N2, SDValue N3, SDValue N4, |
8072 | SDValue N5) { |
8073 | SDNodeFlags Flags; |
8074 | if (Inserter) |
8075 | Flags = Inserter->getFlags(); |
8076 | return getNode(Opcode, DL, VT, N1, N2, N3, N4, N5, Flags); |
8077 | } |
8078 | |
8079 | /// getStackArgumentTokenFactor - Compute a TokenFactor to force all |
8080 | /// the incoming stack arguments to be loaded from the stack. |
8081 | SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { |
8082 | SmallVector<SDValue, 8> ArgChains; |
8083 | |
8084 | // Include the original chain at the beginning of the list. When this is |
8085 | // used by target LowerCall hooks, this helps the legalizer find the
8086 | // CALLSEQ_BEGIN node.
8087 | ArgChains.push_back(Elt: Chain); |
8088 | |
8089 | // Add a chain value for each stack argument. |
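// Incoming argument slots are fixed stack objects, which are assigned
// negative frame indices; that is what identifies them here.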
8090 | for (SDNode *U : getEntryNode().getNode()->users()) |
8091 | if (LoadSDNode *L = dyn_cast<LoadSDNode>(Val: U)) |
8092 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: L->getBasePtr())) |
8093 | if (FI->getIndex() < 0) |
8094 | ArgChains.push_back(Elt: SDValue(L, 1)); |
8095 | |
8096 | // Build a tokenfactor for all the chains. |
8097 | return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); |
8098 | } |
8099 | |
8100 | /// getMemsetValue - Vectorized representation of the memset value |
8101 | /// operand. |
8102 | static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, |
8103 | const SDLoc &dl) { |
8104 | assert(!Value.isUndef()); |
8105 | |
8106 | unsigned NumBits = VT.getScalarSizeInBits(); |
8107 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Value)) { |
8108 | assert(C->getAPIntValue().getBitWidth() == 8); |
8109 | APInt Val = APInt::getSplat(NewLen: NumBits, V: C->getAPIntValue()); |
8110 | if (VT.isInteger()) { |
8111 | bool IsOpaque = VT.getSizeInBits() > 64 || |
8112 | !DAG.getTargetLoweringInfo().isLegalStoreImmediate(Value: C->getSExtValue()); |
8113 | return DAG.getConstant(Val, DL: dl, VT, isT: false, isO: IsOpaque); |
8114 | } |
8115 | return DAG.getConstantFP(V: APFloat(VT.getFltSemantics(), Val), DL: dl, VT); |
8116 | } |
8117 | |
8118 | assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); |
8119 | EVT IntVT = VT.getScalarType(); |
8120 | if (!IntVT.isInteger()) |
8121 | IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: IntVT.getSizeInBits()); |
8122 | |
8123 | Value = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: IntVT, N1: Value); |
8124 | if (NumBits > 8) { |
8125 | // Use a multiplication with 0x010101... to extend the input to the |
8126 | // required length. |
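// Illustrative example: splatting the i8 value 0xAB to 32 bits computes
// 0xAB * 0x01010101 = 0xABABABAB.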
8127 | APInt Magic = APInt::getSplat(NewLen: NumBits, V: APInt(8, 0x01)); |
8128 | Value = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IntVT, N1: Value, |
8129 | N2: DAG.getConstant(Val: Magic, DL: dl, VT: IntVT)); |
8130 | } |
8131 | |
8132 | if (VT != Value.getValueType() && !VT.isInteger()) |
8133 | Value = DAG.getBitcast(VT: VT.getScalarType(), V: Value); |
8134 | if (VT != Value.getValueType()) |
8135 | Value = DAG.getSplatBuildVector(VT, DL: dl, Op: Value); |
8136 | |
8137 | return Value; |
8138 | } |
8139 | |
8140 | /// getMemsetStringVal - Similar to getMemsetValue, except this is only
8141 | /// used when a memcpy is turned into a memset because the source is a
8142 | /// constant string pointer.
8143 | static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, |
8144 | const TargetLowering &TLI, |
8145 | const ConstantDataArraySlice &Slice) { |
8146 | // Handle vector with all elements zero. |
8147 | if (Slice.Array == nullptr) { |
8148 | if (VT.isInteger()) |
8149 | return DAG.getConstant(Val: 0, DL: dl, VT); |
8150 | return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, |
8151 | N1: DAG.getConstant(Val: 0, DL: dl, VT: VT.changeTypeToInteger())); |
8152 | } |
8153 | |
8154 | assert(!VT.isVector() && "Can't handle vector type here!"); |
8155 | unsigned NumVTBits = VT.getSizeInBits(); |
8156 | unsigned NumVTBytes = NumVTBits / 8; |
8157 | unsigned NumBytes = std::min(a: NumVTBytes, b: unsigned(Slice.Length)); |
8158 | |
8159 | APInt Val(NumVTBits, 0); |
8160 | if (DAG.getDataLayout().isLittleEndian()) { |
8161 | for (unsigned i = 0; i != NumBytes; ++i) |
8162 | Val |= (uint64_t)(unsigned char)Slice[i] << i*8; |
8163 | } else { |
8164 | for (unsigned i = 0; i != NumBytes; ++i) |
8165 | Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8; |
8166 | } |
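// For example, the 4-byte slice "abcd" becomes the i32 immediate 0x64636261
// on little-endian targets and 0x61626364 on big-endian ones.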
8167 | |
8168 | // If the "cost" of materializing the integer immediate is less than the cost |
8169 | // of a load, then it is cost effective to turn the load into the immediate. |
8170 | Type *Ty = VT.getTypeForEVT(Context&: *DAG.getContext()); |
8171 | if (TLI.shouldConvertConstantLoadToIntImm(Imm: Val, Ty)) |
8172 | return DAG.getConstant(Val, DL: dl, VT); |
8173 | return SDValue(); |
8174 | } |
8175 | |
8176 | SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset, |
8177 | const SDLoc &DL, |
8178 | const SDNodeFlags Flags) { |
8179 | EVT VT = Base.getValueType(); |
8180 | SDValue Index; |
8181 | |
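// A scalable offset is materialized as vscale * known-min bytes; a fixed
// offset becomes a plain constant.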
8182 | if (Offset.isScalable()) |
8183 | Index = getVScale(DL, VT: Base.getValueType(), |
8184 | MulImm: APInt(Base.getValueSizeInBits().getFixedValue(), |
8185 | Offset.getKnownMinValue())); |
8186 | else |
8187 | Index = getConstant(Val: Offset.getFixedValue(), DL, VT); |
8188 | |
8189 | return getMemBasePlusOffset(Base, Offset: Index, DL, Flags); |
8190 | } |
8191 | |
8192 | SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset, |
8193 | const SDLoc &DL, |
8194 | const SDNodeFlags Flags) { |
8195 | assert(Offset.getValueType().isInteger()); |
8196 | EVT BasePtrVT = Ptr.getValueType(); |
8197 | if (TLI->shouldPreservePtrArith(F: this->getMachineFunction().getFunction(), |
8198 | PtrVT: BasePtrVT)) |
8199 | return getNode(Opcode: ISD::PTRADD, DL, VT: BasePtrVT, N1: Ptr, N2: Offset, Flags); |
8200 | return getNode(Opcode: ISD::ADD, DL, VT: BasePtrVT, N1: Ptr, N2: Offset, Flags); |
8201 | } |
8202 | |
8203 | /// Returns true if memcpy source is constant data. |
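/// Matches Src as either a GlobalAddress or (add GlobalAddress, Constant);
/// on success, Slice describes the underlying constant data.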
8204 | static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { |
8205 | uint64_t SrcDelta = 0; |
8206 | GlobalAddressSDNode *G = nullptr; |
8207 | if (Src.getOpcode() == ISD::GlobalAddress) |
8208 | G = cast<GlobalAddressSDNode>(Val&: Src); |
8209 | else if (Src.getOpcode() == ISD::ADD && |
8210 | Src.getOperand(i: 0).getOpcode() == ISD::GlobalAddress && |
8211 | Src.getOperand(i: 1).getOpcode() == ISD::Constant) { |
8212 | G = cast<GlobalAddressSDNode>(Val: Src.getOperand(i: 0)); |
8213 | SrcDelta = Src.getConstantOperandVal(i: 1); |
8214 | } |
8215 | if (!G) |
8216 | return false; |
8217 | |
8218 | return getConstantDataArrayInfo(V: G->getGlobal(), Slice, ElementSize: 8, |
8219 | Offset: SrcDelta + G->getOffset()); |
8220 | } |
8221 | |
8222 | static bool shouldLowerMemFuncForSize(const MachineFunction &MF, |
8223 | SelectionDAG &DAG) { |
8224 | // On Darwin, -Os means optimize for size without hurting performance, so |
8225 | // only really optimize for size when -Oz (MinSize) is used. |
8226 | if (MF.getTarget().getTargetTriple().isOSDarwin()) |
8227 | return MF.getFunction().hasMinSize(); |
8228 | return DAG.shouldOptForSize(); |
8229 | } |
8230 | |
8231 | static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, |
8232 | SmallVector<SDValue, 32> &OutChains, unsigned From, |
8233 | unsigned To, SmallVector<SDValue, 16> &OutLoadChains, |
8234 | SmallVector<SDValue, 16> &OutStoreChains) { |
8235 | assert(OutLoadChains.size() && "Missing loads in memcpy inlining"); |
8236 | assert(OutStoreChains.size() && "Missing stores in memcpy inlining"); |
8237 | SmallVector<SDValue, 16> GluedLoadChains; |
8238 | for (unsigned i = From; i < To; ++i) { |
8239 | OutChains.push_back(Elt: OutLoadChains[i]); |
8240 | GluedLoadChains.push_back(Elt: OutLoadChains[i]); |
8241 | } |
8242 | |
8243 | // Chain for all loads. |
8244 | SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
8245 | GluedLoadChains); |
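// Each store in this batch is re-chained to the combined load token below,
// so all of the batch's loads complete before any of its stores execute.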
8246 | |
8247 | for (unsigned i = From; i < To; ++i) { |
8248 | StoreSDNode *ST = dyn_cast<StoreSDNode>(Val&: OutStoreChains[i]); |
8249 | SDValue NewStore = DAG.getTruncStore(Chain: LoadToken, dl, Val: ST->getValue(), |
8250 | Ptr: ST->getBasePtr(), SVT: ST->getMemoryVT(), |
8251 | MMO: ST->getMemOperand()); |
8252 | OutChains.push_back(Elt: NewStore); |
8253 | } |
8254 | } |
8255 | |
8256 | static SDValue getMemcpyLoadsAndStores( |
8257 | SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, |
8258 | uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, |
8259 | MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, |
8260 | const AAMDNodes &AAInfo, BatchAAResults *BatchAA) { |
8261 | // Turn a memcpy of undef into a nop.
8262 | // FIXME: We need to honor volatile even if Src is undef.
8263 | if (Src.isUndef()) |
8264 | return Chain; |
8265 | |
8266 | // Expand memcpy to a series of load and store ops if the size operand falls |
8267 | // below a certain threshold. |
8268 | // TODO: In the AlwaysInline case, if the size is big then generate a loop |
8269 | // rather than maybe a humongous number of loads and stores. |
8270 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8271 | const DataLayout &DL = DAG.getDataLayout(); |
8272 | LLVMContext &C = *DAG.getContext(); |
8273 | std::vector<EVT> MemOps; |
8274 | bool DstAlignCanChange = false; |
8275 | MachineFunction &MF = DAG.getMachineFunction(); |
8276 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
8277 | bool OptSize = shouldLowerMemFuncForSize(MF, DAG); |
8278 | FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Dst); |
8279 | if (FI && !MFI.isFixedObjectIndex(ObjectIdx: FI->getIndex())) |
8280 | DstAlignCanChange = true; |
8281 | MaybeAlign SrcAlign = DAG.InferPtrAlign(Ptr: Src); |
8282 | if (!SrcAlign || Alignment > *SrcAlign) |
8283 | SrcAlign = Alignment; |
8284 | assert(SrcAlign && "SrcAlign must be set"); |
8285 | ConstantDataArraySlice Slice; |
8286 | // If marked as volatile, perform a copy even when the source is constant.
8287 | bool CopyFromConstant = !isVol && isMemSrcFromConstant(Src, Slice); |
8288 | bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; |
8289 | unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); |
8290 | const MemOp Op = isZeroConstant |
8291 | ? MemOp::Set(Size, DstAlignCanChange, DstAlign: Alignment, |
8292 | /*IsZeroMemset*/ true, IsVolatile: isVol) |
8293 | : MemOp::Copy(Size, DstAlignCanChange, DstAlign: Alignment, |
8294 | SrcAlign: *SrcAlign, IsVolatile: isVol, MemcpyStrSrc: CopyFromConstant); |
8295 | if (!TLI.findOptimalMemOpLowering( |
8296 | MemOps, Limit, Op, DstAS: DstPtrInfo.getAddrSpace(), |
8297 | SrcAS: SrcPtrInfo.getAddrSpace(), FuncAttributes: MF.getFunction().getAttributes())) |
8298 | return SDValue(); |
8299 | |
8300 | if (DstAlignCanChange) { |
8301 | Type *Ty = MemOps[0].getTypeForEVT(Context&: C); |
8302 | Align NewAlign = DL.getABITypeAlign(Ty); |
8303 | |
8304 | // Don't promote to an alignment that would require dynamic stack |
8305 | // realignment which may conflict with optimizations such as tail call |
8306 | // optimization. |
8307 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
8308 | if (!TRI->hasStackRealignment(MF)) |
8309 | if (MaybeAlign StackAlign = DL.getStackAlignment()) |
8310 | NewAlign = std::min(a: NewAlign, b: *StackAlign); |
8311 | |
8312 | if (NewAlign > Alignment) { |
8313 | // Give the stack frame object a larger alignment if needed. |
8314 | if (MFI.getObjectAlign(ObjectIdx: FI->getIndex()) < NewAlign) |
8315 | MFI.setObjectAlignment(ObjectIdx: FI->getIndex(), Alignment: NewAlign); |
8316 | Alignment = NewAlign; |
8317 | } |
8318 | } |
8319 | |
8320 | // Prepare AAInfo for loads/stores after lowering this memcpy. |
8321 | AAMDNodes NewAAInfo = AAInfo; |
8322 | NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; |
8323 | |
8324 | const Value *SrcVal = dyn_cast_if_present<const Value *>(Val&: SrcPtrInfo.V); |
8325 | bool isConstant = |
8326 | BatchAA && SrcVal && |
8327 | BatchAA->pointsToConstantMemory(Loc: MemoryLocation(SrcVal, Size, AAInfo)); |
8328 | |
8329 | MachineMemOperand::Flags MMOFlags = |
8330 | isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; |
8331 | SmallVector<SDValue, 16> OutLoadChains; |
8332 | SmallVector<SDValue, 16> OutStoreChains; |
8333 | SmallVector<SDValue, 32> OutChains; |
8334 | unsigned NumMemOps = MemOps.size(); |
8335 | uint64_t SrcOff = 0, DstOff = 0; |
8336 | for (unsigned i = 0; i != NumMemOps; ++i) { |
8337 | EVT VT = MemOps[i]; |
8338 | unsigned VTSize = VT.getSizeInBits() / 8; |
8339 | SDValue Value, Store; |
8340 | |
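// Worked example: copying 7 bytes with two i32 memops leaves Size == 3 when
// the second op is issued, so both offsets are pulled back by one byte and
// the final load/store covers bytes [3, 7) instead of running past the end.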
8341 | if (VTSize > Size) { |
8342 | // Issuing an unaligned load / store pair that overlaps with the previous |
8343 | // pair. Adjust the offset accordingly. |
8344 | assert(i == NumMemOps-1 && i != 0); |
8345 | SrcOff -= VTSize - Size; |
8346 | DstOff -= VTSize - Size; |
8347 | } |
8348 | |
8349 | if (CopyFromConstant && |
8350 | (isZeroConstant || (VT.isInteger() && !VT.isVector()))) { |
8351 | // It's unlikely a store of a vector immediate can be done in a single
8352 | // instruction. It would require a load from a constant pool first.
8353 | // We only handle zero vectors here.
8354 | // FIXME: Handle other cases where store of vector immediate is done in |
8355 | // a single instruction. |
8356 | ConstantDataArraySlice SubSlice; |
8357 | if (SrcOff < Slice.Length) { |
8358 | SubSlice = Slice; |
8359 | SubSlice.move(Delta: SrcOff); |
8360 | } else { |
8361 | // This is an out-of-bounds access and hence UB. Pretend we read zero. |
8362 | SubSlice.Array = nullptr; |
8363 | SubSlice.Offset = 0; |
8364 | SubSlice.Length = VTSize; |
8365 | } |
8366 | Value = getMemsetStringVal(VT, dl, DAG, TLI, Slice: SubSlice); |
8367 | if (Value.getNode()) { |
8368 | Store = DAG.getStore( |
8369 | Chain, dl, Val: Value, |
8370 | Ptr: DAG.getMemBasePlusOffset(Base: Dst, Offset: TypeSize::getFixed(ExactSize: DstOff), DL: dl), |
8371 | PtrInfo: DstPtrInfo.getWithOffset(O: DstOff), Alignment, MMOFlags, AAInfo: NewAAInfo); |
8372 | OutChains.push_back(Elt: Store); |
8373 | } |
8374 | } |
8375 | |
8376 | if (!Store.getNode()) { |
8377 | // The type might not be legal for the target. This should only happen |
8378 | // if the type is smaller than a legal type, as on PPC, so the right |
8379 | // thing to do is generate a LoadExt/StoreTrunc pair. These simplify |
8380 | // to Load/Store if NVT==VT. |
8381 | // FIXME: Does the case above also need this?
8382 | EVT NVT = TLI.getTypeToTransformTo(Context&: C, VT); |
8383 | assert(NVT.bitsGE(VT)); |
8384 | |
8385 | bool isDereferenceable = |
8386 | SrcPtrInfo.getWithOffset(O: SrcOff).isDereferenceable(Size: VTSize, C, DL); |
8387 | MachineMemOperand::Flags SrcMMOFlags = MMOFlags; |
8388 | if (isDereferenceable) |
8389 | SrcMMOFlags |= MachineMemOperand::MODereferenceable; |
8390 | if (isConstant) |
8391 | SrcMMOFlags |= MachineMemOperand::MOInvariant; |
8392 | |
8393 | Value = DAG.getExtLoad( |
8394 | ExtType: ISD::EXTLOAD, dl, VT: NVT, Chain, |
8395 | Ptr: DAG.getMemBasePlusOffset(Base: Src, Offset: TypeSize::getFixed(ExactSize: SrcOff), DL: dl), |
8396 | PtrInfo: SrcPtrInfo.getWithOffset(O: SrcOff), MemVT: VT, |
8397 | Alignment: commonAlignment(A: *SrcAlign, Offset: SrcOff), MMOFlags: SrcMMOFlags, AAInfo: NewAAInfo); |
8398 | OutLoadChains.push_back(Elt: Value.getValue(R: 1)); |
8399 | |
8400 | Store = DAG.getTruncStore( |
8401 | Chain, dl, Val: Value, |
8402 | Ptr: DAG.getMemBasePlusOffset(Base: Dst, Offset: TypeSize::getFixed(ExactSize: DstOff), DL: dl), |
8403 | PtrInfo: DstPtrInfo.getWithOffset(O: DstOff), SVT: VT, Alignment, MMOFlags, AAInfo: NewAAInfo); |
8404 | OutStoreChains.push_back(Elt: Store); |
8405 | } |
8406 | SrcOff += VTSize; |
8407 | DstOff += VTSize; |
8408 | Size -= VTSize; |
8409 | } |
8410 | |
8411 | unsigned GluedLdStLimit = MaxLdStGlue == 0 ? |
8412 | TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue; |
8413 | unsigned NumLdStInMemcpy = OutStoreChains.size(); |
8414 | |
8415 | if (NumLdStInMemcpy) { |
8416 | // The memcpy may have been converted to a memset if it copies constants.
8417 | // In that case we have only stores and no loads, and in the absence of
8418 | // loads there is nothing to glue together.
8419 | if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) { |
8420 | // If the target does not care, just leave it as is.
8421 | for (unsigned i = 0; i < NumLdStInMemcpy; ++i) { |
8422 | OutChains.push_back(Elt: OutLoadChains[i]); |
8423 | OutChains.push_back(Elt: OutStoreChains[i]); |
8424 | } |
8425 | } else { |
8426 | // The number of ld/st pairs is within the limit set by the target.
8427 | if (NumLdStInMemcpy <= GluedLdStLimit) { |
8428 | chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, From: 0, |
8429 | To: NumLdStInMemcpy, OutLoadChains, |
8430 | OutStoreChains); |
8431 | } else { |
8432 | unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit; |
8433 | unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit; |
8434 | unsigned GlueIter = 0; |
8435 | |
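// Illustrative example: 10 load/store pairs with a glue limit of 4 produce
// glued groups [6, 10) and [2, 6), followed by a residual group [0, 2).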
8436 | for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) { |
8437 | unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit; |
8438 | unsigned IndexTo = NumLdStInMemcpy - GlueIter; |
8439 | |
8440 | chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, From: IndexFrom, To: IndexTo, |
8441 | OutLoadChains, OutStoreChains); |
8442 | GlueIter += GluedLdStLimit; |
8443 | } |
8444 | |
8445 | // Residual ld/st. |
8446 | if (RemainingLdStInMemcpy) { |
8447 | chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, From: 0, |
8448 | To: RemainingLdStInMemcpy, OutLoadChains, |
8449 | OutStoreChains); |
8450 | } |
8451 | } |
8452 | } |
8453 | } |
8454 | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); |
8455 | } |
8456 | |
8457 | static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, |
8458 | SDValue Chain, SDValue Dst, SDValue Src, |
8459 | uint64_t Size, Align Alignment, |
8460 | bool isVol, bool AlwaysInline, |
8461 | MachinePointerInfo DstPtrInfo, |
8462 | MachinePointerInfo SrcPtrInfo, |
8463 | const AAMDNodes &AAInfo) { |
8464 | // Turn a memmove of undef into a nop.
8465 | // FIXME: We need to honor volatile even if Src is undef.
8466 | if (Src.isUndef()) |
8467 | return Chain; |
8468 | |
8469 | // Expand memmove to a series of load and store ops if the size operand falls |
8470 | // below a certain threshold. |
8471 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8472 | const DataLayout &DL = DAG.getDataLayout(); |
8473 | LLVMContext &C = *DAG.getContext(); |
8474 | std::vector<EVT> MemOps; |
8475 | bool DstAlignCanChange = false; |
8476 | MachineFunction &MF = DAG.getMachineFunction(); |
8477 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
8478 | bool OptSize = shouldLowerMemFuncForSize(MF, DAG); |
8479 | FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Dst); |
8480 | if (FI && !MFI.isFixedObjectIndex(ObjectIdx: FI->getIndex())) |
8481 | DstAlignCanChange = true; |
8482 | MaybeAlign SrcAlign = DAG.InferPtrAlign(Ptr: Src); |
8483 | if (!SrcAlign || Alignment > *SrcAlign) |
8484 | SrcAlign = Alignment; |
8485 | assert(SrcAlign && "SrcAlign must be set"); |
8486 | unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); |
8487 | if (!TLI.findOptimalMemOpLowering( |
8488 | MemOps, Limit, |
8489 | Op: MemOp::Copy(Size, DstAlignCanChange, DstAlign: Alignment, SrcAlign: *SrcAlign, |
8490 | /*IsVolatile*/ true), |
8491 | DstAS: DstPtrInfo.getAddrSpace(), SrcAS: SrcPtrInfo.getAddrSpace(), |
8492 | FuncAttributes: MF.getFunction().getAttributes())) |
8493 | return SDValue(); |
8494 | |
8495 | if (DstAlignCanChange) { |
8496 | Type *Ty = MemOps[0].getTypeForEVT(Context&: C); |
8497 | Align NewAlign = DL.getABITypeAlign(Ty); |
8498 | |
8499 | // Don't promote to an alignment that would require dynamic stack |
8500 | // realignment which may conflict with optimizations such as tail call |
8501 | // optimization. |
8502 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
8503 | if (!TRI->hasStackRealignment(MF)) |
8504 | if (MaybeAlign StackAlign = DL.getStackAlignment()) |
8505 | NewAlign = std::min(a: NewAlign, b: *StackAlign); |
8506 | |
8507 | if (NewAlign > Alignment) { |
8508 | // Give the stack frame object a larger alignment if needed. |
8509 | if (MFI.getObjectAlign(ObjectIdx: FI->getIndex()) < NewAlign) |
8510 | MFI.setObjectAlignment(ObjectIdx: FI->getIndex(), Alignment: NewAlign); |
8511 | Alignment = NewAlign; |
8512 | } |
8513 | } |
8514 | |
8515 | // Prepare AAInfo for loads/stores after lowering this memmove. |
8516 | AAMDNodes NewAAInfo = AAInfo; |
8517 | NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; |
8518 | |
8519 | MachineMemOperand::Flags MMOFlags = |
8520 | isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; |
8521 | uint64_t SrcOff = 0, DstOff = 0; |
8522 | SmallVector<SDValue, 8> LoadValues; |
8523 | SmallVector<SDValue, 8> LoadChains; |
8524 | SmallVector<SDValue, 8> OutChains; |
8525 | unsigned NumMemOps = MemOps.size(); |
8526 | for (unsigned i = 0; i < NumMemOps; i++) { |
8527 | EVT VT = MemOps[i]; |
8528 | unsigned VTSize = VT.getSizeInBits() / 8; |
8529 | SDValue Value; |
8530 | |
8531 | bool isDereferenceable = |
8532 | SrcPtrInfo.getWithOffset(O: SrcOff).isDereferenceable(Size: VTSize, C, DL); |
8533 | MachineMemOperand::Flags SrcMMOFlags = MMOFlags; |
8534 | if (isDereferenceable) |
8535 | SrcMMOFlags |= MachineMemOperand::MODereferenceable; |
8536 | |
8537 | Value = DAG.getLoad( |
8538 | VT, dl, Chain, |
8539 | Ptr: DAG.getMemBasePlusOffset(Base: Src, Offset: TypeSize::getFixed(ExactSize: SrcOff), DL: dl), |
8540 | PtrInfo: SrcPtrInfo.getWithOffset(O: SrcOff), Alignment: *SrcAlign, MMOFlags: SrcMMOFlags, AAInfo: NewAAInfo); |
8541 | LoadValues.push_back(Elt: Value); |
8542 | LoadChains.push_back(Elt: Value.getValue(R: 1)); |
8543 | SrcOff += VTSize; |
8544 | } |
8545 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); |
8546 | OutChains.clear(); |
8547 | for (unsigned i = 0; i < NumMemOps; i++) { |
8548 | EVT VT = MemOps[i]; |
8549 | unsigned VTSize = VT.getSizeInBits() / 8; |
8550 | SDValue Store; |
8551 | |
8552 | Store = DAG.getStore( |
8553 | Chain, dl, Val: LoadValues[i], |
8554 | Ptr: DAG.getMemBasePlusOffset(Base: Dst, Offset: TypeSize::getFixed(ExactSize: DstOff), DL: dl), |
8555 | PtrInfo: DstPtrInfo.getWithOffset(O: DstOff), Alignment, MMOFlags, AAInfo: NewAAInfo); |
8556 | OutChains.push_back(Elt: Store); |
8557 | DstOff += VTSize; |
8558 | } |
8559 | |
8560 | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); |
8561 | } |
8562 | |
8563 | /// Lower the call to 'memset' intrinsic function into a series of store |
8564 | /// operations. |
8565 | /// |
8566 | /// \param DAG Selection DAG where lowered code is placed. |
8567 | /// \param dl Link to corresponding IR location. |
8568 | /// \param Chain Control flow dependency. |
8569 | /// \param Dst Pointer to destination memory location. |
8570 | /// \param Src Value of byte to write into the memory. |
8571 | /// \param Size Number of bytes to write. |
8572 | /// \param Alignment Alignment of the destination in bytes. |
8573 | /// \param isVol True if destination is volatile. |
8574 | /// \param AlwaysInline Makes sure no function call is generated. |
8575 | /// \param DstPtrInfo IR information on the memory pointer. |
8576 | /// \returns The new head of the control flow if lowering was successful, an
8577 | /// empty SDValue otherwise.
8578 | /// |
8579 | /// The function tries to replace 'llvm.memset' intrinsic with several store |
8580 | /// operations and value calculation code. This is usually profitable for small |
8581 | /// memory size or when the semantic requires inlining. |
8582 | static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, |
8583 | SDValue Chain, SDValue Dst, SDValue Src, |
8584 | uint64_t Size, Align Alignment, bool isVol, |
8585 | bool AlwaysInline, MachinePointerInfo DstPtrInfo, |
8586 | const AAMDNodes &AAInfo) { |
8587 | // Turn a memset of undef into a nop.
8588 | // FIXME: We need to honor volatile even if Src is undef.
8589 | if (Src.isUndef()) |
8590 | return Chain; |
8591 | |
8592 | // Expand memset to a series of store ops if the size operand
8593 | // falls below a certain threshold. |
8594 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8595 | std::vector<EVT> MemOps; |
8596 | bool DstAlignCanChange = false; |
8597 | MachineFunction &MF = DAG.getMachineFunction(); |
8598 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
8599 | bool OptSize = shouldLowerMemFuncForSize(MF, DAG); |
8600 | FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Dst); |
8601 | if (FI && !MFI.isFixedObjectIndex(ObjectIdx: FI->getIndex())) |
8602 | DstAlignCanChange = true; |
8603 | bool IsZeroVal = isNullConstant(V: Src); |
8604 | unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize); |
8605 | |
8606 | if (!TLI.findOptimalMemOpLowering( |
8607 | MemOps, Limit, |
8608 | Op: MemOp::Set(Size, DstAlignCanChange, DstAlign: Alignment, IsZeroMemset: IsZeroVal, IsVolatile: isVol), |
8609 | DstAS: DstPtrInfo.getAddrSpace(), SrcAS: ~0u, FuncAttributes: MF.getFunction().getAttributes())) |
8610 | return SDValue(); |
8611 | |
8612 | if (DstAlignCanChange) { |
8613 | Type *Ty = MemOps[0].getTypeForEVT(Context&: *DAG.getContext()); |
8614 | const DataLayout &DL = DAG.getDataLayout(); |
8615 | Align NewAlign = DL.getABITypeAlign(Ty); |
8616 | |
8617 | // Don't promote to an alignment that would require dynamic stack |
8618 | // realignment which may conflict with optimizations such as tail call |
8619 | // optimization. |
8620 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
8621 | if (!TRI->hasStackRealignment(MF)) |
8622 | if (MaybeAlign StackAlign = DL.getStackAlignment()) |
8623 | NewAlign = std::min(a: NewAlign, b: *StackAlign); |
8624 | |
8625 | if (NewAlign > Alignment) { |
8626 | // Give the stack frame object a larger alignment if needed. |
8627 | if (MFI.getObjectAlign(ObjectIdx: FI->getIndex()) < NewAlign) |
8628 | MFI.setObjectAlignment(ObjectIdx: FI->getIndex(), Alignment: NewAlign); |
8629 | Alignment = NewAlign; |
8630 | } |
8631 | } |
8632 | |
8633 | SmallVector<SDValue, 8> OutChains; |
8634 | uint64_t DstOff = 0; |
8635 | unsigned NumMemOps = MemOps.size(); |
8636 | |
8637 | // Find the largest store and generate the bit pattern for it. |
8638 | EVT LargestVT = MemOps[0]; |
8639 | for (unsigned i = 1; i < NumMemOps; i++) |
8640 | if (MemOps[i].bitsGT(VT: LargestVT)) |
8641 | LargestVT = MemOps[i]; |
8642 | SDValue MemSetValue = getMemsetValue(Value: Src, VT: LargestVT, DAG, dl); |
8643 | |
8644 | // Prepare AAInfo for loads/stores after lowering this memset. |
8645 | AAMDNodes NewAAInfo = AAInfo; |
8646 | NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; |
8647 | |
8648 | for (unsigned i = 0; i < NumMemOps; i++) { |
8649 | EVT VT = MemOps[i]; |
8650 | unsigned VTSize = VT.getSizeInBits() / 8; |
8651 | if (VTSize > Size) { |
8652 | // Issuing an unaligned load / store pair that overlaps with the previous |
8653 | // pair. Adjust the offset accordingly. |
8654 | assert(i == NumMemOps-1 && i != 0); |
8655 | DstOff -= VTSize - Size; |
8656 | } |
8657 | |
8658 | // If this store is smaller than the largest store, see whether we can get
8659 | // the smaller value for free with a truncate or extract vector element and |
8660 | // then store. |
8661 | SDValue Value = MemSetValue; |
8662 | if (VT.bitsLT(VT: LargestVT)) { |
8663 | unsigned Index; |
8664 | unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits(); |
8665 | EVT SVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getScalarType(), NumElements: NElts); |
8666 | if (!LargestVT.isVector() && !VT.isVector() && |
8667 | TLI.isTruncateFree(FromVT: LargestVT, ToVT: VT)) |
8668 | Value = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, N1: MemSetValue); |
8669 | else if (LargestVT.isVector() && !VT.isVector() && |
8670 | TLI.shallExtractConstSplatVectorElementToStore( |
8671 | VectorTy: LargestVT.getTypeForEVT(Context&: *DAG.getContext()), |
8672 | ElemSizeInBits: VT.getSizeInBits(), Index) && |
8673 | TLI.isTypeLegal(VT: SVT) && |
8674 | LargestVT.getSizeInBits() == SVT.getSizeInBits()) { |
8675 | // A target that can combine store(extractelement VectorTy, Idx) can get
8676 | // the smaller value for free. |
8677 | SDValue TailValue = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: SVT, N1: MemSetValue); |
8678 | Value = DAG.getExtractVectorElt(DL: dl, VT, Vec: TailValue, Idx: Index); |
8679 | } else |
8680 | Value = getMemsetValue(Value: Src, VT, DAG, dl); |
8681 | } |
8682 | assert(Value.getValueType() == VT && "Value with wrong type."); |
8683 | SDValue Store = DAG.getStore( |
8684 | Chain, dl, Val: Value, |
8685 | Ptr: DAG.getMemBasePlusOffset(Base: Dst, Offset: TypeSize::getFixed(ExactSize: DstOff), DL: dl), |
8686 | PtrInfo: DstPtrInfo.getWithOffset(O: DstOff), Alignment, |
8687 | MMOFlags: isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone, |
8688 | AAInfo: NewAAInfo); |
8689 | OutChains.push_back(Elt: Store); |
8690 | DstOff += VT.getSizeInBits() / 8; |
8691 | Size -= VTSize; |
8692 | } |
8693 | |
8694 | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); |
8695 | } |
8696 | |
8697 | static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, |
8698 | unsigned AS) { |
8699 | // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all |
8700 | // pointer operands can be losslessly bitcast to pointers of address space 0.
8701 | if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(SrcAS: AS, DestAS: 0)) { |
8702 | report_fatal_error(reason: "cannot lower memory intrinsic in address space "+ |
8703 | Twine(AS)); |
8704 | } |
8705 | } |
8706 | |
8707 | SDValue SelectionDAG::getMemcpy( |
8708 | SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, |
8709 | Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, |
8710 | std::optional<bool> OverrideTailCall, MachinePointerInfo DstPtrInfo, |
8711 | MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo, |
8712 | BatchAAResults *BatchAA) { |
8713 | // Check to see if we should lower the memcpy to loads and stores first. |
8714 | // For cases within the target-specified limits, this is the best choice. |
8715 | ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size); |
8716 | if (ConstantSize) { |
8717 | // Memcpy with size zero? Just return the original chain. |
8718 | if (ConstantSize->isZero()) |
8719 | return Chain; |
8720 | |
8721 | SDValue Result = getMemcpyLoadsAndStores( |
8722 | DAG&: *this, dl, Chain, Dst, Src, Size: ConstantSize->getZExtValue(), Alignment, |
8723 | isVol, AlwaysInline: false, DstPtrInfo, SrcPtrInfo, AAInfo, BatchAA); |
8724 | if (Result.getNode()) |
8725 | return Result; |
8726 | } |
8727 | |
8728 | // Then check to see if we should lower the memcpy with target-specific |
8729 | // code. If the target chooses to do this, this is the next best. |
8730 | if (TSI) { |
8731 | SDValue Result = TSI->EmitTargetCodeForMemcpy( |
8732 | DAG&: *this, dl, Chain, Op1: Dst, Op2: Src, Op3: Size, Alignment, isVolatile: isVol, AlwaysInline, |
8733 | DstPtrInfo, SrcPtrInfo); |
8734 | if (Result.getNode()) |
8735 | return Result; |
8736 | } |
8737 | |
8738 | // If we really need inline code and the target declined to provide it, |
8739 | // use a (potentially long) sequence of loads and stores. |
8740 | if (AlwaysInline) { |
8741 | assert(ConstantSize && "AlwaysInline requires a constant size!"); |
8742 | return getMemcpyLoadsAndStores( |
8743 | DAG&: *this, dl, Chain, Dst, Src, Size: ConstantSize->getZExtValue(), Alignment, |
8744 | isVol, AlwaysInline: true, DstPtrInfo, SrcPtrInfo, AAInfo, BatchAA); |
8745 | } |
8746 | |
8747 | checkAddrSpaceIsValidForLibcall(TLI, AS: DstPtrInfo.getAddrSpace()); |
8748 | checkAddrSpaceIsValidForLibcall(TLI, AS: SrcPtrInfo.getAddrSpace()); |
8749 | |
8750 | // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc |
8751 | // memcpy is not guaranteed to be safe. libc memcpys aren't required to |
8752 | // respect volatile, so they may do things like read or write memory |
8753 | // beyond the given memory regions. But fixing this isn't easy, and most |
8754 | // people don't care. |
8755 | |
8756 | // Emit a library call. |
8757 | TargetLowering::ArgListTy Args; |
8758 | TargetLowering::ArgListEntry Entry; |
8759 | Entry.Ty = PointerType::getUnqual(C&: *getContext()); |
8760 | Entry.Node = Dst; Args.push_back(x: Entry); |
8761 | Entry.Node = Src; Args.push_back(x: Entry); |
8762 | |
8763 | Entry.Ty = getDataLayout().getIntPtrType(C&: *getContext()); |
8764 | Entry.Node = Size; Args.push_back(x: Entry); |
8765 | // FIXME: pass in SDLoc |
8766 | TargetLowering::CallLoweringInfo CLI(*this); |
8767 | bool IsTailCall = false; |
8768 | if (OverrideTailCall.has_value()) { |
8769 | IsTailCall = *OverrideTailCall; |
8770 | } else { |
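// memcpy returns its destination, so the call may only be treated as
// returning its first argument when the libcall really lowers to memcpy.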
8771 | bool LowersToMemcpy = |
8772 | TLI->getLibcallName(Call: RTLIB::MEMCPY) == StringRef("memcpy"); |
8773 | bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(CI: *CI); |
8774 | IsTailCall = CI && CI->isTailCall() && |
8775 | isInTailCallPosition(Call: *CI, TM: getTarget(), |
8776 | ReturnsFirstArg: ReturnsFirstArg && LowersToMemcpy); |
8777 | } |
8778 | |
8779 | CLI.setDebugLoc(dl) |
8780 | .setChain(Chain) |
8781 | .setLibCallee(CC: TLI->getLibcallCallingConv(Call: RTLIB::MEMCPY), |
8782 | ResultType: Dst.getValueType().getTypeForEVT(Context&: *getContext()), |
8783 | Target: getExternalSymbol(Sym: TLI->getLibcallName(Call: RTLIB::MEMCPY), |
8784 | VT: TLI->getPointerTy(DL: getDataLayout())), |
8785 | ArgsList: std::move(Args)) |
8786 | .setDiscardResult() |
8787 | .setTailCall(IsTailCall); |
8788 | |
8789 | std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); |
8790 | return CallResult.second; |
8791 | } |
8792 | |
8793 | SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, |
8794 | SDValue Dst, SDValue Src, SDValue Size, |
8795 | Type *SizeTy, unsigned ElemSz, |
8796 | bool isTailCall, |
8797 | MachinePointerInfo DstPtrInfo, |
8798 | MachinePointerInfo SrcPtrInfo) { |
8799 | // Emit a library call. |
8800 | TargetLowering::ArgListTy Args; |
8801 | TargetLowering::ArgListEntry Entry; |
8802 | Entry.Ty = getDataLayout().getIntPtrType(C&: *getContext()); |
8803 | Entry.Node = Dst; |
8804 | Args.push_back(x: Entry); |
8805 | |
8806 | Entry.Node = Src; |
8807 | Args.push_back(x: Entry); |
8808 | |
8809 | Entry.Ty = SizeTy; |
8810 | Entry.Node = Size; |
8811 | Args.push_back(x: Entry); |
8812 | |
8813 | RTLIB::Libcall LibraryCall = |
8814 | RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSize: ElemSz); |
8815 | if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) |
8816 | report_fatal_error(reason: "Unsupported element size"); |
8817 | |
8818 | TargetLowering::CallLoweringInfo CLI(*this); |
8819 | CLI.setDebugLoc(dl) |
8820 | .setChain(Chain) |
8821 | .setLibCallee(CC: TLI->getLibcallCallingConv(Call: LibraryCall), |
8822 | ResultType: Type::getVoidTy(C&: *getContext()), |
8823 | Target: getExternalSymbol(Sym: TLI->getLibcallName(Call: LibraryCall), |
8824 | VT: TLI->getPointerTy(DL: getDataLayout())), |
8825 | ArgsList: std::move(Args)) |
8826 | .setDiscardResult() |
8827 | .setTailCall(isTailCall); |
8828 | |
8829 | std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); |
8830 | return CallResult.second; |
8831 | } |
8832 | |
8833 | SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, |
8834 | SDValue Src, SDValue Size, Align Alignment, |
8835 | bool isVol, const CallInst *CI, |
8836 | std::optional<bool> OverrideTailCall, |
8837 | MachinePointerInfo DstPtrInfo, |
8838 | MachinePointerInfo SrcPtrInfo, |
8839 | const AAMDNodes &AAInfo, |
8840 | BatchAAResults *BatchAA) { |
8841 | // Check to see if we should lower the memmove to loads and stores first. |
8842 | // For cases within the target-specified limits, this is the best choice. |
8843 | ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size); |
8844 | if (ConstantSize) { |
8845 | // Memmove with size zero? Just return the original chain. |
8846 | if (ConstantSize->isZero()) |
8847 | return Chain; |
8848 | |
8849 | SDValue Result = getMemmoveLoadsAndStores( |
8850 | DAG&: *this, dl, Chain, Dst, Src, Size: ConstantSize->getZExtValue(), Alignment, |
8851 | isVol, AlwaysInline: false, DstPtrInfo, SrcPtrInfo, AAInfo); |
8852 | if (Result.getNode()) |
8853 | return Result; |
8854 | } |
8855 | |
8856 | // Then check to see if we should lower the memmove with target-specific |
8857 | // code. If the target chooses to do this, this is the next best. |
8858 | if (TSI) { |
8859 | SDValue Result = |
8860 | TSI->EmitTargetCodeForMemmove(DAG&: *this, dl, Chain, Op1: Dst, Op2: Src, Op3: Size, |
8861 | Alignment, isVolatile: isVol, DstPtrInfo, SrcPtrInfo); |
8862 | if (Result.getNode()) |
8863 | return Result; |
8864 | } |
8865 | |
8866 | checkAddrSpaceIsValidForLibcall(TLI, AS: DstPtrInfo.getAddrSpace()); |
8867 | checkAddrSpaceIsValidForLibcall(TLI, AS: SrcPtrInfo.getAddrSpace()); |
8868 | |
8869 | // FIXME: If the memmove is volatile, lowering it to plain libc memmove may |
8870 | // not be safe. See memcpy above for more details. |
8871 | |
8872 | // Emit a library call. |
8873 | TargetLowering::ArgListTy Args; |
8874 | TargetLowering::ArgListEntry Entry; |
8875 | Entry.Ty = PointerType::getUnqual(C&: *getContext()); |
8876 | Entry.Node = Dst; Args.push_back(x: Entry); |
8877 | Entry.Node = Src; Args.push_back(x: Entry); |
8878 | |
8879 | Entry.Ty = getDataLayout().getIntPtrType(C&: *getContext()); |
8880 | Entry.Node = Size; Args.push_back(x: Entry); |
8881 | // FIXME: pass in SDLoc |
8882 | TargetLowering::CallLoweringInfo CLI(*this); |
8883 | |
8884 | bool IsTailCall = false; |
8885 | if (OverrideTailCall.has_value()) { |
8886 | IsTailCall = *OverrideTailCall; |
8887 | } else { |
8888 | bool LowersToMemmove = |
8889 | TLI->getLibcallName(Call: RTLIB::MEMMOVE) == StringRef("memmove"); |
8890 | bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(CI: *CI); |
8891 | IsTailCall = CI && CI->isTailCall() && |
8892 | isInTailCallPosition(Call: *CI, TM: getTarget(), |
8893 | ReturnsFirstArg: ReturnsFirstArg && LowersToMemmove); |
8894 | } |
8895 | |
8896 | CLI.setDebugLoc(dl) |
8897 | .setChain(Chain) |
8898 | .setLibCallee(CC: TLI->getLibcallCallingConv(Call: RTLIB::MEMMOVE), |
8899 | ResultType: Dst.getValueType().getTypeForEVT(Context&: *getContext()), |
8900 | Target: getExternalSymbol(Sym: TLI->getLibcallName(Call: RTLIB::MEMMOVE), |
8901 | VT: TLI->getPointerTy(DL: getDataLayout())), |
8902 | ArgsList: std::move(Args)) |
8903 | .setDiscardResult() |
8904 | .setTailCall(IsTailCall); |
8905 | |
8906 | std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); |
8907 | return CallResult.second; |
8908 | } |
8909 | |
8910 | SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, |
8911 | SDValue Dst, SDValue Src, SDValue Size, |
8912 | Type *SizeTy, unsigned ElemSz, |
8913 | bool isTailCall, |
8914 | MachinePointerInfo DstPtrInfo, |
8915 | MachinePointerInfo SrcPtrInfo) { |
8916 | // Emit a library call. |
8917 | TargetLowering::ArgListTy Args; |
8918 | TargetLowering::ArgListEntry Entry; |
8919 | Entry.Ty = getDataLayout().getIntPtrType(C&: *getContext()); |
8920 | Entry.Node = Dst; |
8921 | Args.push_back(x: Entry); |
8922 | |
8923 | Entry.Node = Src; |
8924 | Args.push_back(x: Entry); |
8925 | |
8926 | Entry.Ty = SizeTy; |
8927 | Entry.Node = Size; |
8928 | Args.push_back(x: Entry); |
8929 | |
8930 | RTLIB::Libcall LibraryCall = |
8931 | RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSize: ElemSz); |
8932 | if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) |
8933 | report_fatal_error(reason: "Unsupported element size"); |
8934 | |
8935 | TargetLowering::CallLoweringInfo CLI(*this); |
8936 | CLI.setDebugLoc(dl) |
8937 | .setChain(Chain) |
8938 | .setLibCallee(CC: TLI->getLibcallCallingConv(Call: LibraryCall), |
8939 | ResultType: Type::getVoidTy(C&: *getContext()), |
8940 | Target: getExternalSymbol(Sym: TLI->getLibcallName(Call: LibraryCall), |
8941 | VT: TLI->getPointerTy(DL: getDataLayout())), |
8942 | ArgsList: std::move(Args)) |
8943 | .setDiscardResult() |
8944 | .setTailCall(isTailCall); |
8945 | |
8946 | std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); |
8947 | return CallResult.second; |
8948 | } |
8949 | |
8950 | SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, |
8951 | SDValue Src, SDValue Size, Align Alignment, |
8952 | bool isVol, bool AlwaysInline, |
8953 | const CallInst *CI, |
8954 | MachinePointerInfo DstPtrInfo, |
8955 | const AAMDNodes &AAInfo) { |
8956 | // Check to see if we should lower the memset to stores first. |
8957 | // For cases within the target-specified limits, this is the best choice. |
8958 | ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size); |
8959 | if (ConstantSize) { |
8960 | // Memset with size zero? Just return the original chain. |
8961 | if (ConstantSize->isZero()) |
8962 | return Chain; |
8963 | |
8964 | SDValue Result = getMemsetStores(DAG&: *this, dl, Chain, Dst, Src, |
8965 | Size: ConstantSize->getZExtValue(), Alignment, |
8966 | isVol, AlwaysInline: false, DstPtrInfo, AAInfo); |
8967 | |
8968 | if (Result.getNode()) |
8969 | return Result; |
8970 | } |
8971 | |
8972 | // Then check to see if we should lower the memset with target-specific |
8973 | // code. If the target chooses to do this, this is the next best. |
8974 | if (TSI) { |
8975 | SDValue Result = TSI->EmitTargetCodeForMemset( |
8976 | DAG&: *this, dl, Chain, Op1: Dst, Op2: Src, Op3: Size, Alignment, isVolatile: isVol, AlwaysInline, DstPtrInfo); |
8977 | if (Result.getNode()) |
8978 | return Result; |
8979 | } |
8980 | |
8981 | // If we really need inline code and the target declined to provide it, |
8982 | // use a (potentially long) sequence of loads and stores. |
8983 | if (AlwaysInline) { |
8984 | assert(ConstantSize && "AlwaysInline requires a constant size!"); |
8985 | SDValue Result = getMemsetStores(DAG&: *this, dl, Chain, Dst, Src, |
8986 | Size: ConstantSize->getZExtValue(), Alignment, |
8987 | isVol, AlwaysInline: true, DstPtrInfo, AAInfo); |
8988 | assert(Result && |
8989 | "getMemsetStores must return a valid sequence when AlwaysInline"); |
8990 | return Result; |
8991 | } |
8992 | |
8993 | checkAddrSpaceIsValidForLibcall(TLI, AS: DstPtrInfo.getAddrSpace()); |
8994 | |
8995 | // Emit a library call. |
8996 | auto &Ctx = *getContext(); |
8997 | const auto &DL = getDataLayout();
8998 | |
8999 | TargetLowering::CallLoweringInfo CLI(*this); |
9000 | // FIXME: pass in SDLoc |
9001 | CLI.setDebugLoc(dl).setChain(Chain); |
9002 | |
9003 | const char *BzeroName = getTargetLoweringInfo().getLibcallName(Call: RTLIB::BZERO); |
9004 | |
9005 | // Helper function to create an Entry from Node and Type. |
9006 | const auto CreateEntry = [](SDValue Node, Type *Ty) { |
9007 | TargetLowering::ArgListEntry Entry; |
9008 | Entry.Node = Node; |
9009 | Entry.Ty = Ty; |
9010 | return Entry; |
9011 | }; |
9012 | |
9013 | bool UseBZero = isNullConstant(V: Src) && BzeroName; |
9014 | // If zeroing out and bzero is present, use it. |
9015 | if (UseBZero) { |
9016 | TargetLowering::ArgListTy Args; |
9017 | Args.push_back(x: CreateEntry(Dst, PointerType::getUnqual(C&: Ctx))); |
9018 | Args.push_back(x: CreateEntry(Size, DL.getIntPtrType(C&: Ctx))); |
9019 | CLI.setLibCallee( |
9020 | CC: TLI->getLibcallCallingConv(Call: RTLIB::BZERO), ResultType: Type::getVoidTy(C&: Ctx), |
9021 | Target: getExternalSymbol(Sym: BzeroName, VT: TLI->getPointerTy(DL)), ArgsList: std::move(Args)); |
9022 | } else { |
9023 | TargetLowering::ArgListTy Args; |
9024 | Args.push_back(x: CreateEntry(Dst, PointerType::getUnqual(C&: Ctx))); |
9025 | Args.push_back(x: CreateEntry(Src, Src.getValueType().getTypeForEVT(Context&: Ctx))); |
9026 | Args.push_back(x: CreateEntry(Size, DL.getIntPtrType(C&: Ctx))); |
9027 | CLI.setLibCallee(CC: TLI->getLibcallCallingConv(Call: RTLIB::MEMSET), |
9028 | ResultType: Dst.getValueType().getTypeForEVT(Context&: Ctx), |
9029 | Target: getExternalSymbol(Sym: TLI->getLibcallName(Call: RTLIB::MEMSET), |
9030 | VT: TLI->getPointerTy(DL)), |
9031 | ArgsList: std::move(Args)); |
9032 | } |
9033 | bool LowersToMemset = |
9034 | TLI->getLibcallName(Call: RTLIB::MEMSET) == StringRef("memset"); |
9035 | // If we're going to use bzero, make sure not to tail call unless the
9036 | // subsequent return doesn't need a value, since bzero, unlike memset,
9037 | // does not return its first argument.
9038 | bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(CI: *CI) && !UseBZero; |
9039 | bool IsTailCall = |
9040 | CI && CI->isTailCall() && |
9041 | isInTailCallPosition(Call: *CI, TM: getTarget(), ReturnsFirstArg: ReturnsFirstArg && LowersToMemset); |
9042 | CLI.setDiscardResult().setTailCall(IsTailCall); |
9043 | |
9044 | std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); |
9045 | return CallResult.second; |
9046 | } |
9047 | |
9048 | SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, |
9049 | SDValue Dst, SDValue Value, SDValue Size, |
9050 | Type *SizeTy, unsigned ElemSz, |
9051 | bool isTailCall, |
9052 | MachinePointerInfo DstPtrInfo) { |
9053 | // Emit a library call. |
9054 | TargetLowering::ArgListTy Args; |
9055 | TargetLowering::ArgListEntry Entry; |
9056 | Entry.Ty = getDataLayout().getIntPtrType(C&: *getContext()); |
9057 | Entry.Node = Dst; |
9058 | Args.push_back(x: Entry); |
9059 | |
9060 | Entry.Ty = Type::getInt8Ty(C&: *getContext()); |
9061 | Entry.Node = Value; |
9062 | Args.push_back(x: Entry); |
9063 | |
9064 | Entry.Ty = SizeTy; |
9065 | Entry.Node = Size; |
9066 | Args.push_back(x: Entry); |
9067 | |
9068 | RTLIB::Libcall LibraryCall = |
9069 | RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSize: ElemSz); |
9070 | if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) |
9071 | report_fatal_error(reason: "Unsupported element size"); |
9072 | |
9073 | TargetLowering::CallLoweringInfo CLI(*this); |
9074 | CLI.setDebugLoc(dl) |
9075 | .setChain(Chain) |
9076 | .setLibCallee(CC: TLI->getLibcallCallingConv(Call: LibraryCall), |
9077 | ResultType: Type::getVoidTy(C&: *getContext()), |
9078 | Target: getExternalSymbol(Sym: TLI->getLibcallName(Call: LibraryCall), |
9079 | VT: TLI->getPointerTy(DL: getDataLayout())), |
9080 | ArgsList: std::move(Args)) |
9081 | .setDiscardResult() |
9082 | .setTailCall(isTailCall); |
9083 | |
9084 | std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); |
9085 | return CallResult.second; |
9086 | } |
9087 | |
9088 | SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, |
9089 | SDVTList VTList, ArrayRef<SDValue> Ops, |
9090 | MachineMemOperand *MMO, |
9091 | ISD::LoadExtType ExtType) { |
9092 | FoldingSetNodeID ID; |
9093 | AddNodeIDNode(ID, OpC: Opcode, VTList, OpList: Ops); |
9094 | ID.AddInteger(I: MemVT.getRawBits()); |
9095 | ID.AddInteger(I: getSyntheticNodeSubclassData<AtomicSDNode>( |
9096 | IROrder: dl.getIROrder(), Args&: Opcode, Args&: VTList, Args&: MemVT, Args&: MMO, Args&: ExtType)); |
9097 | ID.AddInteger(I: MMO->getPointerInfo().getAddrSpace()); |
9098 | ID.AddInteger(I: MMO->getFlags()); |
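// The ID covers the opcode, value types, operands, memory VT, address space
// and MMO flags, so equivalent atomic operations CSE to a single node.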
9099 | void *IP = nullptr;
9100 | if (SDNode *E = FindNodeOrInsertPos(ID, DL: dl, InsertPos&: IP)) { |
9101 | cast<AtomicSDNode>(Val: E)->refineAlignment(NewMMO: MMO); |
9102 | return SDValue(E, 0); |
9103 | } |
9104 | |
9105 | auto *N = newSDNode<AtomicSDNode>(Args: dl.getIROrder(), Args: dl.getDebugLoc(), Args&: Opcode, |
9106 | Args&: VTList, Args&: MemVT, Args&: MMO, Args&: ExtType); |
9107 | createOperands(Node: N, Vals: Ops); |
9108 | |
9109 | CSEMap.InsertNode(N, InsertPos: IP); |
9110 | InsertNode(N); |
9111 | SDValue V(N, 0); |
9112 | NewSDValueDbgMsg(V, Msg: "Creating new node: ", G: this); |
9113 | return V; |
9114 | } |
9115 | |
9116 | SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, |
9117 | EVT MemVT, SDVTList VTs, SDValue Chain, |
9118 | SDValue Ptr, SDValue Cmp, SDValue Swp, |
9119 | MachineMemOperand *MMO) { |
9120 | assert(Opcode == ISD::ATOMIC_CMP_SWAP || |
9121 | Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); |
9122 | assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); |
9123 | |
9124 | SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; |
9125 | return getAtomic(Opcode, dl, MemVT, VTList: VTs, Ops, MMO); |
9126 | } |
9127 | |
9128 | SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, |
9129 | SDValue Chain, SDValue Ptr, SDValue Val, |
9130 | MachineMemOperand *MMO) { |
9131 | assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || |
9132 | Opcode == ISD::ATOMIC_LOAD_AND || Opcode == ISD::ATOMIC_LOAD_CLR || |
9133 | Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR || |
9134 | Opcode == ISD::ATOMIC_LOAD_NAND || Opcode == ISD::ATOMIC_LOAD_MIN || |
9135 | Opcode == ISD::ATOMIC_LOAD_MAX || Opcode == ISD::ATOMIC_LOAD_UMIN || |
9136 | Opcode == ISD::ATOMIC_LOAD_UMAX || Opcode == ISD::ATOMIC_LOAD_FADD || |
9137 | Opcode == ISD::ATOMIC_LOAD_FSUB || Opcode == ISD::ATOMIC_LOAD_FMAX || |
9138 | Opcode == ISD::ATOMIC_LOAD_FMIN || |
9139 | Opcode == ISD::ATOMIC_LOAD_FMINIMUM || |
9140 | Opcode == ISD::ATOMIC_LOAD_FMAXIMUM || |
9141 | Opcode == ISD::ATOMIC_LOAD_UINC_WRAP || |
9142 | Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP || |
9143 | Opcode == ISD::ATOMIC_LOAD_USUB_COND || |
9144 | Opcode == ISD::ATOMIC_LOAD_USUB_SAT || Opcode == ISD::ATOMIC_SWAP || |
9145 | Opcode == ISD::ATOMIC_STORE) && |
9146 | "Invalid Atomic Op"); |
9147 | |
9148 | EVT VT = Val.getValueType(); |
9149 | |
9150 | SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) : |
9151 | getVTList(VT, MVT::Other); |
9152 | SDValue Ops[] = {Chain, Ptr, Val}; |
9153 | return getAtomic(Opcode, dl, MemVT, VTList: VTs, Ops, MMO); |
9154 | } |
9155 | |
9156 | SDValue SelectionDAG::getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, |
9157 | EVT MemVT, EVT VT, SDValue Chain, |
9158 | SDValue Ptr, MachineMemOperand *MMO) { |
9159 | SDVTList VTs = getVTList(VT, MVT::Other); |
9160 | SDValue Ops[] = {Chain, Ptr}; |
9161 | return getAtomic(Opcode: ISD::ATOMIC_LOAD, dl, MemVT, VTList: VTs, Ops, MMO, ExtType); |
9162 | } |
9163 | |
9164 | /// getMergeValues - Create a MERGE_VALUES node from the given operands. |
9165 | SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { |
9166 | if (Ops.size() == 1) |
9167 | return Ops[0]; |
9168 | |
9169 | SmallVector<EVT, 4> VTs; |
9170 | VTs.reserve(N: Ops.size()); |
9171 | for (const SDValue &Op : Ops) |
9172 | VTs.push_back(Elt: Op.getValueType()); |
9173 | return getNode(Opcode: ISD::MERGE_VALUES, DL: dl, VTList: getVTList(VTs), Ops); |
9174 | } |
9175 | |
9176 | SDValue SelectionDAG::getMemIntrinsicNode( |
9177 | unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, |
9178 | EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, |
9179 | MachineMemOperand::Flags Flags, LocationSize Size, |
9180 | const AAMDNodes &AAInfo) { |
9181 | if (Size.hasValue() && !Size.getValue()) |
9182 | Size = LocationSize::precise(Value: MemVT.getStoreSize()); |
9183 | |
9184 | MachineFunction &MF = getMachineFunction(); |
9185 | MachineMemOperand *MMO = |
9186 | MF.getMachineMemOperand(PtrInfo, F: Flags, Size, BaseAlignment: Alignment, AAInfo); |
9187 | |
9188 | return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); |
9189 | } |
9190 | |
SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
                                          SDVTList VTList,
                                          ArrayRef<SDValue> Ops, EVT MemVT,
                                          MachineMemOperand *MMO) {
  assert(
      (Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN ||
       Opcode == ISD::PREFETCH ||
       (Opcode <= (unsigned)std::numeric_limits<int>::max() &&
        Opcode >= ISD::BUILTIN_OP_END && TSI->isTargetMemoryOpcode(Opcode))) &&
      "Opcode is not a memory-accessing opcode!");

  // Memoize the node unless it returns a glue result.
  MemIntrinsicSDNode *N;
  if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
    FoldingSetNodeID ID;
    AddNodeIDNode(ID, Opcode, VTList, Ops);
    ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>(
        Opcode, dl.getIROrder(), VTList, MemVT, MMO));
    ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
    ID.AddInteger(MMO->getFlags());
    ID.AddInteger(MemVT.getRawBits());
    void *IP = nullptr;
    if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
      cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
      return SDValue(E, 0);
    }

    N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
                                      VTList, MemVT, MMO);
    createOperands(N, Ops);

    CSEMap.InsertNode(N, IP);
  } else {
    N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
                                      VTList, MemVT, MMO);
    createOperands(N, Ops);
  }
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

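/// Create a LIFETIME_START or LIFETIME_END node for the given frame index,
/// memoized on the frame index, size, and offset.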
SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
                                      SDValue Chain, int FrameIndex,
                                      int64_t Size, int64_t Offset) {
  const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END;
  const auto VTs = getVTList(MVT::Other);
  SDValue Ops[2] = {
      Chain,
      getFrameIndex(FrameIndex,
                    getTargetLoweringInfo().getFrameIndexTy(getDataLayout()),
                    true)};

  FoldingSetNodeID ID;
  AddNodeIDNode(ID, Opcode, VTs, Ops);
  ID.AddInteger(FrameIndex);
  ID.AddInteger(Size);
  ID.AddInteger(Offset);
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
    return SDValue(E, 0);

  LifetimeSDNode *N = newSDNode<LifetimeSDNode>(
      Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset);
  createOperands(N, Ops);
  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

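/// Create a PSEUDO_PROBE node, memoized on the probe GUID and index.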
SDValue SelectionDAG::getPseudoProbeNode(const SDLoc &Dl, SDValue Chain,
                                         uint64_t Guid, uint64_t Index,
                                         uint32_t Attr) {
  const unsigned Opcode = ISD::PSEUDO_PROBE;
  const auto VTs = getVTList(MVT::Other);
  SDValue Ops[] = {Chain};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, Opcode, VTs, Ops);
  ID.AddInteger(Guid);
  ID.AddInteger(Index);
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, Dl, IP))
    return SDValue(E, 0);

  auto *N = newSDNode<PseudoProbeSDNode>(
      Opcode, Dl.getIROrder(), Dl.getDebugLoc(), VTs, Guid, Index, Attr);
  createOperands(N, Ops);
  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
                                           SelectionDAG &DAG, SDValue Ptr,
                                           int64_t Offset = 0) {
  // If this is FI+Offset, we can model it.
  if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
    return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
                                             FI->getIndex(), Offset);

  // If this is (FI+Offset1)+Offset2, we can model it.
  if (Ptr.getOpcode() != ISD::ADD ||
      !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
      !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
    return Info;

  int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
  return MachinePointerInfo::getFixedStack(
      DAG.getMachineFunction(), FI,
      Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
}

/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
                                           SelectionDAG &DAG, SDValue Ptr,
                                           SDValue OffsetOp) {
  // Only a constant (or undef) 'Offset' value can be handled; anything else
  // leaves the pointer info unchanged.
  if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
    return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue());
  if (OffsetOp.isUndef())
    return InferPointerInfo(Info, DAG, Ptr);
  return Info;
}

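/// Fully general load creation; builds the MachineMemOperand from the pointer
/// info and flags before delegating to the MMO-based overload.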
SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
                              EVT VT, const SDLoc &dl, SDValue Chain,
                              SDValue Ptr, SDValue Offset,
                              MachinePointerInfo PtrInfo, EVT MemVT,
                              Align Alignment,
                              MachineMemOperand::Flags MMOFlags,
                              const AAMDNodes &AAInfo, const MDNode *Ranges) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

  MMOFlags |= MachineMemOperand::MOLoad;
  assert((MMOFlags & MachineMemOperand::MOStore) == 0);
  // If we don't have a PtrInfo, infer the trivial frame index case to simplify
  // clients.
  if (PtrInfo.V.isNull())
    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);

  TypeSize Size = MemVT.getStoreSize();
  MachineFunction &MF = getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
                                                   Alignment, AAInfo, Ranges);
  return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}

SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
                              EVT VT, const SDLoc &dl, SDValue Chain,
                              SDValue Ptr, SDValue Offset, EVT MemVT,
                              MachineMemOperand *MMO) {
  if (VT == MemVT) {
    ExtType = ISD::NON_EXTLOAD;
  } else if (ExtType == ISD::NON_EXTLOAD) {
    assert(VT == MemVT && "Non-extending load from different memory type!");
  } else {
    // Extending load.
    assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
           "Should only be an extending load, not truncating!");
    assert(VT.isInteger() == MemVT.isInteger() &&
           "Cannot convert from FP to Int or Int -> FP!");
    assert(VT.isVector() == MemVT.isVector() &&
           "Cannot use an ext load to convert to or from a vector!");
    assert((!VT.isVector() ||
            VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&
           "Cannot use an ext load to change the number of vector elements!");
  }

  assert((!MMO->getRanges() ||
          (mdconst::extract<ConstantInt>(MMO->getRanges()->getOperand(0))
                   ->getBitWidth() == MemVT.getScalarSizeInBits() &&
           MemVT.isInteger())) &&
         "Range metadata and load type must match!");

  bool Indexed = AM != ISD::UNINDEXED;
  assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");

  SDVTList VTs = Indexed ?
    getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
  SDValue Ops[] = { Chain, Ptr, Offset };
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>(
      dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<LoadSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }
  auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
                                  ExtType, MemVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
                              SDValue Ptr, MachinePointerInfo PtrInfo,
                              MaybeAlign Alignment,
                              MachineMemOperand::Flags MMOFlags,
                              const AAMDNodes &AAInfo, const MDNode *Ranges) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
                 PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges);
}

SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
                              SDValue Ptr, MachineMemOperand *MMO) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
                 VT, MMO);
}

SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
                                 EVT VT, SDValue Chain, SDValue Ptr,
                                 MachinePointerInfo PtrInfo, EVT MemVT,
                                 MaybeAlign Alignment,
                                 MachineMemOperand::Flags MMOFlags,
                                 const AAMDNodes &AAInfo) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo,
                 MemVT, Alignment, MMOFlags, AAInfo);
}

SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
                                 EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT,
                                 MachineMemOperand *MMO) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
                 MemVT, MMO);
}

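/// Turn an existing unindexed load into a pre/post-indexed load using the
/// given base and offset.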
SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
                                     SDValue Base, SDValue Offset,
                                     ISD::MemIndexedMode AM) {
  LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
  assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
  // Don't propagate the invariant or dereferenceable flags.
  auto MMOFlags =
      LD->getMemOperand()->getFlags() &
      ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
  return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
                 LD->getChain(), Base, Offset, LD->getPointerInfo(),
                 LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo());
}

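/// Create a plain (non-truncating, unindexed) store; builds the
/// MachineMemOperand from the pointer info before delegating.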
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
                               SDValue Ptr, MachinePointerInfo PtrInfo,
                               Align Alignment,
                               MachineMemOperand::Flags MMOFlags,
                               const AAMDNodes &AAInfo) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

  MMOFlags |= MachineMemOperand::MOStore;
  assert((MMOFlags & MachineMemOperand::MOLoad) == 0);

  if (PtrInfo.V.isNull())
    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);

  MachineFunction &MF = getMachineFunction();
  TypeSize Size = Val.getValueType().getStoreSize();
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
  return getStore(Chain, dl, Val, Ptr, MMO);
}

SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
                               SDValue Ptr, MachineMemOperand *MMO) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getStore(Chain, dl, Val, Ptr, Undef, Val.getValueType(), MMO,
                  ISD::UNINDEXED);
}

SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
                               SDValue Ptr, SDValue Offset, EVT SVT,
                               MachineMemOperand *MMO, ISD::MemIndexedMode AM,
                               bool IsTruncating) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
  EVT VT = Val.getValueType();
  if (VT == SVT) {
    IsTruncating = false;
  } else if (!IsTruncating) {
    assert(VT == SVT && "Non-truncating store from different memory type!");
  } else {
    assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
           "Should only be a truncating store, not extending!");
    assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
    assert(VT.isVector() == SVT.isVector() &&
           "Cannot use trunc store to convert to or from a vector!");
    assert((!VT.isVector() ||
            VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
           "Cannot use trunc store to change the number of vector elements!");
  }

  bool Indexed = AM != ISD::UNINDEXED;
  assert((Indexed || Offset.isUndef()) && "Unindexed store with an offset!");
  SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
                         : getVTList(MVT::Other);
  SDValue Ops[] = {Chain, Val, Ptr, Offset};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
  ID.AddInteger(SVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
      dl.getIROrder(), VTs, AM, IsTruncating, SVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<StoreSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }
  auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
                                   IsTruncating, SVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
                                    SDValue Ptr, MachinePointerInfo PtrInfo,
                                    EVT SVT, Align Alignment,
                                    MachineMemOperand::Flags MMOFlags,
                                    const AAMDNodes &AAInfo) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

  MMOFlags |= MachineMemOperand::MOStore;
  assert((MMOFlags & MachineMemOperand::MOLoad) == 0);

  if (PtrInfo.V.isNull())
    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);

  MachineFunction &MF = getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo);
  return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}

SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
                                    SDValue Ptr, EVT SVT,
                                    MachineMemOperand *MMO) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getStore(Chain, dl, Val, Ptr, Undef, SVT, MMO, ISD::UNINDEXED, true);
}

SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
                                      SDValue Base, SDValue Offset,
                                      ISD::MemIndexedMode AM) {
  StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
  assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
  return getStore(ST->getChain(), dl, ST->getValue(), Base, Offset,
                  ST->getMemoryVT(), ST->getMemOperand(), AM,
                  ST->isTruncatingStore());
}

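/// Create a vector-predicated (VP) load; this overload materializes the
/// MachineMemOperand before delegating to the MMO-based overload.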
SDValue SelectionDAG::getLoadVP(
    ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl,
    SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL,
    MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
    MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
    const MDNode *Ranges, bool IsExpanding) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

  MMOFlags |= MachineMemOperand::MOLoad;
  assert((MMOFlags & MachineMemOperand::MOStore) == 0);
  // If we don't have a PtrInfo, infer the trivial frame index case to simplify
  // clients.
  if (PtrInfo.V.isNull())
    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);

  TypeSize Size = MemVT.getStoreSize();
  MachineFunction &MF = getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
                                                   Alignment, AAInfo, Ranges);
  return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT,
                   MMO, IsExpanding);
}

SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
                                ISD::LoadExtType ExtType, EVT VT,
                                const SDLoc &dl, SDValue Chain, SDValue Ptr,
                                SDValue Offset, SDValue Mask, SDValue EVL,
                                EVT MemVT, MachineMemOperand *MMO,
                                bool IsExpanding) {
  bool Indexed = AM != ISD::UNINDEXED;
  assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");

  SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
                         : getVTList(VT, MVT::Other);
  SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(
      dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<VPLoadSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }
  auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
                                    ExtType, IsExpanding, MemVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
                                SDValue Ptr, SDValue Mask, SDValue EVL,
                                MachinePointerInfo PtrInfo,
                                MaybeAlign Alignment,
                                MachineMemOperand::Flags MMOFlags,
                                const AAMDNodes &AAInfo, const MDNode *Ranges,
                                bool IsExpanding) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
                   Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges,
                   IsExpanding);
}

SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
                                SDValue Ptr, SDValue Mask, SDValue EVL,
                                MachineMemOperand *MMO, bool IsExpanding) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
                   Mask, EVL, VT, MMO, IsExpanding);
}

SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
                                   EVT VT, SDValue Chain, SDValue Ptr,
                                   SDValue Mask, SDValue EVL,
                                   MachinePointerInfo PtrInfo, EVT MemVT,
                                   MaybeAlign Alignment,
                                   MachineMemOperand::Flags MMOFlags,
                                   const AAMDNodes &AAInfo, bool IsExpanding) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
                   EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr,
                   IsExpanding);
}

SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
                                   EVT VT, SDValue Chain, SDValue Ptr,
                                   SDValue Mask, SDValue EVL, EVT MemVT,
                                   MachineMemOperand *MMO, bool IsExpanding) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
                   EVL, MemVT, MMO, IsExpanding);
}

SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl,
                                       SDValue Base, SDValue Offset,
                                       ISD::MemIndexedMode AM) {
  auto *LD = cast<VPLoadSDNode>(OrigLoad);
  assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
  // Don't propagate the invariant or dereferenceable flags.
  auto MMOFlags =
      LD->getMemOperand()->getFlags() &
      ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
  return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
                   LD->getChain(), Base, Offset, LD->getMask(),
                   LD->getVectorLength(), LD->getPointerInfo(),
                   LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(),
                   nullptr, LD->isExpandingLoad());
}

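/// Create a vector-predicated (VP) store, optionally indexed, truncating,
/// and/or compressing.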
SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
                                 SDValue Ptr, SDValue Offset, SDValue Mask,
                                 SDValue EVL, EVT MemVT, MachineMemOperand *MMO,
                                 ISD::MemIndexedMode AM, bool IsTruncating,
                                 bool IsCompressing) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
  bool Indexed = AM != ISD::UNINDEXED;
  assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
  SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
                         : getVTList(MVT::Other);
  SDValue Ops[] = {Chain, Val, Ptr, Offset, Mask, EVL};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
      dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<VPStoreSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }
  auto *N = newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
                                     IsTruncating, IsCompressing, MemVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
                                      SDValue Val, SDValue Ptr, SDValue Mask,
                                      SDValue EVL, MachinePointerInfo PtrInfo,
                                      EVT SVT, Align Alignment,
                                      MachineMemOperand::Flags MMOFlags,
                                      const AAMDNodes &AAInfo,
                                      bool IsCompressing) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

  MMOFlags |= MachineMemOperand::MOStore;
  assert((MMOFlags & MachineMemOperand::MOLoad) == 0);

  if (PtrInfo.V.isNull())
    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);

  MachineFunction &MF = getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo);
  return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,
                         IsCompressing);
}

SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
                                      SDValue Val, SDValue Ptr, SDValue Mask,
                                      SDValue EVL, EVT SVT,
                                      MachineMemOperand *MMO,
                                      bool IsCompressing) {
  EVT VT = Val.getValueType();

  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
  if (VT == SVT)
    return getStoreVP(Chain, dl, Val, Ptr, getUNDEF(Ptr.getValueType()), Mask,
                      EVL, VT, MMO, ISD::UNINDEXED,
                      /*IsTruncating*/ false, IsCompressing);

  assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
         "Should only be a truncating store, not extending!");
  assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
  assert(VT.isVector() == SVT.isVector() &&
         "Cannot use trunc store to convert to or from a vector!");
  assert((!VT.isVector() ||
          VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
         "Cannot use trunc store to change the number of vector elements!");

  SDVTList VTs = getVTList(MVT::Other);
  SDValue Undef = getUNDEF(Ptr.getValueType());
  SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
  ID.AddInteger(SVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
      dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<VPStoreSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }
  auto *N =
      newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
                               ISD::UNINDEXED, true, IsCompressing, SVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
                                        SDValue Base, SDValue Offset,
                                        ISD::MemIndexedMode AM) {
  auto *ST = cast<VPStoreSDNode>(OrigStore);
  assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
  SDValue Ops[] = {ST->getChain(), ST->getValue(), Base,
                   Offset, ST->getMask(), ST->getVectorLength()};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
  ID.AddInteger(ST->getMemoryVT().getRawBits());
  ID.AddInteger(ST->getRawSubclassData());
  ID.AddInteger(ST->getPointerInfo().getAddrSpace());
  ID.AddInteger(ST->getMemOperand()->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
    return SDValue(E, 0);

  auto *N = newSDNode<VPStoreSDNode>(
      dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(),
      ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand());
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

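/// Create an experimental VP strided load, where successive lanes are read
/// from memory addresses separated by the dynamic Stride operand.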
SDValue SelectionDAG::getStridedLoadVP(
    ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
    SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
    SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) {
  bool Indexed = AM != ISD::UNINDEXED;
  assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");

  SDValue Ops[] = {Chain, Ptr, Offset, Stride, Mask, EVL};
  SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
                         : getVTList(VT, MVT::Other);
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VTs, Ops);
  ID.AddInteger(VT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<VPStridedLoadSDNode>(
      DL.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());

  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
    cast<VPStridedLoadSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }

  auto *N =
      newSDNode<VPStridedLoadSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, AM,
                                     ExtType, IsExpanding, MemVT, MMO);
  createOperands(N, Ops);
  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain,
                                       SDValue Ptr, SDValue Stride,
                                       SDValue Mask, SDValue EVL,
                                       MachineMemOperand *MMO,
                                       bool IsExpanding) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr,
                          Undef, Stride, Mask, EVL, VT, MMO, IsExpanding);
}

SDValue SelectionDAG::getExtStridedLoadVP(
    ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
    SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT,
    MachineMemOperand *MMO, bool IsExpanding) {
  SDValue Undef = getUNDEF(Ptr.getValueType());
  return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef,
                          Stride, Mask, EVL, MemVT, MMO, IsExpanding);
}

SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL,
                                        SDValue Val, SDValue Ptr,
                                        SDValue Offset, SDValue Stride,
                                        SDValue Mask, SDValue EVL, EVT MemVT,
                                        MachineMemOperand *MMO,
                                        ISD::MemIndexedMode AM,
                                        bool IsTruncating, bool IsCompressing) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
  bool Indexed = AM != ISD::UNINDEXED;
  assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
  SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
                         : getVTList(MVT::Other);
  SDValue Ops[] = {Chain, Val, Ptr, Offset, Stride, Mask, EVL};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>(
      DL.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
    cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }
  auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(),
                                            VTs, AM, IsTruncating,
                                            IsCompressing, MemVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL,
                                             SDValue Val, SDValue Ptr,
                                             SDValue Stride, SDValue Mask,
                                             SDValue EVL, EVT SVT,
                                             MachineMemOperand *MMO,
                                             bool IsCompressing) {
  EVT VT = Val.getValueType();

  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
  if (VT == SVT)
    return getStridedStoreVP(Chain, DL, Val, Ptr, getUNDEF(Ptr.getValueType()),
                             Stride, Mask, EVL, VT, MMO, ISD::UNINDEXED,
                             /*IsTruncating*/ false, IsCompressing);

  assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
         "Should only be a truncating store, not extending!");
  assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
  assert(VT.isVector() == SVT.isVector() &&
         "Cannot use trunc store to convert to or from a vector!");
  assert((!VT.isVector() ||
          VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
         "Cannot use trunc store to change the number of vector elements!");

  SDVTList VTs = getVTList(MVT::Other);
  SDValue Undef = getUNDEF(Ptr.getValueType());
  SDValue Ops[] = {Chain, Val, Ptr, Undef, Stride, Mask, EVL};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
  ID.AddInteger(SVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>(
      DL.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
    cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }
  auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(),
                                            VTs, ISD::UNINDEXED, true,
                                            IsCompressing, SVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

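/// Create a VP_GATHER node; Ops is expected to be {Chain, Base, Index, Scale,
/// Mask, EVL}, as checked by the asserts below.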
SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
                                  ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
                                  ISD::MemIndexType IndexType) {
  assert(Ops.size() == 6 && "Incompatible number of operands");

  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops);
  ID.AddInteger(VT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>(
      dl.getIROrder(), VTs, VT, MMO, IndexType));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<VPGatherSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }

  auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
                                      VT, MMO, IndexType);
  createOperands(N, Ops);

  assert(N->getMask().getValueType().getVectorElementCount() ==
             N->getValueType(0).getVectorElementCount() &&
         "Vector width mismatch between mask and data");
  assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==
             N->getValueType(0).getVectorElementCount().isScalable() &&
         "Scalable flags of index and data do not match");
  assert(ElementCount::isKnownGE(
             N->getIndex().getValueType().getVectorElementCount(),
             N->getValueType(0).getVectorElementCount()) &&
         "Vector width mismatch between index and data");
  assert(isa<ConstantSDNode>(N->getScale()) &&
         N->getScale()->getAsAPIntVal().isPowerOf2() &&
         "Scale should be a constant power of 2");

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
                                   ArrayRef<SDValue> Ops,
                                   MachineMemOperand *MMO,
                                   ISD::MemIndexType IndexType) {
  assert(Ops.size() == 7 && "Incompatible number of operands");

  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops);
  ID.AddInteger(VT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>(
      dl.getIROrder(), VTs, VT, MMO, IndexType));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<VPScatterSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }
  auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
                                       VT, MMO, IndexType);
  createOperands(N, Ops);

  assert(N->getMask().getValueType().getVectorElementCount() ==
             N->getValue().getValueType().getVectorElementCount() &&
         "Vector width mismatch between mask and data");
  assert(
      N->getIndex().getValueType().getVectorElementCount().isScalable() ==
          N->getValue().getValueType().getVectorElementCount().isScalable() &&
      "Scalable flags of index and data do not match");
  assert(ElementCount::isKnownGE(
             N->getIndex().getValueType().getVectorElementCount(),
             N->getValue().getValueType().getVectorElementCount()) &&
         "Vector width mismatch between index and data");
  assert(isa<ConstantSDNode>(N->getScale()) &&
         N->getScale()->getAsAPIntVal().isPowerOf2() &&
         "Scale should be a constant power of 2");

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

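/// Create a masked load (MLOAD), optionally indexed, extending, and/or
/// expanding.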
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
                                    SDValue Base, SDValue Offset, SDValue Mask,
                                    SDValue PassThru, EVT MemVT,
                                    MachineMemOperand *MMO,
                                    ISD::MemIndexedMode AM,
                                    ISD::LoadExtType ExtTy, bool isExpanding) {
  bool Indexed = AM != ISD::UNINDEXED;
  assert((Indexed || Offset.isUndef()) &&
         "Unindexed masked load with an offset!");
  SDVTList VTs = Indexed ? getVTList(VT, Base.getValueType(), MVT::Other)
                         : getVTList(VT, MVT::Other);
  SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
      dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }
  auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
                                        AM, ExtTy, isExpanding, MemVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl,
                                           SDValue Base, SDValue Offset,
                                           ISD::MemIndexedMode AM) {
  MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad);
  assert(LD->getOffset().isUndef() &&
         "Masked load is already an indexed load!");
  return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base,
                       Offset, LD->getMask(), LD->getPassThru(),
                       LD->getMemoryVT(), LD->getMemOperand(), AM,
                       LD->getExtensionType(), LD->isExpandingLoad());
}

SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
                                     SDValue Val, SDValue Base, SDValue Offset,
                                     SDValue Mask, EVT MemVT,
                                     MachineMemOperand *MMO,
                                     ISD::MemIndexedMode AM, bool IsTruncating,
                                     bool IsCompressing) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
  bool Indexed = AM != ISD::UNINDEXED;
  assert((Indexed || Offset.isUndef()) &&
         "Unindexed masked store with an offset!");
  SDVTList VTs = Indexed ? getVTList(Base.getValueType(), MVT::Other)
                         : getVTList(MVT::Other);
  SDValue Ops[] = {Chain, Val, Base, Offset, Mask};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
      dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }
  auto *N =
      newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
                                   IsTruncating, IsCompressing, MemVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl,
                                            SDValue Base, SDValue Offset,
                                            ISD::MemIndexedMode AM) {
  MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore);
  assert(ST->getOffset().isUndef() &&
         "Masked store is already an indexed store!");
  return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset,
                        ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(),
                        AM, ST->isTruncatingStore(), ST->isCompressingStore());
}

SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl,
                                      ArrayRef<SDValue> Ops,
                                      MachineMemOperand *MMO,
                                      ISD::MemIndexType IndexType,
                                      ISD::LoadExtType ExtTy) {
  assert(Ops.size() == 6 && "Incompatible number of operands");

  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
      dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }

  auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
                                          VTs, MemVT, MMO, IndexType, ExtTy);
  createOperands(N, Ops);

  assert(N->getPassThru().getValueType() == N->getValueType(0) &&
         "Incompatible type of the PassThru value in MaskedGatherSDNode");
  assert(N->getMask().getValueType().getVectorElementCount() ==
             N->getValueType(0).getVectorElementCount() &&
         "Vector width mismatch between mask and data");
  assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==
             N->getValueType(0).getVectorElementCount().isScalable() &&
         "Scalable flags of index and data do not match");
  assert(ElementCount::isKnownGE(
             N->getIndex().getValueType().getVectorElementCount(),
             N->getValueType(0).getVectorElementCount()) &&
         "Vector width mismatch between index and data");
  assert(isa<ConstantSDNode>(N->getScale()) &&
         N->getScale()->getAsAPIntVal().isPowerOf2() &&
         "Scale should be a constant power of 2");

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl,
                                       ArrayRef<SDValue> Ops,
                                       MachineMemOperand *MMO,
                                       ISD::MemIndexType IndexType,
                                       bool IsTrunc) {
  assert(Ops.size() == 6 && "Incompatible number of operands");

  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
      dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }

  auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
                                           VTs, MemVT, MMO, IndexType, IsTrunc);
  createOperands(N, Ops);

  assert(N->getMask().getValueType().getVectorElementCount() ==
             N->getValue().getValueType().getVectorElementCount() &&
         "Vector width mismatch between mask and data");
  assert(
      N->getIndex().getValueType().getVectorElementCount().isScalable() ==
          N->getValue().getValueType().getVectorElementCount().isScalable() &&
      "Scalable flags of index and data do not match");
  assert(ElementCount::isKnownGE(
             N->getIndex().getValueType().getVectorElementCount(),
             N->getValue().getValueType().getVectorElementCount()) &&
         "Vector width mismatch between index and data");
  assert(isa<ConstantSDNode>(N->getScale()) &&
         N->getScale()->getAsAPIntVal().isPowerOf2() &&
         "Scale should be a constant power of 2");

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT,
                                         const SDLoc &dl, ArrayRef<SDValue> Ops,
                                         MachineMemOperand *MMO,
                                         ISD::MemIndexType IndexType) {
  assert(Ops.size() == 7 && "Incompatible number of operands");

  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<MaskedHistogramSDNode>(
      dl.getIROrder(), VTs, MemVT, MMO, IndexType));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    cast<MaskedHistogramSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }

  auto *N = newSDNode<MaskedHistogramSDNode>(dl.getIROrder(), dl.getDebugLoc(),
                                             VTs, MemVT, MMO, IndexType);
  createOperands(N, Ops);

  assert(N->getMask().getValueType().getVectorElementCount() ==
             N->getIndex().getValueType().getVectorElementCount() &&
         "Vector width mismatch between mask and data");
  assert(isa<ConstantSDNode>(N->getScale()) &&
         N->getScale()->getAsAPIntVal().isPowerOf2() &&
         "Scale should be a constant power of 2");
  assert(N->getInc().getValueType().isInteger() && "Non-integer update value");

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
                                  EVT MemVT, MachineMemOperand *MMO) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
  SDVTList VTs = getVTList(MVT::Other);
  SDValue Ops[] = {Chain, Ptr};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::GET_FPENV_MEM, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
      ISD::GET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
    return SDValue(E, 0);

  auto *N = newSDNode<FPStateAccessSDNode>(ISD::GET_FPENV_MEM, dl.getIROrder(),
                                           dl.getDebugLoc(), VTs, MemVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

SDValue SelectionDAG::getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
                                  EVT MemVT, MachineMemOperand *MMO) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
  SDVTList VTs = getVTList(MVT::Other);
  SDValue Ops[] = {Chain, Ptr};
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::SET_FPENV_MEM, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
      ISD::SET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
    return SDValue(E, 0);

  auto *N = newSDNode<FPStateAccessSDNode>(ISD::SET_FPENV_MEM, dl.getIROrder(),
                                           dl.getDebugLoc(), VTs, MemVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

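/// Try to simplify a select to one of its operands (constant condition and
/// undef handling); returns an empty SDValue if no simplification applies.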
SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
  // select undef, T, F --> T (if T is a constant), otherwise F
  // select ?, undef, F --> F
  // select ?, T, undef --> T
  if (Cond.isUndef())
    return isConstantValueOfAnyType(T) ? T : F;
  if (T.isUndef())
    return F;
  if (F.isUndef())
    return T;

  // select true, T, F --> T
  // select false, T, F --> F
  if (auto C = isBoolConstant(Cond, /*AllowTruncation=*/true))
    return *C ? T : F;

  // select ?, T, T --> T
  if (T == F)
    return T;

  return SDValue();
}

SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
  // shift undef, Y --> 0 (can always assume that the undef value is 0)
  if (X.isUndef())
    return getConstant(0, SDLoc(X.getNode()), X.getValueType());
  // shift X, undef --> undef (because it may shift by the bitwidth)
  if (Y.isUndef())
    return getUNDEF(X.getValueType());

  // shift 0, Y --> 0
  // shift X, 0 --> X
  if (isNullOrNullSplat(X) || isNullOrNullSplat(Y))
    return X;

  // shift X, C >= bitwidth(X) --> undef
  // All vector elements must be too big (or undef) to avoid partial undefs.
  auto isShiftTooBig = [X](ConstantSDNode *Val) {
    return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits());
  };
  if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true))
    return getUNDEF(X.getValueType());

  // shift i1/vXi1 X, Y --> X (any non-zero shift amount is undefined).
  if (X.getValueType().getScalarType() == MVT::i1)
    return X;

  return SDValue();
}

10378 | SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, |
10379 | SDNodeFlags Flags) { |
10380 | // If this operation has 'nnan' or 'ninf' and at least one disallowed
10381 | // operand (an undef operand can be chosen to be NaN/Inf), then the result of
10382 | // this operation is poison. That result can be relaxed to undef.
10383 | ConstantFPSDNode *XC = isConstOrConstSplatFP(N: X, /* AllowUndefs */ true); |
10384 | ConstantFPSDNode *YC = isConstOrConstSplatFP(N: Y, /* AllowUndefs */ true); |
10385 | bool HasNan = (XC && XC->getValueAPF().isNaN()) || |
10386 | (YC && YC->getValueAPF().isNaN()); |
10387 | bool HasInf = (XC && XC->getValueAPF().isInfinity()) || |
10388 | (YC && YC->getValueAPF().isInfinity()); |
10389 | |
10390 | if (Flags.hasNoNaNs() && (HasNan || X.isUndef() || Y.isUndef())) |
10391 | return getUNDEF(VT: X.getValueType()); |
10392 | |
10393 | if (Flags.hasNoInfs() && (HasInf || X.isUndef() || Y.isUndef())) |
10394 | return getUNDEF(VT: X.getValueType()); |
10395 | |
10396 | if (!YC) |
10397 | return SDValue(); |
10398 | |
10399 | // X + -0.0 --> X |
10400 | if (Opcode == ISD::FADD) |
10401 | if (YC->getValueAPF().isNegZero()) |
10402 | return X; |
10403 | |
10404 | // X - +0.0 --> X |
10405 | if (Opcode == ISD::FSUB) |
10406 | if (YC->getValueAPF().isPosZero()) |
10407 | return X; |
10408 | |
10409 | // X * 1.0 --> X |
10410 | // X / 1.0 --> X |
10411 | if (Opcode == ISD::FMUL || Opcode == ISD::FDIV) |
10412 | if (YC->getValueAPF().isExactlyValue(V: 1.0)) |
10413 | return X; |
10414 | |
10415 | // X * 0.0 --> 0.0 |
10416 | if (Opcode == ISD::FMUL && Flags.hasNoNaNs() && Flags.hasNoSignedZeros()) |
10417 | if (YC->getValueAPF().isZero()) |
10418 | return getConstantFP(Val: 0.0, DL: SDLoc(Y), VT: Y.getValueType()); |
10419 | |
10420 | return SDValue(); |
10421 | } |
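// Illustrative cases for the folds above (hypothetical operands): with nnan
// set, (fadd X, NaN) is poison and is relaxed to undef; (fsub X, +0.0) --> X
// unconditionally; and (fmul X, 0.0) --> 0.0 only when both nnan and nsz are
// set, since the product could otherwise be NaN or -0.0.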
10422 | |
10423 | SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, |
10424 | SDValue Ptr, SDValue SV, unsigned Align) { |
10425 | SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; |
10426 | return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); |
10427 | } |
10428 | |
10429 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
10430 | ArrayRef<SDUse> Ops) { |
10431 | switch (Ops.size()) { |
10432 | case 0: return getNode(Opcode, DL, VT); |
10433 | case 1: return getNode(Opcode, DL, VT, N1: static_cast<const SDValue>(Ops[0])); |
10434 | case 2: return getNode(Opcode, DL, VT, N1: Ops[0], N2: Ops[1]); |
10435 | case 3: return getNode(Opcode, DL, VT, N1: Ops[0], N2: Ops[1], N3: Ops[2]); |
10436 | default: break; |
10437 | } |
10438 | |
10439 | // Copy from an SDUse array into an SDValue array for use with |
10440 | // the regular getNode logic. |
10441 | SmallVector<SDValue, 8> NewOps(Ops); |
10442 | return getNode(Opcode, DL, VT, Ops: NewOps); |
10443 | } |
10444 | |
10445 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
10446 | ArrayRef<SDValue> Ops) { |
10447 | SDNodeFlags Flags; |
10448 | if (Inserter) |
10449 | Flags = Inserter->getFlags(); |
10450 | return getNode(Opcode, DL, VT, Ops, Flags); |
10451 | } |
10452 | |
10453 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
10454 | ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { |
10455 | unsigned NumOps = Ops.size(); |
10456 | switch (NumOps) { |
10457 | case 0: return getNode(Opcode, DL, VT); |
10458 | case 1: return getNode(Opcode, DL, VT, N1: Ops[0], Flags); |
10459 | case 2: return getNode(Opcode, DL, VT, N1: Ops[0], N2: Ops[1], Flags); |
10460 | case 3: return getNode(Opcode, DL, VT, N1: Ops[0], N2: Ops[1], N3: Ops[2], Flags); |
10461 | default: break; |
10462 | } |
10463 | |
10464 | #ifndef NDEBUG |
10465 | for (const auto &Op : Ops) |
10466 | assert(Op.getOpcode() != ISD::DELETED_NODE && |
10467 | "Operand is DELETED_NODE!"); |
10468 | #endif |
10469 | |
10470 | switch (Opcode) { |
10471 | default: break; |
10472 | case ISD::BUILD_VECTOR: |
10473 | // Attempt to simplify BUILD_VECTOR. |
10474 | if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, DAG&: *this)) |
10475 | return V; |
10476 | break; |
10477 | case ISD::CONCAT_VECTORS: |
10478 | if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, DAG&: *this)) |
10479 | return V; |
10480 | break; |
10481 | case ISD::SELECT_CC: |
10482 | assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); |
10483 | assert(Ops[0].getValueType() == Ops[1].getValueType() && |
10484 | "LHS and RHS of condition must have same type!"); |
10485 | assert(Ops[2].getValueType() == Ops[3].getValueType() && |
10486 | "True and False arms of SelectCC must have same type!"); |
10487 | assert(Ops[2].getValueType() == VT && |
10488 | "select_cc node must be of same type as true and false value!"); |
10489 | assert((!Ops[0].getValueType().isVector() || |
10490 | Ops[0].getValueType().getVectorElementCount() == |
10491 | VT.getVectorElementCount()) && |
10492 | "Expected select_cc with vector result to have the same sized " |
10493 | "comparison type!"); |
10494 | break; |
10495 | case ISD::BR_CC: |
10496 | assert(NumOps == 5 && "BR_CC takes 5 operands!"); |
10497 | assert(Ops[2].getValueType() == Ops[3].getValueType() && |
10498 | "LHS/RHS of comparison should match types!"); |
10499 | break; |
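// For i1 types, the VP arithmetic cases below reduce to logic ops: 1-bit
// arithmetic is carried out mod 2, so a + b == a - b == a ^ b and
// a * b == a & b, which is why the opcodes are remapped rather than expanded.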
10500 | case ISD::VP_ADD: |
10501 | case ISD::VP_SUB: |
10502 | // If this is a VP_ADD/VP_SUB mask operation, turn it into VP_XOR.
10503 | if (VT.getScalarType() == MVT::i1) |
10504 | Opcode = ISD::VP_XOR; |
10505 | break; |
10506 | case ISD::VP_MUL: |
10507 | // If this is a VP_MUL mask operation, turn it into VP_AND.
10508 | if (VT.getScalarType() == MVT::i1) |
10509 | Opcode = ISD::VP_AND; |
10510 | break; |
10511 | case ISD::VP_REDUCE_MUL: |
10512 | // If this is a VP_REDUCE_MUL mask operation, turn it into VP_REDUCE_AND.
10513 | if (VT == MVT::i1) |
10514 | Opcode = ISD::VP_REDUCE_AND; |
10515 | break; |
10516 | case ISD::VP_REDUCE_ADD: |
10517 | // If this is a VP_REDUCE_ADD mask operation, turn it into VP_REDUCE_XOR.
10518 | if (VT == MVT::i1) |
10519 | Opcode = ISD::VP_REDUCE_XOR; |
10520 | break; |
10521 | case ISD::VP_REDUCE_SMAX: |
10522 | case ISD::VP_REDUCE_UMIN: |
10523 | // If this is a VP_REDUCE_SMAX/VP_REDUCE_UMIN mask operation, turn it into
10524 | // VP_REDUCE_AND.
10525 | if (VT == MVT::i1) |
10526 | Opcode = ISD::VP_REDUCE_AND; |
10527 | break; |
10528 | case ISD::VP_REDUCE_SMIN: |
10529 | case ISD::VP_REDUCE_UMAX: |
10530 | // If this is a VP_REDUCE_SMIN/VP_REDUCE_UMAX mask operation, turn it into
10531 | // VP_REDUCE_OR.
10532 | if (VT == MVT::i1) |
10533 | Opcode = ISD::VP_REDUCE_OR; |
10534 | break; |
10535 | } |
10536 | |
10537 | // Memoize the node unless it produces a glue result.
10538 | SDNode *N; |
10539 | SDVTList VTs = getVTList(VT); |
10540 | |
10541 | if (VT != MVT::Glue) { |
10542 | FoldingSetNodeID ID; |
10543 | AddNodeIDNode(ID, OpC: Opcode, VTList: VTs, OpList: Ops); |
10544 | void *IP = nullptr; |
10545 | |
10546 | if (SDNode *E = FindNodeOrInsertPos(ID, DL, InsertPos&: IP)) { |
10547 | E->intersectFlagsWith(Flags); |
10548 | return SDValue(E, 0); |
10549 | } |
10550 | |
10551 | N = newSDNode<SDNode>(Args&: Opcode, Args: DL.getIROrder(), Args: DL.getDebugLoc(), Args&: VTs); |
10552 | createOperands(Node: N, Vals: Ops); |
10553 | |
10554 | CSEMap.InsertNode(N, InsertPos: IP); |
10555 | } else { |
10556 | N = newSDNode<SDNode>(Args&: Opcode, Args: DL.getIROrder(), Args: DL.getDebugLoc(), Args&: VTs); |
10557 | createOperands(Node: N, Vals: Ops); |
10558 | } |
10559 | |
10560 | N->setFlags(Flags); |
10561 | InsertNode(N); |
10562 | SDValue V(N, 0); |
10563 | NewSDValueDbgMsg(V, Msg: "Creating new node: ", G: this); |
10564 | return V; |
10565 | } |
10566 | |
10567 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, |
10568 | ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { |
10569 | return getNode(Opcode, DL, VTList: getVTList(VTs: ResultTys), Ops); |
10570 | } |
10571 | |
10572 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
10573 | ArrayRef<SDValue> Ops) { |
10574 | SDNodeFlags Flags; |
10575 | if (Inserter) |
10576 | Flags = Inserter->getFlags(); |
10577 | return getNode(Opcode, DL, VTList, Ops, Flags); |
10578 | } |
10579 | |
10580 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
10581 | ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { |
10582 | if (VTList.NumVTs == 1) |
10583 | return getNode(Opcode, DL, VT: VTList.VTs[0], Ops, Flags); |
10584 | |
10585 | #ifndef NDEBUG |
10586 | for (const auto &Op : Ops) |
10587 | assert(Op.getOpcode() != ISD::DELETED_NODE && |
10588 | "Operand is DELETED_NODE!"); |
10589 | #endif |
10590 | |
10591 | switch (Opcode) { |
10592 | case ISD::SADDO: |
10593 | case ISD::UADDO: |
10594 | case ISD::SSUBO: |
10595 | case ISD::USUBO: { |
10596 | assert(VTList.NumVTs == 2 && Ops.size() == 2 && |
10597 | "Invalid add/sub overflow op!"); |
10598 | assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() && |
10599 | Ops[0].getValueType() == Ops[1].getValueType() && |
10600 | Ops[0].getValueType() == VTList.VTs[0] && |
10601 | "Binary operator types must match!"); |
10602 | SDValue N1 = Ops[0], N2 = Ops[1]; |
10603 | canonicalizeCommutativeBinop(Opcode, N1, N2); |
10604 | |
10605 | // (X +- 0) -> X with zero overflow.
10606 | ConstantSDNode *N2CV = isConstOrConstSplat(N: N2, /*AllowUndefs*/ false, |
10607 | /*AllowTruncation*/ true); |
10608 | if (N2CV && N2CV->isZero()) { |
10609 | SDValue ZeroOverFlow = getConstant(Val: 0, DL, VT: VTList.VTs[1]); |
10610 | return getNode(Opcode: ISD::MERGE_VALUES, DL, VTList, Ops: {N1, ZeroOverFlow}, Flags); |
10611 | } |
10612 | |
10613 | if (VTList.VTs[0].getScalarType() == MVT::i1 && |
10614 | VTList.VTs[1].getScalarType() == MVT::i1) { |
10615 | SDValue F1 = getFreeze(V: N1); |
10616 | SDValue F2 = getFreeze(V: N2); |
10617 | // {vXi1,vXi1} (u/s)addo(vXi1 x, vXi1 y) -> {xor(x,y), and(x,y)}
10618 | if (Opcode == ISD::UADDO || Opcode == ISD::SADDO) |
10619 | return getNode(Opcode: ISD::MERGE_VALUES, DL, VTList, |
10620 | Ops: {getNode(Opcode: ISD::XOR, DL, VT: VTList.VTs[0], N1: F1, N2: F2), |
10621 | getNode(Opcode: ISD::AND, DL, VT: VTList.VTs[1], N1: F1, N2: F2)}, |
10622 | Flags); |
10623 | // {vXi1,vXi1} (u/s)subo(vXi1 x, vXi1 y) -> {xor(x,y), and(~x,y)}
10624 | if (Opcode == ISD::USUBO || Opcode == ISD::SSUBO) { |
10625 | SDValue NotF1 = getNOT(DL, Val: F1, VT: VTList.VTs[0]); |
10626 | return getNode(Opcode: ISD::MERGE_VALUES, DL, VTList, |
10627 | Ops: {getNode(Opcode: ISD::XOR, DL, VT: VTList.VTs[0], N1: F1, N2: F2), |
10628 | getNode(Opcode: ISD::AND, DL, VT: VTList.VTs[1], N1: NotF1, N2: F2)}, |
10629 | Flags); |
10630 | } |
10631 | } |
10632 | break; |
10633 | } |
10634 | case ISD::SADDO_CARRY: |
10635 | case ISD::UADDO_CARRY: |
10636 | case ISD::SSUBO_CARRY: |
10637 | case ISD::USUBO_CARRY: |
10638 | assert(VTList.NumVTs == 2 && Ops.size() == 3 && |
10639 | "Invalid add/sub overflow op!"); |
10640 | assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() && |
10641 | Ops[0].getValueType() == Ops[1].getValueType() && |
10642 | Ops[0].getValueType() == VTList.VTs[0] && |
10643 | Ops[2].getValueType() == VTList.VTs[1] && |
10644 | "Binary operator types must match!"); |
10645 | break; |
10646 | case ISD::SMUL_LOHI: |
10647 | case ISD::UMUL_LOHI: { |
10648 | assert(VTList.NumVTs == 2 && Ops.size() == 2 && "Invalid mul lo/hi op!"); |
10649 | assert(VTList.VTs[0].isInteger() && VTList.VTs[0] == VTList.VTs[1] && |
10650 | VTList.VTs[0] == Ops[0].getValueType() && |
10651 | VTList.VTs[0] == Ops[1].getValueType() && |
10652 | "Binary operator types must match!"); |
10653 | // Constant fold. |
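// Worked example of the widening fold below (hypothetical i8 inputs):
// SMUL_LOHI of -1 and 2 sign-extends both to i16, multiplies to 0xFFFE,
// and yields Hi = 0xFF and Lo = 0xFE.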
10654 | ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(Val: Ops[0]); |
10655 | ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: Ops[1]); |
10656 | if (LHS && RHS) { |
10657 | unsigned Width = VTList.VTs[0].getScalarSizeInBits(); |
10658 | unsigned OutWidth = Width * 2; |
10659 | APInt Val = LHS->getAPIntValue(); |
10660 | APInt Mul = RHS->getAPIntValue(); |
10661 | if (Opcode == ISD::SMUL_LOHI) { |
10662 | Val = Val.sext(width: OutWidth); |
10663 | Mul = Mul.sext(width: OutWidth); |
10664 | } else { |
10665 | Val = Val.zext(width: OutWidth); |
10666 | Mul = Mul.zext(width: OutWidth); |
10667 | } |
10668 | Val *= Mul; |
10669 | |
10670 | SDValue Hi = |
10671 | getConstant(Val: Val.extractBits(numBits: Width, bitPosition: Width), DL, VT: VTList.VTs[0]); |
10672 | SDValue Lo = getConstant(Val: Val.trunc(width: Width), DL, VT: VTList.VTs[0]); |
10673 | return getNode(Opcode: ISD::MERGE_VALUES, DL, VTList, Ops: {Lo, Hi}, Flags); |
10674 | } |
10675 | break; |
10676 | } |
10677 | case ISD::FFREXP: { |
10678 | assert(VTList.NumVTs == 2 && Ops.size() == 1 && "Invalid ffrexp op!"); |
10679 | assert(VTList.VTs[0].isFloatingPoint() && VTList.VTs[1].isInteger() && |
10680 | VTList.VTs[0] == Ops[0].getValueType() && "frexp type mismatch"); |
10681 | |
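// Worked example of the fold below (hypothetical input): frexp(8.0) yields
// mantissa 0.5 and exponent 4, since 8.0 == 0.5 * 2^4, so a constant operand
// of 8.0 folds to the pair {0.5, 4}.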
10682 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val: Ops[0])) { |
10683 | int FrexpExp; |
10684 | APFloat FrexpMant = |
10685 | frexp(X: C->getValueAPF(), Exp&: FrexpExp, RM: APFloat::rmNearestTiesToEven); |
10686 | SDValue Result0 = getConstantFP(V: FrexpMant, DL, VT: VTList.VTs[0]); |
10687 | SDValue Result1 = |
10688 | getConstant(Val: FrexpMant.isFinite() ? FrexpExp : 0, DL, VT: VTList.VTs[1]); |
10689 | return getNode(Opcode: ISD::MERGE_VALUES, DL, VTList, Ops: {Result0, Result1}, Flags); |
10690 | } |
10691 | |
10692 | break; |
10693 | } |
10694 | case ISD::STRICT_FP_EXTEND: |
10695 | assert(VTList.NumVTs == 2 && Ops.size() == 2 && |
10696 | "Invalid STRICT_FP_EXTEND!"); |
10697 | assert(VTList.VTs[0].isFloatingPoint() && |
10698 | Ops[1].getValueType().isFloatingPoint() && "Invalid FP cast!"); |
10699 | assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && |
10700 | "STRICT_FP_EXTEND result type should be vector iff the operand " |
10701 | "type is vector!"); |
10702 | assert((!VTList.VTs[0].isVector() || |
10703 | VTList.VTs[0].getVectorElementCount() == |
10704 | Ops[1].getValueType().getVectorElementCount()) && |
10705 | "Vector element count mismatch!"); |
10706 | assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) && |
10707 | "Invalid fpext node, dst <= src!"); |
10708 | break; |
10709 | case ISD::STRICT_FP_ROUND: |
10710 | assert(VTList.NumVTs == 2 && Ops.size() == 3 && "Invalid STRICT_FP_ROUND!"); |
10711 | assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && |
10712 | "STRICT_FP_ROUND result type should be vector iff the operand " |
10713 | "type is vector!"); |
10714 | assert((!VTList.VTs[0].isVector() || |
10715 | VTList.VTs[0].getVectorElementCount() == |
10716 | Ops[1].getValueType().getVectorElementCount()) && |
10717 | "Vector element count mismatch!"); |
10718 | assert(VTList.VTs[0].isFloatingPoint() && |
10719 | Ops[1].getValueType().isFloatingPoint() && |
10720 | VTList.VTs[0].bitsLT(Ops[1].getValueType()) && |
10721 | Ops[2].getOpcode() == ISD::TargetConstant && |
10722 | (Ops[2]->getAsZExtVal() == 0 || Ops[2]->getAsZExtVal() == 1) && |
10723 | "Invalid STRICT_FP_ROUND!"); |
10724 | break; |
10725 | #if 0 |
10726 | // FIXME: figure out how to safely handle things like |
10727 | // int foo(int x) { return 1 << (x & 255); } |
10728 | // int bar() { return foo(256); } |
10729 | case ISD::SRA_PARTS: |
10730 | case ISD::SRL_PARTS: |
10731 | case ISD::SHL_PARTS: |
10732 | if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG && |
10733 | cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1) |
10734 | return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); |
10735 | else if (N3.getOpcode() == ISD::AND) |
10736 | if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) { |
10737 | // If the and is only masking out bits that cannot affect the shift,
10738 | // eliminate the and. |
10739 | unsigned NumBits = VT.getScalarSizeInBits()*2; |
10740 | if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) |
10741 | return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); |
10742 | } |
10743 | break; |
10744 | #endif |
10745 | } |
10746 | |
10747 | // Memoize the node unless it returns a glue result. |
10748 | SDNode *N; |
10749 | if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { |
10750 | FoldingSetNodeID ID; |
10751 | AddNodeIDNode(ID, OpC: Opcode, VTList, OpList: Ops); |
10752 | void *IP = nullptr; |
10753 | if (SDNode *E = FindNodeOrInsertPos(ID, DL, InsertPos&: IP)) { |
10754 | E->intersectFlagsWith(Flags); |
10755 | return SDValue(E, 0); |
10756 | } |
10757 | |
10758 | N = newSDNode<SDNode>(Args&: Opcode, Args: DL.getIROrder(), Args: DL.getDebugLoc(), Args&: VTList); |
10759 | createOperands(Node: N, Vals: Ops); |
10760 | CSEMap.InsertNode(N, InsertPos: IP); |
10761 | } else { |
10762 | N = newSDNode<SDNode>(Args&: Opcode, Args: DL.getIROrder(), Args: DL.getDebugLoc(), Args&: VTList); |
10763 | createOperands(Node: N, Vals: Ops); |
10764 | } |
10765 | |
10766 | N->setFlags(Flags); |
10767 | InsertNode(N); |
10768 | SDValue V(N, 0); |
10769 | NewSDValueDbgMsg(V, Msg: "Creating new node: ", G: this); |
10770 | return V; |
10771 | } |
10772 | |
10773 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, |
10774 | SDVTList VTList) { |
10775 | return getNode(Opcode, DL, VTList, Ops: ArrayRef<SDValue>()); |
10776 | } |
10777 | |
10778 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
10779 | SDValue N1) { |
10780 | SDValue Ops[] = { N1 }; |
10781 | return getNode(Opcode, DL, VTList, Ops); |
10782 | } |
10783 | |
10784 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
10785 | SDValue N1, SDValue N2) { |
10786 | SDValue Ops[] = { N1, N2 }; |
10787 | return getNode(Opcode, DL, VTList, Ops); |
10788 | } |
10789 | |
10790 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
10791 | SDValue N1, SDValue N2, SDValue N3) { |
10792 | SDValue Ops[] = { N1, N2, N3 }; |
10793 | return getNode(Opcode, DL, VTList, Ops); |
10794 | } |
10795 | |
10796 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
10797 | SDValue N1, SDValue N2, SDValue N3, SDValue N4) { |
10798 | SDValue Ops[] = { N1, N2, N3, N4 }; |
10799 | return getNode(Opcode, DL, VTList, Ops); |
10800 | } |
10801 | |
10802 | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
10803 | SDValue N1, SDValue N2, SDValue N3, SDValue N4, |
10804 | SDValue N5) { |
10805 | SDValue Ops[] = { N1, N2, N3, N4, N5 }; |
10806 | return getNode(Opcode, DL, VTList, Ops); |
10807 | } |
10808 | |
10809 | SDVTList SelectionDAG::getVTList(EVT VT) { |
10810 | if (!VT.isExtended()) |
10811 | return makeVTList(VTs: SDNode::getValueTypeList(VT: VT.getSimpleVT()), NumVTs: 1); |
10812 | |
10813 | return makeVTList(VTs: &(*EVTs.insert(x: VT).first), NumVTs: 1); |
10814 | } |
10815 | |
10816 | SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { |
10817 | FoldingSetNodeID ID; |
10818 | ID.AddInteger(I: 2U); |
10819 | ID.AddInteger(I: VT1.getRawBits()); |
10820 | ID.AddInteger(I: VT2.getRawBits()); |
10821 | |
10822 | void *IP = nullptr; |
10823 | SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, InsertPos&: IP); |
10824 | if (!Result) { |
10825 | EVT *Array = Allocator.Allocate<EVT>(Num: 2); |
10826 | Array[0] = VT1; |
10827 | Array[1] = VT2; |
10828 | Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2); |
10829 | VTListMap.InsertNode(N: Result, InsertPos: IP); |
10830 | } |
10831 | return Result->getSDVTList(); |
10832 | } |
10833 | |
10834 | SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { |
10835 | FoldingSetNodeID ID; |
10836 | ID.AddInteger(I: 3U); |
10837 | ID.AddInteger(I: VT1.getRawBits()); |
10838 | ID.AddInteger(I: VT2.getRawBits()); |
10839 | ID.AddInteger(I: VT3.getRawBits()); |
10840 | |
10841 | void *IP = nullptr; |
10842 | SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, InsertPos&: IP); |
10843 | if (!Result) { |
10844 | EVT *Array = Allocator.Allocate<EVT>(Num: 3); |
10845 | Array[0] = VT1; |
10846 | Array[1] = VT2; |
10847 | Array[2] = VT3; |
10848 | Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3); |
10849 | VTListMap.InsertNode(N: Result, InsertPos: IP); |
10850 | } |
10851 | return Result->getSDVTList(); |
10852 | } |
10853 | |
10854 | SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { |
10855 | FoldingSetNodeID ID; |
10856 | ID.AddInteger(I: 4U); |
10857 | ID.AddInteger(I: VT1.getRawBits()); |
10858 | ID.AddInteger(I: VT2.getRawBits()); |
10859 | ID.AddInteger(I: VT3.getRawBits()); |
10860 | ID.AddInteger(I: VT4.getRawBits()); |
10861 | |
10862 | void *IP = nullptr; |
10863 | SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, InsertPos&: IP); |
10864 | if (!Result) { |
10865 | EVT *Array = Allocator.Allocate<EVT>(Num: 4); |
10866 | Array[0] = VT1; |
10867 | Array[1] = VT2; |
10868 | Array[2] = VT3; |
10869 | Array[3] = VT4; |
10870 | Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4); |
10871 | VTListMap.InsertNode(N: Result, InsertPos: IP); |
10872 | } |
10873 | return Result->getSDVTList(); |
10874 | } |
10875 | |
10876 | SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) { |
10877 | unsigned NumVTs = VTs.size(); |
10878 | FoldingSetNodeID ID; |
10879 | ID.AddInteger(I: NumVTs); |
10880 | for (unsigned index = 0; index < NumVTs; index++) { |
10881 | ID.AddInteger(I: VTs[index].getRawBits()); |
10882 | } |
10883 | |
10884 | void *IP = nullptr; |
10885 | SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, InsertPos&: IP); |
10886 | if (!Result) { |
10887 | EVT *Array = Allocator.Allocate<EVT>(Num: NumVTs); |
10888 | llvm::copy(Range&: VTs, Out: Array); |
10889 | Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); |
10890 | VTListMap.InsertNode(N: Result, InsertPos: IP); |
10891 | } |
10892 | return Result->getSDVTList(); |
10893 | } |
10894 | |
10895 | |
10896 | /// UpdateNodeOperands - *Mutate* the specified node in-place to have the |
10897 | /// specified operands. If the resultant node already exists in the DAG, |
10898 | /// this does not modify the specified node; instead it returns the node that
10899 | /// already exists. If the resultant node does not exist in the DAG, the |
10900 | /// input node is returned. As a degenerate case, if you specify the same |
10901 | /// input operands as the node already has, the input node is returned. |
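/// A hypothetical illustration: if the DAG already contains (add X, Y), then
/// calling UpdateNodeOperands on a different single-result add node with
/// operands {X, Y} returns the pre-existing (add X, Y) node rather than
/// mutating the input into a duplicate.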
10902 | SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { |
10903 | assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); |
10904 | |
10905 | // Check to see if there is no change. |
10906 | if (Op == N->getOperand(Num: 0)) return N; |
10907 | |
10908 | // See if the modified node already exists. |
10909 | void *InsertPos = nullptr; |
10910 | if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) |
10911 | return Existing; |
10912 | |
10913 | // Nope, it doesn't. Remove the node from its current place in the maps.
10914 | if (InsertPos) |
10915 | if (!RemoveNodeFromCSEMaps(N)) |
10916 | InsertPos = nullptr; |
10917 | |
10918 | // Now we update the operands. |
10919 | N->OperandList[0].set(Op); |
10920 | |
10921 | updateDivergence(N); |
10922 | // If this gets put into a CSE map, add it. |
10923 | if (InsertPos) CSEMap.InsertNode(N, InsertPos); |
10924 | return N; |
10925 | } |
10926 | |
10927 | SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { |
10928 | assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); |
10929 | |
10930 | // Check to see if there is no change. |
10931 | if (Op1 == N->getOperand(Num: 0) && Op2 == N->getOperand(Num: 1)) |
10932 | return N; // No operands changed, just return the input node. |
10933 | |
10934 | // See if the modified node already exists. |
10935 | void *InsertPos = nullptr; |
10936 | if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) |
10937 | return Existing; |
10938 | |
10939 | // Nope, it doesn't. Remove the node from its current place in the maps.
10940 | if (InsertPos) |
10941 | if (!RemoveNodeFromCSEMaps(N)) |
10942 | InsertPos = nullptr; |
10943 | |
10944 | // Now we update the operands. |
10945 | if (N->OperandList[0] != Op1) |
10946 | N->OperandList[0].set(Op1); |
10947 | if (N->OperandList[1] != Op2) |
10948 | N->OperandList[1].set(Op2); |
10949 | |
10950 | updateDivergence(N); |
10951 | // If this gets put into a CSE map, add it. |
10952 | if (InsertPos) CSEMap.InsertNode(N, InsertPos); |
10953 | return N; |
10954 | } |
10955 | |
10956 | SDNode *SelectionDAG:: |
10957 | UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { |
10958 | SDValue Ops[] = { Op1, Op2, Op3 }; |
10959 | return UpdateNodeOperands(N, Ops); |
10960 | } |
10961 | |
10962 | SDNode *SelectionDAG:: |
10963 | UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, |
10964 | SDValue Op3, SDValue Op4) { |
10965 | SDValue Ops[] = { Op1, Op2, Op3, Op4 }; |
10966 | return UpdateNodeOperands(N, Ops); |
10967 | } |
10968 | |
10969 | SDNode *SelectionDAG:: |
10970 | UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, |
10971 | SDValue Op3, SDValue Op4, SDValue Op5) { |
10972 | SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; |
10973 | return UpdateNodeOperands(N, Ops); |
10974 | } |
10975 | |
10976 | SDNode *SelectionDAG:: |
10977 | UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { |
10978 | unsigned NumOps = Ops.size(); |
10979 | assert(N->getNumOperands() == NumOps && |
10980 | "Update with wrong number of operands"); |
10981 | |
10982 | // If no operands changed just return the input node. |
10983 | if (std::equal(first1: Ops.begin(), last1: Ops.end(), first2: N->op_begin())) |
10984 | return N; |
10985 | |
10986 | // See if the modified node already exists. |
10987 | void *InsertPos = nullptr; |
10988 | if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos)) |
10989 | return Existing; |
10990 | |
10991 | // Nope, it doesn't. Remove the node from its current place in the maps.
10992 | if (InsertPos) |
10993 | if (!RemoveNodeFromCSEMaps(N)) |
10994 | InsertPos = nullptr; |
10995 | |
10996 | // Now we update the operands. |
10997 | for (unsigned i = 0; i != NumOps; ++i) |
10998 | if (N->OperandList[i] != Ops[i]) |
10999 | N->OperandList[i].set(Ops[i]); |
11000 | |
11001 | updateDivergence(N); |
11002 | // If this gets put into a CSE map, add it. |
11003 | if (InsertPos) CSEMap.InsertNode(N, InsertPos); |
11004 | return N; |
11005 | } |
11006 | |
11007 | /// DropOperands - Release the operands and set this node to have |
11008 | /// zero operands. |
11009 | void SDNode::DropOperands() { |
11010 | // Unlike the code in MorphNodeTo that does this, we don't need to |
11011 | // watch for dead nodes here. |
11012 | for (op_iterator I = op_begin(), E = op_end(); I != E; ) { |
11013 | SDUse &Use = *I++; |
11014 | Use.set(SDValue()); |
11015 | } |
11016 | } |
11017 | |
11018 | void SelectionDAG::setNodeMemRefs(MachineSDNode *N, |
11019 | ArrayRef<MachineMemOperand *> NewMemRefs) { |
11020 | if (NewMemRefs.empty()) { |
11021 | N->clearMemRefs(); |
11022 | return; |
11023 | } |
11024 | |
11025 | // Check if we can avoid allocating by storing a single reference directly. |
11026 | if (NewMemRefs.size() == 1) { |
11027 | N->MemRefs = NewMemRefs[0]; |
11028 | N->NumMemRefs = 1; |
11029 | return; |
11030 | } |
11031 | |
11032 | MachineMemOperand **MemRefsBuffer = |
11033 | Allocator.template Allocate<MachineMemOperand *>(Num: NewMemRefs.size()); |
11034 | llvm::copy(Range&: NewMemRefs, Out: MemRefsBuffer); |
11035 | N->MemRefs = MemRefsBuffer; |
11036 | N->NumMemRefs = static_cast<int>(NewMemRefs.size()); |
11037 | } |
11038 | |
11039 | /// SelectNodeTo - These are wrappers around MorphNodeTo that accept a |
11040 | /// machine opcode. |
11041 | /// |
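/// A hedged sketch of typical use during instruction selection (the opcode
/// and operand names are assumptions, not taken from this file):
///   SDNode *New = CurDAG->SelectNodeTo(N, TargetOpcode::COPY, VT, Op);
/// If an equivalent node already exists, it is returned and N is removed.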
11042 | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
11043 | EVT VT) { |
11044 | SDVTList VTs = getVTList(VT); |
11045 | return SelectNodeTo(N, MachineOpc, VTs, Ops: {}); |
11046 | } |
11047 | |
11048 | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
11049 | EVT VT, SDValue Op1) { |
11050 | SDVTList VTs = getVTList(VT); |
11051 | SDValue Ops[] = { Op1 }; |
11052 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
11053 | } |
11054 | |
11055 | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
11056 | EVT VT, SDValue Op1, |
11057 | SDValue Op2) { |
11058 | SDVTList VTs = getVTList(VT); |
11059 | SDValue Ops[] = { Op1, Op2 }; |
11060 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
11061 | } |
11062 | |
11063 | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
11064 | EVT VT, SDValue Op1, |
11065 | SDValue Op2, SDValue Op3) { |
11066 | SDVTList VTs = getVTList(VT); |
11067 | SDValue Ops[] = { Op1, Op2, Op3 }; |
11068 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
11069 | } |
11070 | |
11071 | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
11072 | EVT VT, ArrayRef<SDValue> Ops) { |
11073 | SDVTList VTs = getVTList(VT); |
11074 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
11075 | } |
11076 | |
11077 | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
11078 | EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) { |
11079 | SDVTList VTs = getVTList(VT1, VT2); |
11080 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
11081 | } |
11082 | |
11083 | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
11084 | EVT VT1, EVT VT2) { |
11085 | SDVTList VTs = getVTList(VT1, VT2); |
11086 | return SelectNodeTo(N, MachineOpc, VTs, Ops: {}); |
11087 | } |
11088 | |
11089 | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
11090 | EVT VT1, EVT VT2, EVT VT3, |
11091 | ArrayRef<SDValue> Ops) { |
11092 | SDVTList VTs = getVTList(VT1, VT2, VT3); |
11093 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
11094 | } |
11095 | |
11096 | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
11097 | EVT VT1, EVT VT2, |
11098 | SDValue Op1, SDValue Op2) { |
11099 | SDVTList VTs = getVTList(VT1, VT2); |
11100 | SDValue Ops[] = { Op1, Op2 }; |
11101 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
11102 | } |
11103 | |
11104 | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
11105 | SDVTList VTs, ArrayRef<SDValue> Ops) {
11106 | SDNode *New = MorphNodeTo(N, Opc: ~MachineOpc, VTs, Ops); |
11107 | // Reset the NodeID to -1. |
11108 | New->setNodeId(-1); |
11109 | if (New != N) { |
11110 | ReplaceAllUsesWith(From: N, To: New); |
11111 | RemoveDeadNode(N); |
11112 | } |
11113 | return New; |
11114 | } |
11115 | |
11116 | /// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away |
11117 | /// the line number information on the merged node since it is not possible to |
11118 | /// preserve the information that the operation is associated with multiple
11119 | /// lines. This will make the debugger work better at -O0, where there is a
11120 | /// higher probability of having other instructions associated with that line.
11121 | /// |
11122 | /// For IROrder, we keep the smaller of the two |
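/// For example, merging nodes with IROrders 7 and 12 keeps order 7; at -O0,
/// if the two DebugLocs differ, the DebugLoc is dropped entirely.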
11123 | SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) { |
11124 | DebugLoc NLoc = N->getDebugLoc(); |
11125 | if (NLoc && OptLevel == CodeGenOptLevel::None && OLoc.getDebugLoc() != NLoc) { |
11126 | N->setDebugLoc(DebugLoc()); |
11127 | } |
11128 | unsigned Order = std::min(a: N->getIROrder(), b: OLoc.getIROrder()); |
11129 | N->setIROrder(Order); |
11130 | return N; |
11131 | } |
11132 | |
11133 | /// MorphNodeTo - This *mutates* the specified node to have the specified |
11134 | /// return type, opcode, and operands. |
11135 | /// |
11136 | /// Note that MorphNodeTo returns the resultant node. If there is already a |
11137 | /// node of the specified opcode and operands, it returns that node instead of |
11138 | /// the current one. Note that the SDLoc need not be the same. |
11139 | /// |
11140 | /// Using MorphNodeTo is faster than creating a new node and swapping it in |
11141 | /// with ReplaceAllUsesWith both because it often avoids allocating a new |
11142 | /// node, and because it doesn't require CSE recalculation for any of |
11143 | /// the node's users. |
11144 | /// |
11145 | /// However, note that MorphNodeTo recursively deletes dead nodes from the DAG. |
11146 | /// As a consequence, it isn't appropriate to use it from within the DAG combiner
11147 | /// or the legalizer, which maintain worklists that would need to be updated when
11148 | /// deleting things. |
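/// A minimal sketch of the contract (caller-side names assumed):
///   SDNode *Res = DAG.MorphNodeTo(N, ISD::ADD, DAG.getVTList(VT), {X, Y});
///   if (Res != N) {
///     // An equivalent node already existed; N must be RAUW'd and removed.
///   }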
11149 | SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, |
11150 | SDVTList VTs, ArrayRef<SDValue> Ops) { |
11151 | // If an identical node already exists, use it. |
11152 | void *IP = nullptr; |
11153 | if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { |
11154 | FoldingSetNodeID ID; |
11155 | AddNodeIDNode(ID, OpC: Opc, VTList: VTs, OpList: Ops); |
11156 | if (SDNode *ON = FindNodeOrInsertPos(ID, DL: SDLoc(N), InsertPos&: IP)) |
11157 | return UpdateSDLocOnMergeSDNode(N: ON, OLoc: SDLoc(N)); |
11158 | } |
11159 | |
11160 | if (!RemoveNodeFromCSEMaps(N)) |
11161 | IP = nullptr; |
11162 | |
11163 | // Start the morphing. |
11164 | N->NodeType = Opc; |
11165 | N->ValueList = VTs.VTs; |
11166 | N->NumValues = VTs.NumVTs; |
11167 | |
11168 | // Clear the operands list, updating used nodes to remove this from their |
11169 | // use list. Keep track of any operands that become dead as a result. |
11170 | SmallPtrSet<SDNode*, 16> DeadNodeSet; |
11171 | for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { |
11172 | SDUse &Use = *I++; |
11173 | SDNode *Used = Use.getNode(); |
11174 | Use.set(SDValue()); |
11175 | if (Used->use_empty()) |
11176 | DeadNodeSet.insert(Ptr: Used); |
11177 | } |
11178 | |
11179 | // For MachineNode, initialize the memory references information. |
11180 | if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Val: N)) |
11181 | MN->clearMemRefs(); |
11182 | |
11183 | // Swap for an appropriately sized array from the recycler. |
11184 | removeOperands(Node: N); |
11185 | createOperands(Node: N, Vals: Ops); |
11186 | |
11187 | // Delete any nodes that are still dead after adding the uses for the |
11188 | // new operands. |
11189 | if (!DeadNodeSet.empty()) { |
11190 | SmallVector<SDNode *, 16> DeadNodes; |
11191 | for (SDNode *N : DeadNodeSet) |
11192 | if (N->use_empty()) |
11193 | DeadNodes.push_back(Elt: N); |
11194 | RemoveDeadNodes(DeadNodes); |
11195 | } |
11196 | |
11197 | if (IP) |
11198 | CSEMap.InsertNode(N, InsertPos: IP); // Memoize the new node. |
11199 | return N; |
11200 | } |
11201 | |
11202 | SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { |
11203 | unsigned OrigOpc = Node->getOpcode(); |
11204 | unsigned NewOpc; |
11205 | switch (OrigOpc) { |
11206 | default: |
11207 | llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); |
11208 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
11209 | case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; |
11210 | #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
11211 | case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break; |
11212 | #include "llvm/IR/ConstrainedOps.def" |
11213 | } |
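// For example, the .def expansion above maps ISD::STRICT_FADD to ISD::FADD,
// and constrained comparisons such as ISD::STRICT_FSETCC to ISD::SETCC.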
11214 | |
11215 | assert(Node->getNumValues() == 2 && "Unexpected number of results!"); |
11216 | |
11217 | // We're taking this node out of the chain, so we need to re-link things. |
11218 | SDValue InputChain = Node->getOperand(Num: 0); |
11219 | SDValue OutputChain = SDValue(Node, 1); |
11220 | ReplaceAllUsesOfValueWith(From: OutputChain, To: InputChain); |
11221 | |
11222 | SmallVector<SDValue, 3> Ops; |
11223 | for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) |
11224 | Ops.push_back(Elt: Node->getOperand(Num: i)); |
11225 | |
11226 | SDVTList VTs = getVTList(VT: Node->getValueType(ResNo: 0)); |
11227 | SDNode *Res = MorphNodeTo(N: Node, Opc: NewOpc, VTs, Ops); |
11228 | |
11229 | // MorphNodeTo can operate in two ways: if an existing node with the |
11230 | // specified operands exists, it can just return it. Otherwise, it |
11231 | // updates the node in place to have the requested operands. |
11232 | if (Res == Node) { |
11233 | // If we updated the node in place, reset the node ID. To the isel, |
11234 | // this should be just like a newly allocated machine node. |
11235 | Res->setNodeId(-1); |
11236 | } else { |
11237 | ReplaceAllUsesWith(From: Node, To: Res); |
11238 | RemoveDeadNode(N: Node); |
11239 | } |
11240 | |
11241 | return Res; |
11242 | } |
11243 | |
11244 | /// getMachineNode - These are used for target selectors to create a new node |
11245 | /// with specified return type(s), MachineInstr opcode, and operands. |
11246 | /// |
11247 | /// Note that getMachineNode returns the resultant node. If there is already a |
11248 | /// node of the specified opcode and operands, it returns that node instead of |
11249 | /// the current one. |
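/// A hedged illustration of the CSE behavior (operands assumed):
///   MachineSDNode *A = DAG.getMachineNode(Opc, DL, VT, Op);
///   MachineSDNode *B = DAG.getMachineNode(Opc, DL, VT, Op); // B == A
/// unless the VT list ends in MVT::Glue, in which case no CSE is performed.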
11250 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11251 | EVT VT) { |
11252 | SDVTList VTs = getVTList(VT); |
11253 | return getMachineNode(Opcode, dl, VTs, Ops: {}); |
11254 | } |
11255 | |
11256 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11257 | EVT VT, SDValue Op1) { |
11258 | SDVTList VTs = getVTList(VT); |
11259 | SDValue Ops[] = { Op1 }; |
11260 | return getMachineNode(Opcode, dl, VTs, Ops); |
11261 | } |
11262 | |
11263 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11264 | EVT VT, SDValue Op1, SDValue Op2) { |
11265 | SDVTList VTs = getVTList(VT); |
11266 | SDValue Ops[] = { Op1, Op2 }; |
11267 | return getMachineNode(Opcode, dl, VTs, Ops); |
11268 | } |
11269 | |
11270 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11271 | EVT VT, SDValue Op1, SDValue Op2, |
11272 | SDValue Op3) { |
11273 | SDVTList VTs = getVTList(VT); |
11274 | SDValue Ops[] = { Op1, Op2, Op3 }; |
11275 | return getMachineNode(Opcode, dl, VTs, Ops); |
11276 | } |
11277 | |
11278 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11279 | EVT VT, ArrayRef<SDValue> Ops) { |
11280 | SDVTList VTs = getVTList(VT); |
11281 | return getMachineNode(Opcode, dl, VTs, Ops); |
11282 | } |
11283 | |
11284 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11285 | EVT VT1, EVT VT2, SDValue Op1, |
11286 | SDValue Op2) { |
11287 | SDVTList VTs = getVTList(VT1, VT2); |
11288 | SDValue Ops[] = { Op1, Op2 }; |
11289 | return getMachineNode(Opcode, dl, VTs, Ops); |
11290 | } |
11291 | |
11292 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11293 | EVT VT1, EVT VT2, SDValue Op1, |
11294 | SDValue Op2, SDValue Op3) { |
11295 | SDVTList VTs = getVTList(VT1, VT2); |
11296 | SDValue Ops[] = { Op1, Op2, Op3 }; |
11297 | return getMachineNode(Opcode, dl, VTs, Ops); |
11298 | } |
11299 | |
11300 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11301 | EVT VT1, EVT VT2, |
11302 | ArrayRef<SDValue> Ops) { |
11303 | SDVTList VTs = getVTList(VT1, VT2); |
11304 | return getMachineNode(Opcode, dl, VTs, Ops); |
11305 | } |
11306 | |
11307 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11308 | EVT VT1, EVT VT2, EVT VT3, |
11309 | SDValue Op1, SDValue Op2) { |
11310 | SDVTList VTs = getVTList(VT1, VT2, VT3); |
11311 | SDValue Ops[] = { Op1, Op2 }; |
11312 | return getMachineNode(Opcode, dl, VTs, Ops); |
11313 | } |
11314 | |
11315 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11316 | EVT VT1, EVT VT2, EVT VT3, |
11317 | SDValue Op1, SDValue Op2, |
11318 | SDValue Op3) { |
11319 | SDVTList VTs = getVTList(VT1, VT2, VT3); |
11320 | SDValue Ops[] = { Op1, Op2, Op3 }; |
11321 | return getMachineNode(Opcode, dl, VTs, Ops); |
11322 | } |
11323 | |
11324 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11325 | EVT VT1, EVT VT2, EVT VT3, |
11326 | ArrayRef<SDValue> Ops) { |
11327 | SDVTList VTs = getVTList(VT1, VT2, VT3); |
11328 | return getMachineNode(Opcode, dl, VTs, Ops); |
11329 | } |
11330 | |
11331 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
11332 | ArrayRef<EVT> ResultTys, |
11333 | ArrayRef<SDValue> Ops) { |
11334 | SDVTList VTs = getVTList(VTs: ResultTys); |
11335 | return getMachineNode(Opcode, dl, VTs, Ops); |
11336 | } |
11337 | |
11338 | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, |
11339 | SDVTList VTs, |
11340 | ArrayRef<SDValue> Ops) { |
11341 | bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; |
11342 | MachineSDNode *N; |
11343 | void *IP = nullptr; |
11344 | |
11345 | if (DoCSE) { |
11346 | FoldingSetNodeID ID; |
11347 | AddNodeIDNode(ID, OpC: ~Opcode, VTList: VTs, OpList: Ops); |
11348 | IP = nullptr; |
11349 | if (SDNode *E = FindNodeOrInsertPos(ID, DL, InsertPos&: IP)) { |
11350 | return cast<MachineSDNode>(Val: UpdateSDLocOnMergeSDNode(N: E, OLoc: DL)); |
11351 | } |
11352 | } |
11353 | |
11354 | // Allocate a new MachineSDNode. |
11355 | N = newSDNode<MachineSDNode>(Args: ~Opcode, Args: DL.getIROrder(), Args: DL.getDebugLoc(), Args&: VTs); |
11356 | createOperands(Node: N, Vals: Ops); |
11357 | |
11358 | if (DoCSE) |
11359 | CSEMap.InsertNode(N, InsertPos: IP); |
11360 | |
11361 | InsertNode(N); |
11362 | NewSDValueDbgMsg(V: SDValue(N, 0), Msg: "Creating new machine node: ", G: this); |
11363 | return N; |
11364 | } |
11365 | |
11366 | /// getTargetExtractSubreg - A convenience function for creating |
11367 | /// TargetOpcode::EXTRACT_SUBREG nodes. |
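/// For example (the subregister index is assumed, as indices are
/// target-defined):
///   SDValue Lo = DAG.getTargetExtractSubreg(SubRegIdx, DL, MVT::i32, Val64);
/// returns result 0 of an EXTRACT_SUBREG machine node wrapping Val64.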
11368 | SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, |
11369 | SDValue Operand) { |
11370 | SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); |
11371 | SDNode *Subreg = getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, |
11372 | VT, Op1: Operand, Op2: SRIdxVal); |
11373 | return SDValue(Subreg, 0); |
11374 | } |
11375 | |
11376 | /// getTargetInsertSubreg - A convenience function for creating |
11377 | /// TargetOpcode::INSERT_SUBREG nodes. |
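/// The counterpart of getTargetExtractSubreg above: for example, inserting a
/// 32-bit Subreg value into a 64-bit Operand at a target-defined index yields
/// result 0 of an INSERT_SUBREG machine node.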
11378 | SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, |
11379 | SDValue Operand, SDValue Subreg) { |
11380 | SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); |
11381 | SDNode *Result = getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL, |
11382 | VT, Op1: Operand, Op2: Subreg, Op3: SRIdxVal); |
11383 | return SDValue(Result, 0); |
11384 | } |
11385 | |
11386 | /// getNodeIfExists - Get the specified node if it's already available, or |
11387 | /// else return NULL. |
11388 | SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, |
11389 | ArrayRef<SDValue> Ops) { |
11390 | SDNodeFlags Flags; |
11391 | if (Inserter) |
11392 | Flags = Inserter->getFlags(); |
11393 | return getNodeIfExists(Opcode, VTList, Ops, Flags); |
11394 | } |
11395 | |
11396 | SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, |
11397 | ArrayRef<SDValue> Ops, |
11398 | const SDNodeFlags Flags) { |
11399 | if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { |
11400 | FoldingSetNodeID ID; |
11401 | AddNodeIDNode(ID, OpC: Opcode, VTList, OpList: Ops); |
11402 | void *IP = nullptr; |
11403 | if (SDNode *E = FindNodeOrInsertPos(ID, DL: SDLoc(), InsertPos&: IP)) { |
11404 | E->intersectFlagsWith(Flags); |
11405 | return E; |
11406 | } |
11407 | } |
11408 | return nullptr; |
11409 | } |
11410 | |
11411 | /// doesNodeExist - Check if a node exists without modifying its flags. |
11412 | bool SelectionDAG::doesNodeExist(unsigned Opcode, SDVTList VTList, |
11413 | ArrayRef<SDValue> Ops) { |
11414 | if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { |
11415 | FoldingSetNodeID ID; |
11416 | AddNodeIDNode(ID, OpC: Opcode, VTList, OpList: Ops); |
11417 | void *IP = nullptr; |
11418 | if (FindNodeOrInsertPos(ID, DL: SDLoc(), InsertPos&: IP)) |
11419 | return true; |
11420 | } |
11421 | return false; |
11422 | } |
11423 | |
11424 | /// getDbgValue - Creates an SDDbgValue node.
11425 | /// |
11426 | /// SDNode |
11427 | SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr, |
11428 | SDNode *N, unsigned R, bool IsIndirect, |
11429 | const DebugLoc &DL, unsigned O) { |
11430 | assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && |
11431 | "Expected inlined-at fields to agree"); |
11432 | return new (DbgInfo->getAlloc()) |
11433 | SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromNode(Node: N, ResNo: R), |
11434 | {}, IsIndirect, DL, O, |
11435 | /*IsVariadic=*/false); |
11436 | } |
11437 | |
11438 | /// Constant |
11439 | SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, |
11440 | DIExpression *Expr, |
11441 | const Value *C, |
11442 | const DebugLoc &DL, unsigned O) { |
11443 | assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && |
11444 | "Expected inlined-at fields to agree"); |
11445 | return new (DbgInfo->getAlloc()) |
11446 | SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromConst(Const: C), {}, |
11447 | /*IsIndirect=*/false, DL, O, |
11448 | /*IsVariadic=*/false); |
11449 | } |
11450 | |
11451 | /// FrameIndex |
11452 | SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, |
11453 | DIExpression *Expr, unsigned FI, |
11454 | bool IsIndirect, |
11455 | const DebugLoc &DL, |
11456 | unsigned O) { |
11457 | assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && |
11458 | "Expected inlined-at fields to agree"); |
11459 | return getFrameIndexDbgValue(Var, Expr, FI, Dependencies: {}, IsIndirect, DL, O); |
11460 | } |
11461 | |
11462 | /// FrameIndex with dependencies |
11463 | SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, |
11464 | DIExpression *Expr, unsigned FI, |
11465 | ArrayRef<SDNode *> Dependencies, |
11466 | bool IsIndirect, |
11467 | const DebugLoc &DL, |
11468 | unsigned O) { |
11469 | assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && |
11470 | "Expected inlined-at fields to agree"); |
11471 | return new (DbgInfo->getAlloc()) |
11472 | SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromFrameIdx(FrameIdx: FI), |
11473 | Dependencies, IsIndirect, DL, O, |
11474 | /*IsVariadic=*/false); |
11475 | } |
11476 | |
11477 | /// VReg |
11478 | SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, DIExpression *Expr, |
11479 | Register VReg, bool IsIndirect, |
11480 | const DebugLoc &DL, unsigned O) { |
11481 | assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && |
11482 | "Expected inlined-at fields to agree"); |
11483 | return new (DbgInfo->getAlloc()) |
11484 | SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromVReg(VReg), |
11485 | {}, IsIndirect, DL, O, |
11486 | /*IsVariadic=*/false); |
11487 | } |
11488 | |
11489 | SDDbgValue *SelectionDAG::getDbgValueList(DIVariable *Var, DIExpression *Expr, |
11490 | ArrayRef<SDDbgOperand> Locs, |
11491 | ArrayRef<SDNode *> Dependencies, |
11492 | bool IsIndirect, const DebugLoc &DL, |
11493 | unsigned O, bool IsVariadic) { |
11494 | assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && |
11495 | "Expected inlined-at fields to agree"); |
11496 | return new (DbgInfo->getAlloc()) |
11497 | SDDbgValue(DbgInfo->getAlloc(), Var, Expr, Locs, Dependencies, IsIndirect, |
11498 | DL, O, IsVariadic); |
11499 | } |
11500 | |
11501 | void SelectionDAG::transferDbgValues(SDValue From, SDValue To, |
11502 | unsigned OffsetInBits, unsigned SizeInBits, |
11503 | bool InvalidateDbg) { |
11504 | SDNode *FromNode = From.getNode(); |
11505 | SDNode *ToNode = To.getNode(); |
11506 | assert(FromNode && ToNode && "Can't modify dbg values"); |
11507 | |
11508 | // PR35338 |
11509 | // TODO: assert(From != To && "Redundant dbg value transfer"); |
11510 | // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer"); |
11511 | if (From == To || FromNode == ToNode) |
11512 | return; |
11513 | |
11514 | if (!FromNode->getHasDebugValue()) |
11515 | return; |
11516 | |
11517 | SDDbgOperand FromLocOp = |
11518 | SDDbgOperand::fromNode(Node: From.getNode(), ResNo: From.getResNo()); |
11519 | SDDbgOperand ToLocOp = SDDbgOperand::fromNode(Node: To.getNode(), ResNo: To.getResNo()); |
11520 | |
11521 | SmallVector<SDDbgValue *, 2> ClonedDVs; |
11522 | for (SDDbgValue *Dbg : GetDbgValues(SD: FromNode)) { |
11523 | if (Dbg->isInvalidated()) |
11524 | continue; |
11525 | |
11526 | // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value"); |
11527 | |
11528 | // Create a new location ops vector that is equal to the old vector, but |
11529 | // with each instance of FromLocOp replaced with ToLocOp. |
11530 | bool Changed = false; |
11531 | auto NewLocOps = Dbg->copyLocationOps(); |
11532 | std::replace_if( |
11533 | first: NewLocOps.begin(), last: NewLocOps.end(), |
11534 | pred: [&Changed, FromLocOp](const SDDbgOperand &Op) { |
11535 | bool Match = Op == FromLocOp; |
11536 | Changed |= Match; |
11537 | return Match; |
11538 | }, |
11539 | new_value: ToLocOp); |
11540 | // Ignore this SDDbgValue if we didn't find a matching location. |
11541 | if (!Changed) |
11542 | continue; |
11543 | |
11544 | DIVariable *Var = Dbg->getVariable(); |
11545 | auto *Expr = Dbg->getExpression(); |
11546 | // If a fragment is requested, update the expression. |
11547 | if (SizeInBits) { |
11548 | // When splitting a larger (e.g., sign-extended) value whose |
11549 | // lower bits are described with an SDDbgValue, do not attempt |
11550 | // to transfer the SDDbgValue to the upper bits. |
11551 | if (auto FI = Expr->getFragmentInfo()) |
11552 | if (OffsetInBits + SizeInBits > FI->SizeInBits) |
11553 | continue; |
11554 | auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits, |
11555 | SizeInBits); |
11556 | if (!Fragment) |
11557 | continue; |
11558 | Expr = *Fragment; |
11559 | } |
11560 | |
11561 | auto AdditionalDependencies = Dbg->getAdditionalDependencies(); |
11562 | // Clone the SDDbgValue and move it to To. |
11563 | SDDbgValue *Clone = getDbgValueList( |
11564 | Var, Expr, Locs: NewLocOps, Dependencies: AdditionalDependencies, IsIndirect: Dbg->isIndirect(), |
11565 | DL: Dbg->getDebugLoc(), O: std::max(a: ToNode->getIROrder(), b: Dbg->getOrder()), |
11566 | IsVariadic: Dbg->isVariadic()); |
11567 | ClonedDVs.push_back(Elt: Clone); |
11568 | |
11569 | if (InvalidateDbg) { |
11570 | // Invalidate value and indicate the SDDbgValue should not be emitted. |
11571 | Dbg->setIsInvalidated(); |
11572 | Dbg->setIsEmitted(); |
11573 | } |
11574 | } |
11575 | |
11576 | for (SDDbgValue *Dbg : ClonedDVs) { |
11577 | assert(is_contained(Dbg->getSDNodes(), ToNode) && |
11578 | "Transferred DbgValues should depend on the new SDNode"); |
11579 | AddDbgValue(DB: Dbg, isParameter: false); |
11580 | } |
11581 | } |
11582 | |
11583 | void SelectionDAG::salvageDebugInfo(SDNode &N) { |
11584 | if (!N.getHasDebugValue()) |
11585 | return; |
11586 | |
11587 | auto GetLocationOperand = [](SDNode *Node, unsigned ResNo) { |
11588 | if (auto *FISDN = dyn_cast<FrameIndexSDNode>(Val: Node)) |
11589 | return SDDbgOperand::fromFrameIdx(FrameIdx: FISDN->getIndex()); |
11590 | return SDDbgOperand::fromNode(Node, ResNo); |
11591 | }; |
11592 | |
11593 | SmallVector<SDDbgValue *, 2> ClonedDVs; |
11594 | for (auto *DV : GetDbgValues(SD: &N)) { |
11595 | if (DV->isInvalidated()) |
11596 | continue; |
11597 | switch (N.getOpcode()) { |
11598 | default: |
11599 | break; |
11600 | case ISD::ADD: { |
11601 | SDValue N0 = N.getOperand(Num: 0); |
11602 | SDValue N1 = N.getOperand(Num: 1); |
11603 | if (!isa<ConstantSDNode>(Val: N0)) { |
11604 | bool RHSConstant = isa<ConstantSDNode>(Val: N1); |
11605 | uint64_t Offset; |
11606 | if (RHSConstant) |
11607 | Offset = N.getConstantOperandVal(Num: 1); |
11608 | // We are not allowed to turn indirect debug values variadic, so |
11609 | // don't salvage those. |
11610 | if (!RHSConstant && DV->isIndirect()) |
11611 | continue; |
11612 | |
11613 | // Rewrite an ADD constant node into a DIExpression. Since we are |
11614 | // performing arithmetic to compute the variable's *value* in the |
11615 | // DIExpression, we need to mark the expression with a |
11616 | // DW_OP_stack_value. |
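        // For example (hypothetical IR values), salvaging (ADD %x, 8) rewrites
        // a location that referred to the ADD into %x, with
        // (DW_OP_plus_uconst 8, DW_OP_stack_value) appended to the expression.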
11617 | auto *DIExpr = DV->getExpression(); |
11618 | auto NewLocOps = DV->copyLocationOps(); |
11619 | bool Changed = false; |
11620 | size_t OrigLocOpsSize = NewLocOps.size(); |
11621 | for (size_t i = 0; i < OrigLocOpsSize; ++i) { |
11622 | // We're not given a ResNo to compare against because the whole |
11623 | // node is going away. We know that any ISD::ADD only has one |
11624 | // result, so we can assume any node match is using the result. |
11625 | if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE || |
11626 | NewLocOps[i].getSDNode() != &N) |
11627 | continue; |
11628 | NewLocOps[i] = GetLocationOperand(N0.getNode(), N0.getResNo()); |
11629 | if (RHSConstant) { |
11630 | SmallVector<uint64_t, 3> ExprOps; |
11631 | DIExpression::appendOffset(Ops&: ExprOps, Offset); |
11632 | DIExpr = DIExpression::appendOpsToArg(Expr: DIExpr, Ops: ExprOps, ArgNo: i, StackValue: true); |
11633 | } else { |
11634 | // Convert to a variadic expression (if not already). |
11635 | // convertToVariadicExpression() returns a const pointer, so we use |
11636 | // a temporary const variable here. |
11637 | const auto *TmpDIExpr = |
11638 | DIExpression::convertToVariadicExpression(Expr: DIExpr); |
11639 | SmallVector<uint64_t, 3> ExprOps; |
11640 | ExprOps.push_back(Elt: dwarf::DW_OP_LLVM_arg); |
11641 | ExprOps.push_back(Elt: NewLocOps.size()); |
11642 | ExprOps.push_back(Elt: dwarf::DW_OP_plus); |
11643 | SDDbgOperand RHS = |
11644 | SDDbgOperand::fromNode(Node: N1.getNode(), ResNo: N1.getResNo()); |
11645 | NewLocOps.push_back(Elt: RHS); |
11646 | DIExpr = DIExpression::appendOpsToArg(Expr: TmpDIExpr, Ops: ExprOps, ArgNo: i, StackValue: true); |
11647 | } |
11648 | Changed = true; |
11649 | } |
11650 | (void)Changed; |
11651 | assert(Changed && "Salvage target doesn't use N"); |
11652 | |
11653 | bool IsVariadic = |
11654 | DV->isVariadic() || OrigLocOpsSize != NewLocOps.size(); |
11655 | |
11656 | auto AdditionalDependencies = DV->getAdditionalDependencies(); |
11657 | SDDbgValue *Clone = getDbgValueList( |
11658 | Var: DV->getVariable(), Expr: DIExpr, Locs: NewLocOps, Dependencies: AdditionalDependencies, |
11659 | IsIndirect: DV->isIndirect(), DL: DV->getDebugLoc(), O: DV->getOrder(), IsVariadic); |
11660 | ClonedDVs.push_back(Elt: Clone); |
11661 | DV->setIsInvalidated(); |
11662 | DV->setIsEmitted(); |
11663 | LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; |
11664 | N0.getNode()->dumprFull(this); |
11665 | dbgs() << " into "<< *DIExpr << '\n'); |
11666 | } |
11667 | break; |
11668 | } |
11669 | case ISD::TRUNCATE: { |
11670 | SDValue N0 = N.getOperand(Num: 0); |
11671 | TypeSize FromSize = N0.getValueSizeInBits(); |
11672 | TypeSize ToSize = N.getValueSizeInBits(ResNo: 0); |
11673 | |
11674 | DIExpression *DbgExpression = DV->getExpression(); |
11675 | auto ExtOps = DIExpression::getExtOps(FromSize, ToSize, Signed: false); |
11676 | auto NewLocOps = DV->copyLocationOps(); |
11677 | bool Changed = false; |
11678 | for (size_t i = 0; i < NewLocOps.size(); ++i) { |
11679 | if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE || |
11680 | NewLocOps[i].getSDNode() != &N) |
11681 | continue; |
11682 | |
11683 | NewLocOps[i] = GetLocationOperand(N0.getNode(), N0.getResNo()); |
11684 | DbgExpression = DIExpression::appendOpsToArg(Expr: DbgExpression, Ops: ExtOps, ArgNo: i); |
11685 | Changed = true; |
11686 | } |
11687 | assert(Changed && "Salvage target doesn't use N"); |
11688 | (void)Changed; |
11689 | |
11690 | SDDbgValue *Clone = |
11691 | getDbgValueList(Var: DV->getVariable(), Expr: DbgExpression, Locs: NewLocOps, |
11692 | Dependencies: DV->getAdditionalDependencies(), IsIndirect: DV->isIndirect(), |
11693 | DL: DV->getDebugLoc(), O: DV->getOrder(), IsVariadic: DV->isVariadic()); |
11694 | |
11695 | ClonedDVs.push_back(Elt: Clone); |
11696 | DV->setIsInvalidated(); |
11697 | DV->setIsEmitted(); |
11698 | LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); |
11699 | dbgs() << " into "<< *DbgExpression << '\n'); |
11700 | break; |
11701 | } |
11702 | } |
11703 | } |
11704 | |
11705 | for (SDDbgValue *Dbg : ClonedDVs) { |
11706 | assert((!Dbg->getSDNodes().empty() || |
11707 | llvm::any_of(Dbg->getLocationOps(), |
11708 | [&](const SDDbgOperand &Op) { |
11709 | return Op.getKind() == SDDbgOperand::FRAMEIX; |
11710 | })) && |
11711 | "Salvaged DbgValue should depend on a new SDNode"); |
11712 | AddDbgValue(DB: Dbg, isParameter: false); |
11713 | } |
11714 | } |
11715 | |
11716 | /// Creates an SDDbgLabel node.
11717 | SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label, |
11718 | const DebugLoc &DL, unsigned O) { |
11719 | assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && |
11720 | "Expected inlined-at fields to agree"); |
11721 | return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O); |
11722 | } |
11723 | |
11724 | namespace { |
11725 | |
11726 | /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node |
11727 | /// pointed to by a use iterator is deleted, increment the use iterator |
11728 | /// so that it doesn't dangle. |
11729 | /// |
11730 | class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { |
11731 | SDNode::use_iterator &UI; |
11732 | SDNode::use_iterator &UE; |
11733 | |
11734 | void NodeDeleted(SDNode *N, SDNode *E) override { |
11735 | // Increment the iterator as needed. |
11736 | while (UI != UE && N == UI->getUser()) |
11737 | ++UI; |
11738 | } |
11739 | |
11740 | public: |
11741 | RAUWUpdateListener(SelectionDAG &d, |
11742 | SDNode::use_iterator &ui, |
11743 | SDNode::use_iterator &ue) |
11744 | : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {} |
11745 | }; |
11746 | |
11747 | } // end anonymous namespace |
11748 | |
11749 | /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. |
11750 | /// This can cause recursive merging of nodes in the DAG. |
11751 | /// |
11752 | /// This version assumes From has a single result value. |
11753 | /// |
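/// A minimal usage sketch (hypothetical fold, not code from this file):
/// \code
///   // Fold (add X, 0): point every user of the ADD at X instead.
///   if (isNullConstant(N->getOperand(1)))
///     DAG.ReplaceAllUsesWith(SDValue(N, 0), N->getOperand(0));
/// \endcode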
11754 | void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { |
11755 | SDNode *From = FromN.getNode(); |
11756 | assert(From->getNumValues() == 1 && FromN.getResNo() == 0 && |
11757 | "Cannot replace with this method!"); |
11758 | assert(From != To.getNode() && "Cannot replace uses of a node with itself");
11759 | |
11760 | // Preserve Debug Values |
11761 | transferDbgValues(From: FromN, To); |
11762 | // Preserve extra info. |
11763 | copyExtraInfo(From, To: To.getNode()); |
11764 | |
11765 | // Iterate over all the existing uses of From. New uses will be added |
11766 | // to the beginning of the use list, which we avoid visiting. |
11767 | // This specifically avoids visiting uses of From that arise while the |
11768 | // replacement is happening, because any such uses would be the result |
11769 | // of CSE: If an existing node looks like From after one of its operands |
11770 | // is replaced by To, we don't want to replace all of its uses with To
11771 | // too. See PR3018 for more info.
11772 | SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); |
11773 | RAUWUpdateListener Listener(*this, UI, UE); |
11774 | while (UI != UE) { |
11775 | SDNode *User = UI->getUser(); |
11776 | |
11777 | // This node is about to morph, remove its old self from the CSE maps. |
11778 | RemoveNodeFromCSEMaps(N: User); |
11779 | |
11780 | // A user can appear in a use list multiple times, and when this |
11781 | // happens the uses are usually next to each other in the list. |
11782 | // To help reduce the number of CSE recomputations, process all |
11783 | // the uses of this user that we can find this way. |
11784 | do { |
11785 | SDUse &Use = *UI; |
11786 | ++UI; |
11787 | Use.set(To); |
11788 | if (To->isDivergent() != From->isDivergent()) |
11789 | updateDivergence(N: User); |
11790 | } while (UI != UE && UI->getUser() == User); |
11791 | // Now that we have modified User, add it back to the CSE maps. If it |
11792 | // already exists there, recursively merge the results together. |
11793 | AddModifiedNodeToCSEMaps(N: User); |
11794 | } |
11795 | |
11796 | // If we just RAUW'd the root, take note. |
11797 | if (FromN == getRoot()) |
11798 | setRoot(To); |
11799 | } |
11800 | |
11801 | /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. |
11802 | /// This can cause recursive merging of nodes in the DAG. |
11803 | /// |
11804 | /// This version assumes that for each value of From, there is a |
11805 | /// corresponding value in To in the same position with the same type. |
11806 | /// |
11807 | void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { |
11808 | #ifndef NDEBUG |
11809 | for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) |
11810 | assert((!From->hasAnyUseOfValue(i) || |
11811 | From->getValueType(i) == To->getValueType(i)) && |
11812 | "Cannot use this version of ReplaceAllUsesWith!"); |
11813 | #endif |
11814 | |
11815 | // Handle the trivial case. |
11816 | if (From == To) |
11817 | return; |
11818 | |
11819 | // Preserve Debug Info. Only do this if there's a use. |
11820 | for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) |
11821 | if (From->hasAnyUseOfValue(Value: i)) { |
11822 | assert((i < To->getNumValues()) && "Invalid To location"); |
11823 | transferDbgValues(From: SDValue(From, i), To: SDValue(To, i)); |
11824 | } |
11825 | // Preserve extra info. |
11826 | copyExtraInfo(From, To); |
11827 | |
11828 | // Iterate over just the existing users of From. See the comments in |
11829 | // the ReplaceAllUsesWith above. |
11830 | SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); |
11831 | RAUWUpdateListener Listener(*this, UI, UE); |
11832 | while (UI != UE) { |
11833 | SDNode *User = UI->getUser(); |
11834 | |
11835 | // This node is about to morph, remove its old self from the CSE maps. |
11836 | RemoveNodeFromCSEMaps(N: User); |
11837 | |
11838 | // A user can appear in a use list multiple times, and when this |
11839 | // happens the uses are usually next to each other in the list. |
11840 | // To help reduce the number of CSE recomputations, process all |
11841 | // the uses of this user that we can find this way. |
11842 | do { |
11843 | SDUse &Use = *UI; |
11844 | ++UI; |
11845 | Use.setNode(To); |
11846 | if (To->isDivergent() != From->isDivergent()) |
11847 | updateDivergence(N: User); |
11848 | } while (UI != UE && UI->getUser() == User); |
11849 | |
11850 | // Now that we have modified User, add it back to the CSE maps. If it |
11851 | // already exists there, recursively merge the results together. |
11852 | AddModifiedNodeToCSEMaps(N: User); |
11853 | } |
11854 | |
11855 | // If we just RAUW'd the root, take note. |
11856 | if (From == getRoot().getNode()) |
11857 | setRoot(SDValue(To, getRoot().getResNo())); |
11858 | } |
11859 | |
11860 | /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. |
11861 | /// This can cause recursive merging of nodes in the DAG. |
11862 | /// |
11863 | /// This version can replace From with any result values. To must match the |
11864 | /// number and types of values returned by From. |
11865 | void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { |
11866 | if (From->getNumValues() == 1) // Handle the simple case efficiently. |
11867 | return ReplaceAllUsesWith(FromN: SDValue(From, 0), To: To[0]); |
11868 | |
11869 | for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) { |
11870 | // Preserve Debug Info. |
11871 | transferDbgValues(From: SDValue(From, i), To: To[i]); |
11872 | // Preserve extra info. |
11873 | copyExtraInfo(From, To: To[i].getNode()); |
11874 | } |
11875 | |
11876 | // Iterate over just the existing users of From. See the comments in |
11877 | // the ReplaceAllUsesWith above. |
11878 | SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); |
11879 | RAUWUpdateListener Listener(*this, UI, UE); |
11880 | while (UI != UE) { |
11881 | SDNode *User = UI->getUser(); |
11882 | |
11883 | // This node is about to morph, remove its old self from the CSE maps. |
11884 | RemoveNodeFromCSEMaps(N: User); |
11885 | |
11886 | // A user can appear in a use list multiple times, and when this happens the |
11887 | // uses are usually next to each other in the list. To help reduce the |
11888 | // number of CSE and divergence recomputations, process all the uses of this |
11889 | // user that we can find this way. |
11890 | bool To_IsDivergent = false; |
11891 | do { |
11892 | SDUse &Use = *UI; |
11893 | const SDValue &ToOp = To[Use.getResNo()]; |
11894 | ++UI; |
11895 | Use.set(ToOp); |
11896 | To_IsDivergent |= ToOp->isDivergent(); |
11897 | } while (UI != UE && UI->getUser() == User); |
11898 | |
11899 | if (To_IsDivergent != From->isDivergent()) |
11900 | updateDivergence(N: User); |
11901 | |
11902 | // Now that we have modified User, add it back to the CSE maps. If it |
11903 | // already exists there, recursively merge the results together. |
11904 | AddModifiedNodeToCSEMaps(N: User); |
11905 | } |
11906 | |
11907 | // If we just RAUW'd the root, take note. |
11908 | if (From == getRoot().getNode()) |
11909 | setRoot(SDValue(To[getRoot().getResNo()])); |
11910 | } |
11911 | |
11912 | /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving |
11913 | /// uses of other values produced by From.getNode() alone.
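/// A common usage sketch (hypothetical): splice a load out of the chain by
/// replacing only its chain result, leaving the loaded value untouched:
/// \code
///   LoadSDNode *Ld = ...;
///   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
/// \endcode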
11915 | void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To) {
11916 | // Handle the really simple, really trivial case efficiently. |
11917 | if (From == To) return; |
11918 | |
11919 | // Handle the simple, trivial, case efficiently. |
11920 | if (From.getNode()->getNumValues() == 1) { |
11921 | ReplaceAllUsesWith(FromN: From, To); |
11922 | return; |
11923 | } |
11924 | |
11925 | // Preserve Debug Info. |
11926 | transferDbgValues(From, To); |
11927 | copyExtraInfo(From: From.getNode(), To: To.getNode()); |
11928 | |
11929 | // Iterate over just the existing users of From. See the comments in |
11930 | // the ReplaceAllUsesWith above. |
11931 | SDNode::use_iterator UI = From.getNode()->use_begin(), |
11932 | UE = From.getNode()->use_end(); |
11933 | RAUWUpdateListener Listener(*this, UI, UE); |
11934 | while (UI != UE) { |
11935 | SDNode *User = UI->getUser(); |
11936 | bool UserRemovedFromCSEMaps = false; |
11937 | |
11938 | // A user can appear in a use list multiple times, and when this |
11939 | // happens the uses are usually next to each other in the list. |
11940 | // To help reduce the number of CSE recomputations, process all |
11941 | // the uses of this user that we can find this way. |
11942 | do { |
11943 | SDUse &Use = *UI; |
11944 | |
11945 | // Skip uses of different values from the same node. |
11946 | if (Use.getResNo() != From.getResNo()) { |
11947 | ++UI; |
11948 | continue; |
11949 | } |
11950 | |
11951 | // If this node hasn't been modified yet, it's still in the CSE maps, |
11952 | // so remove its old self from the CSE maps. |
11953 | if (!UserRemovedFromCSEMaps) { |
11954 | RemoveNodeFromCSEMaps(N: User); |
11955 | UserRemovedFromCSEMaps = true; |
11956 | } |
11957 | |
11958 | ++UI; |
11959 | Use.set(To); |
11960 | if (To->isDivergent() != From->isDivergent()) |
11961 | updateDivergence(N: User); |
11962 | } while (UI != UE && UI->getUser() == User); |
11963 | // We are iterating over all uses of the From node, so if a use |
11964 | // doesn't use the specific value, no changes are made. |
11965 | if (!UserRemovedFromCSEMaps) |
11966 | continue; |
11967 | |
11968 | // Now that we have modified User, add it back to the CSE maps. If it |
11969 | // already exists there, recursively merge the results together. |
11970 | AddModifiedNodeToCSEMaps(N: User); |
11971 | } |
11972 | |
11973 | // If we just RAUW'd the root, take note. |
11974 | if (From == getRoot()) |
11975 | setRoot(To); |
11976 | } |
11977 | |
11978 | namespace { |
11979 | |
11980 | /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith |
11981 | /// to record information about a use. |
11982 | struct UseMemo { |
11983 | SDNode *User; |
11984 | unsigned Index; |
11985 | SDUse *Use; |
11986 | }; |
11987 | |
11988 | /// operator< - Sort Memos by User. |
11989 | bool operator<(const UseMemo &L, const UseMemo &R) { |
11990 | return (intptr_t)L.User < (intptr_t)R.User; |
11991 | } |
11992 | |
11993 | /// RAUOVWUpdateListener - Helper for ReplaceAllUsesOfValuesWith - When the node |
11994 | /// pointed to by a UseMemo is deleted, set the User to nullptr to indicate that |
11995 | /// the node already has been taken care of recursively. |
11996 | class RAUOVWUpdateListener : public SelectionDAG::DAGUpdateListener { |
11997 | SmallVectorImpl<UseMemo> &Uses; |
11998 | |
11999 | void NodeDeleted(SDNode *N, SDNode *E) override { |
12000 | for (UseMemo &Memo : Uses) |
12001 | if (Memo.User == N) |
12002 | Memo.User = nullptr; |
12003 | } |
12004 | |
12005 | public: |
12006 | RAUOVWUpdateListener(SelectionDAG &d, SmallVectorImpl<UseMemo> &uses) |
12007 | : SelectionDAG::DAGUpdateListener(d), Uses(uses) {} |
12008 | }; |
12009 | |
12010 | } // end anonymous namespace |
12011 | |
12012 | /// Return true if a glue output should propagate divergence information. |
12013 | static bool gluePropagatesDivergence(const SDNode *Node) { |
12014 | switch (Node->getOpcode()) { |
12015 | case ISD::CopyFromReg: |
12016 | case ISD::CopyToReg: |
12017 | return false; |
12018 | default: |
12019 | return true; |
12020 | } |
12021 | |
12022 | llvm_unreachable("covered opcode switch"); |
12023 | } |
12024 | |
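// Worked example (hedged; what counts as a divergence source is
// target-defined via TLI): on a GPU-style target where a thread-id
// CopyFromReg is reported divergent,
//   t1 = CopyFromReg tid   -> divergent source
//   t2 = add t1, c         -> divergent via the operand scan below
// while a user reached only through a chain (MVT::Other) edge, or through a
// CopyToReg/CopyFromReg glue result, does not inherit the bit.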
12025 | bool SelectionDAG::calculateDivergence(SDNode *N) { |
12026 | if (TLI->isSDNodeAlwaysUniform(N)) { |
12027 | assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) && |
12028 | "Conflicting divergence information!"); |
12029 | return false; |
12030 | } |
12031 | if (TLI->isSDNodeSourceOfDivergence(N, FLI, UA)) |
12032 | return true; |
12033 | for (const auto &Op : N->ops()) { |
12034 | EVT VT = Op.getValueType(); |
12035 | |
12036 | // Skip Chain. It does not carry divergence. |
12037 | if (VT != MVT::Other && Op.getNode()->isDivergent() && |
12038 | (VT != MVT::Glue || gluePropagatesDivergence(Op.getNode()))) |
12039 | return true; |
12040 | } |
12041 | return false; |
12042 | } |
12043 | |
12044 | void SelectionDAG::updateDivergence(SDNode *N) { |
12045 | SmallVector<SDNode *, 16> Worklist(1, N); |
12046 | do { |
12047 | N = Worklist.pop_back_val(); |
12048 | bool IsDivergent = calculateDivergence(N); |
12049 | if (N->SDNodeBits.IsDivergent != IsDivergent) { |
12050 | N->SDNodeBits.IsDivergent = IsDivergent; |
12051 | llvm::append_range(C&: Worklist, R: N->users()); |
12052 | } |
12053 | } while (!Worklist.empty()); |
12054 | } |
12055 | |
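// This is Kahn's algorithm: seed Order with zero-operand nodes (EntryToken,
// constants, ...), then append each user once all of its operands have been
// emitted. Usage sketch:
//   std::vector<SDNode *> Order;
//   DAG.CreateTopologicalOrder(Order);
//   for (SDNode *N : Order) { /* every operand of N precedes N */ }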
12056 | void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { |
12057 | DenseMap<SDNode *, unsigned> Degree; |
12058 | Order.reserve(n: AllNodes.size()); |
12059 | for (auto &N : allnodes()) { |
12060 | unsigned NOps = N.getNumOperands(); |
12061 | Degree[&N] = NOps; |
12062 | if (0 == NOps) |
12063 | Order.push_back(x: &N); |
12064 | } |
12065 | for (size_t I = 0; I != Order.size(); ++I) { |
12066 | SDNode *N = Order[I]; |
12067 | for (auto *U : N->users()) { |
12068 | unsigned &UnsortedOps = Degree[U]; |
12069 | if (0 == --UnsortedOps) |
12070 | Order.push_back(x: U); |
12071 | } |
12072 | } |
12073 | } |
12074 | |
12075 | #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS |
12076 | void SelectionDAG::VerifyDAGDivergence() { |
12077 | std::vector<SDNode *> TopoOrder; |
12078 | CreateTopologicalOrder(Order&: TopoOrder); |
12079 | for (auto *N : TopoOrder) { |
12080 | assert(calculateDivergence(N) == N->isDivergent() && |
12081 | "Divergence bit inconsistency detected"); |
12082 | } |
12083 | } |
12084 | #endif |
12085 | |
12086 | /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving |
12087 | /// uses of other values produced by From.getNode() alone. The same value |
12088 | /// may appear in both the From and To list.
12090 | void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, |
12091 | const SDValue *To, |
12092 | unsigned Num){ |
12093 | // Handle the simple, trivial case efficiently. |
12094 | if (Num == 1) |
12095 | return ReplaceAllUsesOfValueWith(From: *From, To: *To); |
12096 | |
12097 | transferDbgValues(From: *From, To: *To); |
12098 | copyExtraInfo(From: From->getNode(), To: To->getNode()); |
12099 | |
12100 | // Record all the existing uses up front. This ensures that new uses
12101 | // introduced during the replacement process are not visited by the
12102 | // loop below.
12103 | SmallVector<UseMemo, 4> Uses; |
12104 | for (unsigned i = 0; i != Num; ++i) { |
12105 | unsigned FromResNo = From[i].getResNo(); |
12106 | SDNode *FromNode = From[i].getNode(); |
12107 | for (SDUse &Use : FromNode->uses()) { |
12108 | if (Use.getResNo() == FromResNo) { |
12109 | UseMemo Memo = {.User: Use.getUser(), .Index: i, .Use: &Use}; |
12110 | Uses.push_back(Elt: Memo); |
12111 | } |
12112 | } |
12113 | } |
12114 | |
12115 | // Sort the uses, so that all the uses from a given User are together. |
12116 | llvm::sort(C&: Uses); |
12117 | RAUOVWUpdateListener Listener(*this, Uses); |
12118 | |
12119 | for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); |
12120 | UseIndex != UseIndexEnd; ) { |
12121 | // We know that this user uses some value of From. If it is the right |
12122 | // value, update it. |
12123 | SDNode *User = Uses[UseIndex].User; |
12124 | // If the node has been deleted by recursive CSE updates when updating |
12125 | // another node, then just skip this entry. |
12126 | if (User == nullptr) { |
12127 | ++UseIndex; |
12128 | continue; |
12129 | } |
12130 | |
12131 | // This node is about to morph, remove its old self from the CSE maps. |
12132 | RemoveNodeFromCSEMaps(N: User); |
12133 | |
12134 | // The Uses array is sorted, so all the uses for a given User |
12135 | // are next to each other in the list. |
12136 | // To help reduce the number of CSE recomputations, process all |
12137 | // the uses of this user that we can find this way. |
12138 | do { |
12139 | unsigned i = Uses[UseIndex].Index; |
12140 | SDUse &Use = *Uses[UseIndex].Use; |
12141 | ++UseIndex; |
12142 | |
12143 | Use.set(To[i]); |
12144 | } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User); |
12145 | |
12146 | // Now that we have modified User, add it back to the CSE maps. If it |
12147 | // already exists there, recursively merge the results together. |
12148 | AddModifiedNodeToCSEMaps(N: User); |
12149 | } |
12150 | } |
12151 | |
12152 | /// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
12153 | /// based on their topological order. It returns the number of nodes, which
12154 | /// is one past the maximum assigned id.
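/// Invariant sketch after it runs (assuming a SelectionDAG &DAG in scope):
/// \code
///   DAG.AssignTopologicalOrder();
///   for (SDNode &N : DAG.allnodes())
///     for (SDValue Op : N.op_values())
///       assert(Op.getNode()->getNodeId() < N.getNodeId());
/// \endcode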
12155 | unsigned SelectionDAG::AssignTopologicalOrder() { |
12156 | unsigned DAGSize = 0; |
12157 | |
12158 | // SortedPos tracks the progress of the algorithm. Nodes before it are |
12159 | // sorted, nodes after it are unsorted. When the algorithm completes |
12160 | // it is at the end of the list. |
12161 | allnodes_iterator SortedPos = allnodes_begin(); |
12162 | |
12163 | // Visit all the nodes. Move nodes with no operands to the front of |
12164 | // the list immediately. Annotate nodes that do have operands with their |
12165 | // operand count. Before we do this, the Node Id fields of the nodes |
12166 | // may contain arbitrary values. After, the Node Id fields for nodes |
12167 | // before SortedPos will contain the topological sort index, and the |
12168 | // Node Id fields for nodes at SortedPos and after will contain the
12169 | // count of outstanding operands. |
12170 | for (SDNode &N : llvm::make_early_inc_range(Range: allnodes())) { |
12171 | checkForCycles(N: &N, DAG: this); |
12172 | unsigned Degree = N.getNumOperands(); |
12173 | if (Degree == 0) { |
12174 | // A node with no operands; add it to the result array immediately.
12175 | N.setNodeId(DAGSize++); |
12176 | allnodes_iterator Q(&N); |
12177 | if (Q != SortedPos) |
12178 | SortedPos = AllNodes.insert(where: SortedPos, New: AllNodes.remove(IT&: Q)); |
12179 | assert(SortedPos != AllNodes.end() && "Overran node list"); |
12180 | ++SortedPos; |
12181 | } else { |
12182 | // Temporarily use the Node Id as scratch space for the degree count. |
12183 | N.setNodeId(Degree); |
12184 | } |
12185 | } |
12186 | |
12187 | // Visit all the nodes. As we iterate, move nodes into sorted order, |
12188 | // such that by the time the end is reached all nodes will be sorted. |
12189 | for (SDNode &Node : allnodes()) { |
12190 | SDNode *N = &Node; |
12191 | checkForCycles(N, DAG: this); |
12192 | // N is in sorted position, so all its uses have one less operand |
12193 | // that needs to be sorted. |
12194 | for (SDNode *P : N->users()) { |
12195 | unsigned Degree = P->getNodeId(); |
12196 | assert(Degree != 0 && "Invalid node degree"); |
12197 | --Degree; |
12198 | if (Degree == 0) { |
12199 | // All of P's operands are sorted, so P may be sorted now.
12200 | P->setNodeId(DAGSize++); |
12201 | if (P->getIterator() != SortedPos) |
12202 | SortedPos = AllNodes.insert(where: SortedPos, New: AllNodes.remove(IT: P)); |
12203 | assert(SortedPos != AllNodes.end() && "Overran node list"); |
12204 | ++SortedPos; |
12205 | } else { |
12206 | // Update P's outstanding operand count. |
12207 | P->setNodeId(Degree); |
12208 | } |
12209 | } |
12210 | if (Node.getIterator() == SortedPos) { |
12211 | #ifndef NDEBUG |
12212 | allnodes_iterator I(N); |
12213 | SDNode *S = &*++I; |
12214 | dbgs() << "Overran sorted position:\n"; |
12215 | S->dumprFull(G: this); dbgs() << "\n"; |
12216 | dbgs() << "Checking if this is due to cycles\n"; |
12217 | checkForCycles(DAG: this, force: true); |
12218 | #endif |
12219 | llvm_unreachable(nullptr); |
12220 | } |
12221 | } |
12222 | |
12223 | assert(SortedPos == AllNodes.end() && |
12224 | "Topological sort incomplete!"); |
12225 | assert(AllNodes.front().getOpcode() == ISD::EntryToken && |
12226 | "First node in topological sort is not the entry token!"); |
12227 | assert(AllNodes.front().getNodeId() == 0 && |
12228 | "First node in topological sort has non-zero id!"); |
12229 | assert(AllNodes.front().getNumOperands() == 0 && |
12230 | "First node in topological sort has operands!"); |
12231 | assert(AllNodes.back().getNodeId() == (int)DAGSize-1 && |
12232 | "Last node in topologic sort has unexpected id!"); |
12233 | assert(AllNodes.back().use_empty() && |
12234 | "Last node in topologic sort has users!"); |
12235 | assert(DAGSize == allnodes_size() && "Node count mismatch!"); |
12236 | return DAGSize; |
12237 | } |
12238 | |
12239 | /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the |
12240 | /// value is produced by SD. |
12241 | void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) { |
12242 | for (SDNode *SD : DB->getSDNodes()) { |
12243 | if (!SD) |
12244 | continue; |
12245 | assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue()); |
12246 | SD->setHasDebugValue(true); |
12247 | } |
12248 | DbgInfo->add(V: DB, isParameter); |
12249 | } |
12250 | |
12251 | void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { DbgInfo->add(L: DB); } |
12252 | |
12253 | SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, |
12254 | SDValue NewMemOpChain) { |
12255 | assert(isa<MemSDNode>(NewMemOpChain) && "Expected a memop node"); |
12256 | assert(NewMemOpChain.getValueType() == MVT::Other && "Expected a token VT"); |
12257 | // The new memory operation must have the same position as the old load in |
12258 | // terms of memory dependency. Create a TokenFactor for the old load and new |
12259 | // memory operation and update uses of the old load's output chain to use that |
12260 | // TokenFactor. |
12261 | if (OldChain == NewMemOpChain || OldChain.use_empty()) |
12262 | return NewMemOpChain; |
12263 | |
12264 | SDValue TokenFactor = getNode(ISD::TokenFactor, SDLoc(OldChain), MVT::Other, |
12265 | OldChain, NewMemOpChain); |
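// Redirect the old chain's users to the TokenFactor. The RAUW below also
// rewrites the TokenFactor's own use of OldChain to point at itself, so
// re-seat its operands to (OldChain, NewMemOpChain) right afterwards.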
12266 | ReplaceAllUsesOfValueWith(From: OldChain, To: TokenFactor); |
12267 | UpdateNodeOperands(N: TokenFactor.getNode(), Op1: OldChain, Op2: NewMemOpChain); |
12268 | return TokenFactor; |
12269 | } |
12270 | |
12271 | SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, |
12272 | SDValue NewMemOp) { |
12273 | assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); |
12274 | SDValue OldChain = SDValue(OldLoad, 1); |
12275 | SDValue NewMemOpChain = NewMemOp.getValue(R: 1); |
12276 | return makeEquivalentMemoryOrdering(OldChain, NewMemOpChain); |
12277 | } |
12278 | |
12279 | SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, |
12280 | Function **OutFunction) { |
12281 | assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol"); |
12282 | |
12283 | auto *Symbol = cast<ExternalSymbolSDNode>(Val&: Op)->getSymbol(); |
12284 | auto *Module = MF->getFunction().getParent(); |
12285 | auto *Function = Module->getFunction(Name: Symbol); |
12286 | |
12287 | if (OutFunction != nullptr) |
12288 | *OutFunction = Function; |
12289 | |
12290 | if (Function != nullptr) { |
12291 | auto PtrTy = TLI->getPointerTy(DL: getDataLayout(), AS: Function->getAddressSpace()); |
12292 | return getGlobalAddress(GV: Function, DL: SDLoc(Op), VT: PtrTy); |
12293 | } |
12294 | |
12295 | std::string ErrorStr; |
12296 | raw_string_ostream ErrorFormatter(ErrorStr); |
12297 | ErrorFormatter << "Undefined external symbol "; |
12298 | ErrorFormatter << '"' << Symbol << '"'; |
12299 | report_fatal_error(reason: Twine(ErrorStr)); |
12300 | } |
12301 | |
12302 | //===----------------------------------------------------------------------===// |
12303 | // SDNode Class |
12304 | //===----------------------------------------------------------------------===// |
12305 | |
12306 | bool llvm::isNullConstant(SDValue V) { |
12307 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: V); |
12308 | return Const != nullptr && Const->isZero(); |
12309 | } |
12310 | |
12311 | bool llvm::isNullConstantOrUndef(SDValue V) { |
12312 | return V.isUndef() || isNullConstant(V); |
12313 | } |
12314 | |
12315 | bool llvm::isNullFPConstant(SDValue V) { |
12316 | ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(Val&: V); |
12317 | return Const != nullptr && Const->isZero() && !Const->isNegative(); |
12318 | } |
12319 | |
12320 | bool llvm::isAllOnesConstant(SDValue V) { |
12321 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: V); |
12322 | return Const != nullptr && Const->isAllOnes(); |
12323 | } |
12324 | |
12325 | bool llvm::isOneConstant(SDValue V) { |
12326 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: V); |
12327 | return Const != nullptr && Const->isOne(); |
12328 | } |
12329 | |
12330 | bool llvm::isMinSignedConstant(SDValue V) { |
12331 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: V); |
12332 | return Const != nullptr && Const->isMinSignedValue(); |
12333 | } |
12334 | |
12335 | bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V, |
12336 | unsigned OperandNo) { |
12337 | // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity(). |
12338 | // TODO: Target-specific opcodes could be added. |
12339 | if (auto *ConstV = isConstOrConstSplat(N: V, /*AllowUndefs*/ false, |
12340 | /*AllowTruncation*/ true)) { |
12341 | APInt Const = ConstV->getAPIntValue().trunc(width: V.getScalarValueSizeInBits()); |
12342 | switch (Opcode) { |
12343 | case ISD::ADD: |
12344 | case ISD::OR: |
12345 | case ISD::XOR: |
12346 | case ISD::UMAX: |
12347 | return Const.isZero(); |
12348 | case ISD::MUL: |
12349 | return Const.isOne(); |
12350 | case ISD::AND: |
12351 | case ISD::UMIN: |
12352 | return Const.isAllOnes(); |
12353 | case ISD::SMAX: |
12354 | return Const.isMinSignedValue(); |
12355 | case ISD::SMIN: |
12356 | return Const.isMaxSignedValue(); |
12357 | case ISD::SUB: |
12358 | case ISD::SHL: |
12359 | case ISD::SRA: |
12360 | case ISD::SRL: |
12361 | return OperandNo == 1 && Const.isZero(); |
12362 | case ISD::UDIV: |
12363 | case ISD::SDIV: |
12364 | return OperandNo == 1 && Const.isOne(); |
12365 | } |
12366 | } else if (auto *ConstFP = isConstOrConstSplatFP(N: V)) { |
12367 | switch (Opcode) { |
12368 | case ISD::FADD: |
12369 | return ConstFP->isZero() && |
12370 | (Flags.hasNoSignedZeros() || ConstFP->isNegative()); |
12371 | case ISD::FSUB: |
12372 | return OperandNo == 1 && ConstFP->isZero() && |
12373 | (Flags.hasNoSignedZeros() || !ConstFP->isNegative()); |
12374 | case ISD::FMUL: |
12375 | return ConstFP->isExactlyValue(V: 1.0); |
12376 | case ISD::FDIV: |
12377 | return OperandNo == 1 && ConstFP->isExactlyValue(V: 1.0); |
12378 | case ISD::FMINNUM: |
12379 | case ISD::FMAXNUM: { |
12380 | // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF. |
12381 | EVT VT = V.getValueType(); |
12382 | const fltSemantics &Semantics = VT.getFltSemantics(); |
12383 | APFloat NeutralAF = !Flags.hasNoNaNs() |
12384 | ? APFloat::getQNaN(Sem: Semantics) |
12385 | : !Flags.hasNoInfs() |
12386 | ? APFloat::getInf(Sem: Semantics) |
12387 | : APFloat::getLargest(Sem: Semantics); |
12388 | if (Opcode == ISD::FMAXNUM) |
12389 | NeutralAF.changeSign(); |
12390 | |
12391 | return ConstFP->isExactlyValue(V: NeutralAF); |
12392 | } |
12393 | } |
12394 | } |
12395 | return false; |
12396 | } |
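// Examples for isNeutralConstant above (tied to the switch cases):
//   (add X, 0), (or X, 0), (xor X, 0)   -> neutral in either operand
//   (sub X, 0), (shl X, 0), (udiv X, 1) -> neutral only as operand 1
//   (fadd X, -0.0)                      -> neutral; +0.0 requires nsz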
12397 | |
12398 | SDValue llvm::peekThroughBitcasts(SDValue V) { |
12399 | while (V.getOpcode() == ISD::BITCAST) |
12400 | V = V.getOperand(i: 0); |
12401 | return V; |
12402 | } |
12403 | |
12404 | SDValue llvm::peekThroughOneUseBitcasts(SDValue V) { |
12405 | while (V.getOpcode() == ISD::BITCAST && V.getOperand(i: 0).hasOneUse()) |
12406 | V = V.getOperand(i: 0); |
12407 | return V; |
12408 | } |
12409 | |
12410 | SDValue llvm::peekThroughExtractSubvectors(SDValue V) { |
12411 | while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) |
12412 | V = V.getOperand(i: 0); |
12413 | return V; |
12414 | } |
12415 | |
12416 | SDValue llvm::peekThroughTruncates(SDValue V) { |
12417 | while (V.getOpcode() == ISD::TRUNCATE) |
12418 | V = V.getOperand(i: 0); |
12419 | return V; |
12420 | } |
12421 | |
12422 | bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) { |
12423 | if (V.getOpcode() != ISD::XOR) |
12424 | return false; |
12425 | V = peekThroughBitcasts(V: V.getOperand(i: 1)); |
12426 | unsigned NumBits = V.getScalarValueSizeInBits(); |
12427 | ConstantSDNode *C = |
12428 | isConstOrConstSplat(N: V, AllowUndefs, /*AllowTruncation*/ true); |
12429 | return C && (C->getAPIntValue().countr_one() >= NumBits); |
12430 | } |
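// e.g. (xor X, -1) is a bitwise-not, as is (xor X, splat(-1)) for vectors;
// the countr_one() >= NumBits test also accepts splat constants whose
// truncated scalar value is all-ones.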
12431 | |
12432 | ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, |
12433 | bool AllowTruncation) { |
12434 | EVT VT = N.getValueType(); |
12435 | APInt DemandedElts = VT.isFixedLengthVector() |
12436 | ? APInt::getAllOnes(numBits: VT.getVectorMinNumElements()) |
12437 | : APInt(1, 1); |
12438 | return isConstOrConstSplat(N, DemandedElts, AllowUndefs, AllowTruncation); |
12439 | } |
12440 | |
12441 | ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, |
12442 | bool AllowUndefs, |
12443 | bool AllowTruncation) { |
12444 | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) |
12445 | return CN; |
12446 | |
12447 | // SplatVectors can truncate their operands. Ignore that case here unless |
12448 | // AllowTruncation is set. |
12449 | if (N->getOpcode() == ISD::SPLAT_VECTOR) { |
12450 | EVT VecEltVT = N->getValueType(ResNo: 0).getVectorElementType(); |
12451 | if (auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0))) { |
12452 | EVT CVT = CN->getValueType(ResNo: 0); |
12453 | assert(CVT.bitsGE(VecEltVT) && "Illegal splat_vector element extension"); |
12454 | if (AllowTruncation || CVT == VecEltVT) |
12455 | return CN; |
12456 | } |
12457 | } |
12458 | |
12459 | if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N)) { |
12460 | BitVector UndefElements; |
12461 | ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, UndefElements: &UndefElements); |
12462 | |
12463 | // BuildVectors can truncate their operands. Ignore that case here unless |
12464 | // AllowTruncation is set. |
12465 | // TODO: Look into whether we should allow UndefElements in non-DemandedElts |
12466 | if (CN && (UndefElements.none() || AllowUndefs)) { |
12467 | EVT CVT = CN->getValueType(ResNo: 0); |
12468 | EVT NSVT = N.getValueType().getScalarType(); |
12469 | assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); |
12470 | if (AllowTruncation || (CVT == NSVT)) |
12471 | return CN; |
12472 | } |
12473 | } |
12474 | |
12475 | return nullptr; |
12476 | } |
12477 | |
12478 | ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { |
12479 | EVT VT = N.getValueType(); |
12480 | APInt DemandedElts = VT.isFixedLengthVector() |
12481 | ? APInt::getAllOnes(numBits: VT.getVectorMinNumElements()) |
12482 | : APInt(1, 1); |
12483 | return isConstOrConstSplatFP(N, DemandedElts, AllowUndefs); |
12484 | } |
12485 | |
12486 | ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, |
12487 | const APInt &DemandedElts, |
12488 | bool AllowUndefs) { |
12489 | if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: N)) |
12490 | return CN; |
12491 | |
12492 | if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N)) { |
12493 | BitVector UndefElements; |
12494 | ConstantFPSDNode *CN = |
12495 | BV->getConstantFPSplatNode(DemandedElts, UndefElements: &UndefElements); |
12496 | // TODO: Look into whether we should allow UndefElements in non-DemandedElts |
12497 | if (CN && (UndefElements.none() || AllowUndefs)) |
12498 | return CN; |
12499 | } |
12500 | |
12501 | if (N.getOpcode() == ISD::SPLAT_VECTOR) |
12502 | if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val: N.getOperand(i: 0))) |
12503 | return CN; |
12504 | |
12505 | return nullptr; |
12506 | } |
12507 | |
12508 | bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { |
12509 | // TODO: may want to use peekThroughBitcast() here. |
12510 | ConstantSDNode *C = |
12511 | isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true); |
12512 | return C && C->isZero(); |
12513 | } |
12514 | |
12515 | bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { |
12516 | ConstantSDNode *C = |
12517 | isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation*/ true); |
12518 | return C && C->isOne(); |
12519 | } |
12520 | |
12521 | bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) { |
12522 | N = peekThroughBitcasts(V: N); |
12523 | unsigned BitWidth = N.getScalarValueSizeInBits(); |
12524 | ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); |
12525 | return C && C->isAllOnes() && C->getValueSizeInBits(ResNo: 0) == BitWidth; |
12526 | } |
12527 | |
12528 | HandleSDNode::~HandleSDNode() { |
12529 | DropOperands(); |
12530 | } |
12531 | |
12532 | MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, |
12533 | SDVTList VTs, EVT memvt, MachineMemOperand *mmo) |
12534 | : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { |
12535 | MemSDNodeBits.IsVolatile = MMO->isVolatile(); |
12536 | MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal(); |
12537 | MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable(); |
12538 | MemSDNodeBits.IsInvariant = MMO->isInvariant(); |
12539 | |
12540 | // We check here that the size of the memory operand fits within the size of |
12541 | // the MMO. This is because the MMO might indicate only a possible address |
12542 | // range instead of specifying the affected memory addresses precisely. |
12543 | assert( |
12544 | (!MMO->getType().isValid() || |
12545 | TypeSize::isKnownLE(memvt.getStoreSize(), MMO->getSize().getValue())) && |
12546 | "Size mismatch!"); |
12547 | } |
12548 | |
12549 | /// Profile - Gather unique data for the node. |
12550 | /// |
12551 | void SDNode::Profile(FoldingSetNodeID &ID) const { |
12552 | AddNodeIDNode(ID, N: this); |
12553 | } |
12554 | |
12555 | namespace { |
12556 | |
12557 | struct EVTArray { |
12558 | std::vector<EVT> VTs; |
12559 | |
12560 | EVTArray() { |
12561 | VTs.reserve(n: MVT::VALUETYPE_SIZE); |
12562 | for (unsigned i = 0; i < MVT::VALUETYPE_SIZE; ++i) |
12563 | VTs.push_back(x: MVT((MVT::SimpleValueType)i)); |
12564 | } |
12565 | }; |
12566 | |
12567 | } // end anonymous namespace |
12568 | |
12569 | /// getValueTypeList - Return a pointer to the specified value type. |
12570 | /// |
12571 | const EVT *SDNode::getValueTypeList(MVT VT) { |
12572 | static EVTArray SimpleVTArray; |
12573 | |
12574 | assert(VT < MVT::VALUETYPE_SIZE && "Value type out of range!"); |
12575 | return &SimpleVTArray.VTs[VT.SimpleTy]; |
12576 | } |
12577 | |
12578 | /// hasAnyUseOfValue - Return true if there are any use of the indicated |
12579 | /// value. This method ignores uses of other values defined by this operation. |
12580 | bool SDNode::hasAnyUseOfValue(unsigned Value) const { |
12581 | assert(Value < getNumValues() && "Bad value!"); |
12582 | |
12583 | for (SDUse &U : uses()) |
12584 | if (U.getResNo() == Value) |
12585 | return true; |
12586 | |
12587 | return false; |
12588 | } |
12589 | |
12590 | /// isOnlyUserOf - Return true if this node is the only use of N. |
12591 | bool SDNode::isOnlyUserOf(const SDNode *N) const { |
12592 | bool Seen = false; |
12593 | for (const SDNode *User : N->users()) { |
12594 | if (User == this) |
12595 | Seen = true; |
12596 | else |
12597 | return false; |
12598 | } |
12599 | |
12600 | return Seen; |
12601 | } |
12602 | |
12603 | /// Return true if the only users of N are contained in Nodes. |
12604 | bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { |
12605 | bool Seen = false; |
12606 | for (const SDNode *User : N->users()) { |
12607 | if (llvm::is_contained(Range&: Nodes, Element: User)) |
12608 | Seen = true; |
12609 | else |
12610 | return false; |
12611 | } |
12612 | |
12613 | return Seen; |
12614 | } |
12615 | |
12616 | /// isOperand - Return true if this node is an operand of N. |
12617 | bool SDValue::isOperandOf(const SDNode *N) const { |
12618 | return is_contained(Range: N->op_values(), Element: *this); |
12619 | } |
12620 | |
12621 | bool SDNode::isOperandOf(const SDNode *N) const { |
12622 | return any_of(Range: N->op_values(), |
12623 | P: [this](SDValue Op) { return this == Op.getNode(); }); |
12624 | } |
12625 | |
12626 | /// reachesChainWithoutSideEffects - Return true if this operand (which must |
12627 | /// be a chain) reaches the specified operand without crossing any |
12628 | /// side-effecting instructions on any chain path. In practice, this looks |
12629 | /// through token factors and non-volatile loads. In order to remain efficient, |
12630 | /// this only looks a couple of nodes in; it does not do an exhaustive search.
12631 | /// |
12632 | /// Note that we only need to examine chains when we're searching for |
12633 | /// side-effects; SelectionDAG requires that all side-effects are represented |
12634 | /// by chains, even if another operand would force a specific ordering. This |
12635 | /// constraint is necessary to allow transformations like splitting loads. |
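/// For example, with TF = TokenFactor(Ld.getValue(1), X), calling
/// TF.reachesChainWithoutSideEffects(Ld.getValue(1)) succeeds via the
/// shallow search when the load's chain result has a single use, and
/// otherwise only if every TokenFactor input reaches it within the depth
/// limit.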
12636 | bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, |
12637 | unsigned Depth) const { |
12638 | if (*this == Dest) return true; |
12639 | |
12640 | // Don't search too deeply, we just want to be able to see through |
12641 | // TokenFactor's etc. |
12642 | if (Depth == 0) return false; |
12643 | |
12644 | // If this is a token factor, all inputs to the TF happen in parallel. |
12645 | if (getOpcode() == ISD::TokenFactor) { |
12646 | // First, try a shallow search. |
12647 | if (is_contained(Range: (*this)->ops(), Element: Dest)) { |
12648 | // We found the chain we want as an operand of this TokenFactor. |
12649 | // Essentially, we reach the chain without side-effects if we could |
12650 | // serialize the TokenFactor into a simple chain of operations with |
12651 | // Dest as the last operation. This is automatically true if the |
12652 | // chain has one use: there are no other ordering constraints. |
12653 | // If the chain has more than one use, we give up: some other |
12654 | // use of Dest might force a side-effect between Dest and the current |
12655 | // node. |
12656 | if (Dest.hasOneUse()) |
12657 | return true; |
12658 | } |
12659 | // Next, try a deep search: check whether every operand of the TokenFactor |
12660 | // reaches Dest. |
12661 | return llvm::all_of(Range: (*this)->ops(), P: [=](SDValue Op) { |
12662 | return Op.reachesChainWithoutSideEffects(Dest, Depth: Depth - 1); |
12663 | }); |
12664 | } |
12665 | |
12666 | // Loads don't have side effects, look through them. |
12667 | if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val: *this)) { |
12668 | if (Ld->isUnordered()) |
12669 | return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth: Depth-1); |
12670 | } |
12671 | return false; |
12672 | } |
12673 | |
12674 | bool SDNode::hasPredecessor(const SDNode *N) const { |
12675 | SmallPtrSet<const SDNode *, 32> Visited; |
12676 | SmallVector<const SDNode *, 16> Worklist; |
12677 | Worklist.push_back(Elt: this); |
12678 | return hasPredecessorHelper(N, Visited, Worklist); |
12679 | } |
12680 | |
12681 | void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { |
12682 | this->Flags &= Flags; |
12683 | } |
12684 | |
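/// Usage sketch for matchBinOpReduction below (hypothetical caller):
/// recognize a shuffle-pyramid horizontal add feeding (extractelt %v, 0):
/// \code
///   ISD::NodeType BinOp;
///   if (SDValue Src = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD}))
///     ; // Src is the pre-reduction vector and BinOp == ISD::ADD.
/// \endcode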
12685 | SDValue |
12686 | SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, |
12687 | ArrayRef<ISD::NodeType> CandidateBinOps, |
12688 | bool AllowPartials) { |
12689 | // The pattern must end in an extract from index 0. |
12690 | if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
12691 | !isNullConstant(V: Extract->getOperand(Num: 1))) |
12692 | return SDValue(); |
12693 | |
12694 | // Match against one of the candidate binary ops. |
12695 | SDValue Op = Extract->getOperand(Num: 0); |
12696 | if (llvm::none_of(Range&: CandidateBinOps, P: [Op](ISD::NodeType BinOp) { |
12697 | return Op.getOpcode() == unsigned(BinOp); |
12698 | })) |
12699 | return SDValue(); |
12700 | |
12701 | // Floating-point reductions may require relaxed constraints on the final step |
12702 | // of the reduction because they may reorder intermediate operations. |
12703 | unsigned CandidateBinOp = Op.getOpcode(); |
12704 | if (Op.getValueType().isFloatingPoint()) { |
12705 | SDNodeFlags Flags = Op->getFlags(); |
12706 | switch (CandidateBinOp) { |
12707 | case ISD::FADD: |
12708 | if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation()) |
12709 | return SDValue(); |
12710 | break; |
12711 | default: |
12712 | llvm_unreachable("Unhandled FP opcode for binop reduction"); |
12713 | } |
12714 | } |
12715 | |
12716 | // Matching failed - attempt to see if we did enough stages that a partial |
12717 | // reduction from a subvector is possible. |
12718 | auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) { |
12719 | if (!AllowPartials || !Op) |
12720 | return SDValue(); |
12721 | EVT OpVT = Op.getValueType(); |
12722 | EVT OpSVT = OpVT.getScalarType(); |
12723 | EVT SubVT = EVT::getVectorVT(Context&: *getContext(), VT: OpSVT, NumElements: NumSubElts); |
12724 | if (!TLI->isExtractSubvectorCheap(ResVT: SubVT, SrcVT: OpVT, Index: 0)) |
12725 | return SDValue(); |
12726 | BinOp = (ISD::NodeType)CandidateBinOp; |
12727 | return getExtractSubvector(DL: SDLoc(Op), VT: SubVT, Vec: Op, Idx: 0); |
12728 | }; |
12729 | |
12730 | // At each stage, we're looking for something that looks like: |
12731 | // %s = shufflevector <8 x i32> %op, <8 x i32> undef, |
12732 | // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, |
12733 | // i32 undef, i32 undef, i32 undef, i32 undef> |
12734 | // %a = binop <8 x i32> %op, %s |
12735 | // Where the mask changes according to the stage. E.g. for a 3-stage pyramid, |
12736 | // we expect something like: |
12737 | // <4,5,6,7,u,u,u,u> |
12738 | // <2,3,u,u,u,u,u,u> |
12739 | // <1,u,u,u,u,u,u,u> |
12740 | // While a partial reduction match would be: |
12741 | // <2,3,u,u,u,u,u,u> |
12742 | // <1,u,u,u,u,u,u,u> |
12743 | unsigned Stages = Log2_32(Value: Op.getValueType().getVectorNumElements()); |
12744 | SDValue PrevOp; |
12745 | for (unsigned i = 0; i < Stages; ++i) { |
12746 | unsigned MaskEnd = (1 << i); |
12747 | |
12748 | if (Op.getOpcode() != CandidateBinOp) |
12749 | return PartialReduction(PrevOp, MaskEnd); |
12750 | |
12751 | SDValue Op0 = Op.getOperand(i: 0); |
12752 | SDValue Op1 = Op.getOperand(i: 1); |
12753 | |
12754 | ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Val&: Op0); |
12755 | if (Shuffle) { |
12756 | Op = Op1; |
12757 | } else { |
12758 | Shuffle = dyn_cast<ShuffleVectorSDNode>(Val&: Op1); |
12759 | Op = Op0; |
12760 | } |
12761 | |
12762 | // The first operand of the shuffle should be the same as the other operand |
12763 | // of the binop. |
12764 | if (!Shuffle || Shuffle->getOperand(Num: 0) != Op) |
12765 | return PartialReduction(PrevOp, MaskEnd); |
12766 | |
12767 | // Verify the shuffle has the expected (at this stage of the pyramid) mask. |
12768 | for (int Index = 0; Index < (int)MaskEnd; ++Index) |
12769 | if (Shuffle->getMaskElt(Idx: Index) != (int)(MaskEnd + Index)) |
12770 | return PartialReduction(PrevOp, MaskEnd); |
12771 | |
12772 | PrevOp = Op; |
12773 | } |
12774 | |
12775 | // Handle subvector reductions, which tend to appear after the shuffle |
12776 | // reduction stages. |
12777 | while (Op.getOpcode() == CandidateBinOp) { |
12778 | unsigned NumElts = Op.getValueType().getVectorNumElements(); |
12779 | SDValue Op0 = Op.getOperand(i: 0); |
12780 | SDValue Op1 = Op.getOperand(i: 1); |
12781 | if (Op0.getOpcode() != ISD::EXTRACT_SUBVECTOR || |
12782 | Op1.getOpcode() != ISD::EXTRACT_SUBVECTOR || |
12783 | Op0.getOperand(i: 0) != Op1.getOperand(i: 0)) |
12784 | break; |
12785 | SDValue Src = Op0.getOperand(i: 0); |
12786 | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
12787 | if (NumSrcElts != (2 * NumElts)) |
12788 | break; |
12789 | if (!(Op0.getConstantOperandAPInt(i: 1) == 0 && |
12790 | Op1.getConstantOperandAPInt(i: 1) == NumElts) && |
12791 | !(Op1.getConstantOperandAPInt(i: 1) == 0 && |
12792 | Op0.getConstantOperandAPInt(i: 1) == NumElts)) |
12793 | break; |
12794 | Op = Src; |
12795 | } |
12796 | |
12797 | BinOp = (ISD::NodeType)CandidateBinOp; |
12798 | return Op; |
12799 | } |
12800 | |
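/// Unrolling sketch for UnrollVectorOp below: (add <2 x i32> %a, %b) becomes
///   e0 = add (extractelt %a, 0), (extractelt %b, 0)
///   e1 = add (extractelt %a, 1), (extractelt %b, 1)
///   BUILD_VECTOR e0, e1
/// and when ResNE is wider than the source, trailing lanes are UNDEF.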
12801 | SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { |
12802 | EVT VT = N->getValueType(ResNo: 0); |
12803 | EVT EltVT = VT.getVectorElementType(); |
12804 | unsigned NE = VT.getVectorNumElements(); |
12805 | |
12806 | SDLoc dl(N); |
12807 | |
12808 | // If ResNE is 0, fully unroll the vector op. |
12809 | if (ResNE == 0) |
12810 | ResNE = NE; |
12811 | else if (NE > ResNE) |
12812 | NE = ResNE; |
12813 | |
12814 | if (N->getNumValues() == 2) { |
12815 | SmallVector<SDValue, 8> Scalars0, Scalars1; |
12816 | SmallVector<SDValue, 4> Operands(N->getNumOperands()); |
12817 | EVT VT1 = N->getValueType(ResNo: 1); |
12818 | EVT EltVT1 = VT1.getVectorElementType(); |
12819 | |
12820 | unsigned i; |
12821 | for (i = 0; i != NE; ++i) { |
12822 | for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { |
12823 | SDValue Operand = N->getOperand(Num: j); |
12824 | EVT OperandVT = Operand.getValueType(); |
12825 | |
12826 | // A vector operand; extract a single element. |
12827 | EVT OperandEltVT = OperandVT.getVectorElementType(); |
12828 | Operands[j] = getExtractVectorElt(DL: dl, VT: OperandEltVT, Vec: Operand, Idx: i); |
12829 | } |
12830 | |
12831 | SDValue EltOp = getNode(Opcode: N->getOpcode(), DL: dl, ResultTys: {EltVT, EltVT1}, Ops: Operands); |
12832 | Scalars0.push_back(Elt: EltOp); |
12833 | Scalars1.push_back(Elt: EltOp.getValue(R: 1)); |
12834 | } |
12835 | |
12836 | for (; i < ResNE; ++i) { |
12837 | Scalars0.push_back(Elt: getUNDEF(VT: EltVT)); |
12838 | Scalars1.push_back(Elt: getUNDEF(VT: EltVT1)); |
12839 | } |
12840 | |
12841 | EVT VecVT = EVT::getVectorVT(Context&: *getContext(), VT: EltVT, NumElements: ResNE); |
12842 | EVT VecVT1 = EVT::getVectorVT(Context&: *getContext(), VT: EltVT1, NumElements: ResNE); |
12843 | SDValue Vec0 = getBuildVector(VT: VecVT, DL: dl, Ops: Scalars0); |
12844 | SDValue Vec1 = getBuildVector(VT: VecVT1, DL: dl, Ops: Scalars1); |
12845 | return getMergeValues(Ops: {Vec0, Vec1}, dl); |
12846 | } |
12847 | |
12848 | assert(N->getNumValues() == 1 && |
12849 | "Can't unroll a vector with multiple results!"); |
12850 | |
12851 | SmallVector<SDValue, 8> Scalars; |
12852 | SmallVector<SDValue, 4> Operands(N->getNumOperands()); |
12853 | |
12854 | unsigned i; |
12855 | for (i = 0; i != NE; ++i) {
12856 | for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { |
12857 | SDValue Operand = N->getOperand(Num: j); |
12858 | EVT OperandVT = Operand.getValueType(); |
12859 | if (OperandVT.isVector()) { |
12860 | // A vector operand; extract a single element. |
12861 | EVT OperandEltVT = OperandVT.getVectorElementType(); |
12862 | Operands[j] = getExtractVectorElt(DL: dl, VT: OperandEltVT, Vec: Operand, Idx: i); |
12863 | } else { |
12864 | // A scalar operand; just use it as is. |
12865 | Operands[j] = Operand; |
12866 | } |
12867 | } |
12868 | |
12869 | switch (N->getOpcode()) { |
12870 | default: { |
12871 | Scalars.push_back(Elt: getNode(Opcode: N->getOpcode(), DL: dl, VT: EltVT, Ops: Operands, |
12872 | Flags: N->getFlags())); |
12873 | break; |
12874 | } |
12875 | case ISD::VSELECT: |
12876 | Scalars.push_back(Elt: getNode(Opcode: ISD::SELECT, DL: dl, VT: EltVT, Ops: Operands)); |
12877 | break; |
12878 | case ISD::SHL: |
12879 | case ISD::SRA: |
12880 | case ISD::SRL: |
12881 | case ISD::ROTL: |
12882 | case ISD::ROTR: |
12883 | Scalars.push_back(Elt: getNode(Opcode: N->getOpcode(), DL: dl, VT: EltVT, N1: Operands[0], |
12884 | N2: getShiftAmountOperand(LHSTy: Operands[0].getValueType(), |
12885 | Op: Operands[1]))); |
12886 | break; |
12887 | case ISD::SIGN_EXTEND_INREG: { |
12888 | EVT ExtVT = cast<VTSDNode>(Val&: Operands[1])->getVT().getVectorElementType(); |
12889 | Scalars.push_back(Elt: getNode(Opcode: N->getOpcode(), DL: dl, VT: EltVT, |
12890 | N1: Operands[0], |
12891 | N2: getValueType(VT: ExtVT))); |
12892 | break; |
12893 | } |
12894 | case ISD::ADDRSPACECAST: { |
12895 | const auto *ASC = cast<AddrSpaceCastSDNode>(Val: N); |
12896 | Scalars.push_back(Elt: getAddrSpaceCast(dl, VT: EltVT, Ptr: Operands[0], |
12897 | SrcAS: ASC->getSrcAddressSpace(), |
12898 | DestAS: ASC->getDestAddressSpace())); |
12899 | break; |
12900 | } |
12901 | } |
12902 | } |
12903 | |
12904 | for (; i < ResNE; ++i) |
12905 | Scalars.push_back(Elt: getUNDEF(VT: EltVT)); |
12906 | |
12907 | EVT VecVT = EVT::getVectorVT(Context&: *getContext(), VT: EltVT, NumElements: ResNE); |
12908 | return getBuildVector(VT: VecVT, DL: dl, Ops: Scalars); |
12909 | } |
12910 | |
12911 | std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp( |
12912 | SDNode *N, unsigned ResNE) { |
12913 | unsigned Opcode = N->getOpcode(); |
12914 | assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO || |
12915 | Opcode == ISD::USUBO || Opcode == ISD::SSUBO || |
12916 | Opcode == ISD::UMULO || Opcode == ISD::SMULO) && |
12917 | "Expected an overflow opcode"); |
12918 | |
12919 | EVT ResVT = N->getValueType(ResNo: 0); |
12920 | EVT OvVT = N->getValueType(ResNo: 1); |
12921 | EVT ResEltVT = ResVT.getVectorElementType(); |
12922 | EVT OvEltVT = OvVT.getVectorElementType(); |
12923 | SDLoc dl(N); |
12924 | |
12925 | // If ResNE is 0, fully unroll the vector op. |
12926 | unsigned NE = ResVT.getVectorNumElements(); |
12927 | if (ResNE == 0) |
12928 | ResNE = NE; |
12929 | else if (NE > ResNE) |
12930 | NE = ResNE; |
12931 | |
12932 | SmallVector<SDValue, 8> LHSScalars; |
12933 | SmallVector<SDValue, 8> RHSScalars; |
12934 | ExtractVectorElements(Op: N->getOperand(Num: 0), Args&: LHSScalars, Start: 0, Count: NE); |
12935 | ExtractVectorElements(Op: N->getOperand(Num: 1), Args&: RHSScalars, Start: 0, Count: NE); |
12936 | |
12937 | EVT SVT = TLI->getSetCCResultType(DL: getDataLayout(), Context&: *getContext(), VT: ResEltVT); |
12938 | SDVTList VTs = getVTList(VT1: ResEltVT, VT2: SVT); |
12939 | SmallVector<SDValue, 8> ResScalars; |
12940 | SmallVector<SDValue, 8> OvScalars; |
12941 | for (unsigned i = 0; i < NE; ++i) { |
12942 | SDValue Res = getNode(Opcode, DL: dl, VTList: VTs, N1: LHSScalars[i], N2: RHSScalars[i]); |
12943 | SDValue Ov = |
12944 | getSelect(DL: dl, VT: OvEltVT, Cond: Res.getValue(R: 1), |
12945 | LHS: getBoolConstant(V: true, DL: dl, VT: OvEltVT, OpVT: ResVT), |
12946 | RHS: getConstant(Val: 0, DL: dl, VT: OvEltVT)); |
12947 | |
12948 | ResScalars.push_back(Elt: Res); |
12949 | OvScalars.push_back(Elt: Ov); |
12950 | } |
12951 | |
12952 | ResScalars.append(NumInputs: ResNE - NE, Elt: getUNDEF(VT: ResEltVT)); |
12953 | OvScalars.append(NumInputs: ResNE - NE, Elt: getUNDEF(VT: OvEltVT)); |
12954 | |
12955 | EVT NewResVT = EVT::getVectorVT(Context&: *getContext(), VT: ResEltVT, NumElements: ResNE); |
12956 | EVT NewOvVT = EVT::getVectorVT(Context&: *getContext(), VT: OvEltVT, NumElements: ResNE); |
12957 | return std::make_pair(x: getBuildVector(VT: NewResVT, DL: dl, Ops: ResScalars), |
12958 | y: getBuildVector(VT: NewOvVT, DL: dl, Ops: OvScalars)); |
12959 | } |
12960 | |
12961 | bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, |
12962 | LoadSDNode *Base, |
12963 | unsigned Bytes, |
12964 | int Dist) const { |
12965 | if (LD->isVolatile() || Base->isVolatile()) |
12966 | return false; |
12967 | // TODO: probably too restrictive for atomics, revisit |
12968 | if (!LD->isSimple()) |
12969 | return false; |
12970 | if (LD->isIndexed() || Base->isIndexed()) |
12971 | return false; |
12972 | if (LD->getChain() != Base->getChain()) |
12973 | return false; |
12974 | EVT VT = LD->getMemoryVT(); |
12975 | if (VT.getSizeInBits() / 8 != Bytes) |
12976 | return false; |
12977 | |
12978 | auto BaseLocDecomp = BaseIndexOffset::match(N: Base, DAG: *this); |
12979 | auto LocDecomp = BaseIndexOffset::match(N: LD, DAG: *this); |
12980 | |
12981 | int64_t Offset = 0; |
12982 | if (BaseLocDecomp.equalBaseIndex(Other: LocDecomp, DAG: *this, Off&: Offset)) |
12983 | return (Dist * (int64_t)Bytes == Offset); |
12984 | return false; |
12985 | } |
12986 | |
12987 | /// InferPtrAlign - Infer alignment of a load / store address. Return
12988 | /// std::nullopt if it cannot be inferred. |
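/// For example, a pointer of the form (FrameIndex fi#n + 4), where slot fi#n
/// is 16-byte aligned, yields commonAlignment(Align(16), 4) == Align(4).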
12989 | MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { |
12990 | // If this is a GlobalAddress + cst, return the alignment. |
12991 | const GlobalValue *GV = nullptr; |
12992 | int64_t GVOffset = 0; |
12993 | if (TLI->isGAPlusOffset(N: Ptr.getNode(), GA&: GV, Offset&: GVOffset)) { |
12994 | unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); |
12995 | KnownBits Known(PtrWidth); |
12996 | llvm::computeKnownBits(V: GV, Known, DL: getDataLayout()); |
12997 | unsigned AlignBits = Known.countMinTrailingZeros(); |
12998 | if (AlignBits) |
12999 | return commonAlignment(A: Align(1ull << std::min(a: 31U, b: AlignBits)), Offset: GVOffset); |
13000 | } |
13001 | |
13002 | // If this is a direct reference to a stack slot, use information about the |
13003 | // stack slot's alignment. |
13004 | int FrameIdx = INT_MIN; |
13005 | int64_t FrameOffset = 0; |
13006 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Ptr)) { |
13007 | FrameIdx = FI->getIndex(); |
13008 | } else if (isBaseWithConstantOffset(Op: Ptr) && |
13009 | isa<FrameIndexSDNode>(Val: Ptr.getOperand(i: 0))) { |
13010 | // Handle FI+Cst |
13011 | FrameIdx = cast<FrameIndexSDNode>(Val: Ptr.getOperand(i: 0))->getIndex(); |
13012 | FrameOffset = Ptr.getConstantOperandVal(i: 1); |
13013 | } |
13014 | |
13015 | if (FrameIdx != INT_MIN) { |
13016 | const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); |
13017 | return commonAlignment(A: MFI.getObjectAlign(ObjectIdx: FrameIdx), Offset: FrameOffset); |
13018 | } |
13019 | |
13020 | return std::nullopt; |
13021 | } |
13022 | |
13023 | /// Split the scalar node with EXTRACT_ELEMENT using the provided |
13024 | /// VTs and return the low/high part. |
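/// For example, an i64 value splits into i32 halves, with EXTRACT_ELEMENT
/// index 0 selecting the low part:
/// \code
///   auto [Lo, Hi] = DAG.SplitScalar(N, DL, MVT::i32, MVT::i32);
/// \endcode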
13025 | std::pair<SDValue, SDValue> SelectionDAG::SplitScalar(const SDValue &N, |
13026 | const SDLoc &DL, |
13027 | const EVT &LoVT, |
13028 | const EVT &HiVT) { |
13029 | assert(!LoVT.isVector() && !HiVT.isVector() && !N.getValueType().isVector() && |
13030 | "Split node must be a scalar type"); |
13031 | SDValue Lo = |
13032 | getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: LoVT, N1: N, N2: getIntPtrConstant(Val: 0, DL)); |
13033 | SDValue Hi = |
13034 | getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: HiVT, N1: N, N2: getIntPtrConstant(Val: 1, DL)); |
13035 | return std::make_pair(x&: Lo, y&: Hi); |
13036 | } |
13037 | |
13038 | /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type |
13039 | /// which is split (or expanded) into two not necessarily identical pieces. |
13040 | std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { |
13041 | // Currently all types are split in half. |
13042 | EVT LoVT, HiVT; |
13043 | if (!VT.isVector()) |
13044 | LoVT = HiVT = TLI->getTypeToTransformTo(Context&: *getContext(), VT); |
13045 | else |
13046 | LoVT = HiVT = VT.getHalfNumVectorElementsVT(Context&: *getContext()); |
13047 | |
13048 | return std::make_pair(x&: LoVT, y&: HiVT); |
13049 | } |

/// GetDependentSplitDestVTs - Compute the VTs needed for the low/hi parts of a
/// type, dependent on an enveloping VT that has been split into two identical
/// pieces. Sets the HiIsEmpty flag when hi type has zero storage size.
std::pair<EVT, EVT>
SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
                                       bool *HiIsEmpty) const {
  EVT EltTp = VT.getVectorElementType();
  // Examples:
  //   custom VL=8  with enveloping VL=8/8 yields 8/0 (hi empty)
  //   custom VL=9  with enveloping VL=8/8 yields 8/1
  //   custom VL=10 with enveloping VL=8/8 yields 8/2
  //   etc.
  ElementCount VTNumElts = VT.getVectorElementCount();
  ElementCount EnvNumElts = EnvVT.getVectorElementCount();
  assert(VTNumElts.isScalable() == EnvNumElts.isScalable() &&
         "Mixing fixed width and scalable vectors when enveloping a type");
  EVT LoVT, HiVT;
  if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) {
    LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
    HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts);
    *HiIsEmpty = false;
  } else {
    // Flag that the hi type has zero storage size, but return the split
    // envelope type (this would be easier if vector types with zero elements
    // were allowed).
    LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts);
    HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
    *HiIsEmpty = true;
  }
  return std::make_pair(LoVT, HiVT);
}
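
// Editor's illustrative sketch (not upstream code): a custom v9i8 inside an
// enveloping type that was split into v8i8 halves, matching the VL=9 example
// in the comment above.
//   bool HiIsEmpty;
//   auto [LoVT, HiVT] = DAG.GetDependentSplitDestVTs(
//       EVT(MVT::v9i8), EVT(MVT::v8i8), &HiIsEmpty);
//   // LoVT == MVT::v8i8, HiVT == MVT::v1i8, HiIsEmpty == false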

/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
/// low/high part.
std::pair<SDValue, SDValue>
SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
                          const EVT &HiVT) {
  assert(LoVT.isScalableVector() == HiVT.isScalableVector() &&
         LoVT.isScalableVector() == N.getValueType().isScalableVector() &&
         "Splitting vector with an invalid mixture of fixed and scalable "
         "vector types");
  assert(LoVT.getVectorMinNumElements() + HiVT.getVectorMinNumElements() <=
             N.getValueType().getVectorMinNumElements() &&
         "More vector elements requested than available!");
  SDValue Lo, Hi;
  Lo = getExtractSubvector(DL, LoVT, N, 0);
  // For scalable vectors it is safe to use LoVT.getVectorMinNumElements()
  // (rather than having to use ElementCount), because EXTRACT_SUBVECTOR scales
  // IDX with the runtime scaling factor of the result vector type. For
  // fixed-width result vectors, that runtime scaling factor is 1.
  Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
               getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
  return std::make_pair(Lo, Hi);
}

std::pair<SDValue, SDValue> SelectionDAG::SplitEVL(SDValue N, EVT VecVT,
                                                   const SDLoc &DL) {
  // Split the vector length parameter.
  // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl, %halfnumelts).
  EVT VT = N.getValueType();
  assert(VecVT.getVectorElementCount().isKnownEven() &&
         "Expecting the mask to be an evenly-sized vector");
  unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
  SDValue HalfNumElts =
      VecVT.isFixedLengthVector()
          ? getConstant(HalfMinNumElts, DL, VT)
          : getVScale(DL, VT, APInt(VT.getScalarSizeInBits(), HalfMinNumElts));
  SDValue Lo = getNode(ISD::UMIN, DL, VT, N, HalfNumElts);
  SDValue Hi = getNode(ISD::USUBSAT, DL, VT, N, HalfNumElts);
  return std::make_pair(Lo, Hi);
}
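
// Editor's worked example (not upstream code): splitting the EVL for a
// fixed-length v8i32 vector, whose halves each hold 4 elements. With
// %evl == 5:
//   Lo = umin(5, 4)    == 4   // first half processes 4 elements
//   Hi = usubsat(5, 4) == 1   // second half processes the remaining element
// With %evl == 3, Lo == 3 and Hi == usubsat(3, 4) == 0 (saturated).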

/// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
  EVT VT = N.getValueType();
  EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
                                NextPowerOf2(VT.getVectorNumElements()));
  return getInsertSubvector(DL, getUNDEF(WideVT), N, 0);
}
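
// Editor's illustrative sketch (not upstream code): widening a v3f32 value.
// NextPowerOf2(3) == 4, so the result is a v4f32 whose first three lanes hold
// N's elements and whose last lane is undef:
//   SDValue Wide = DAG.WidenVector(V3F32, DL);
//   // Wide == INSERT_SUBVECTOR(undef:v4f32, V3F32, 0)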

void SelectionDAG::ExtractVectorElements(SDValue Op,
                                         SmallVectorImpl<SDValue> &Args,
                                         unsigned Start, unsigned Count,
                                         EVT EltVT) {
  EVT VT = Op.getValueType();
  if (Count == 0)
    Count = VT.getVectorNumElements();
  if (EltVT == EVT())
    EltVT = VT.getVectorElementType();
  SDLoc SL(Op);
  for (unsigned i = Start, e = Start + Count; i != e; ++i) {
    Args.push_back(getExtractVectorElt(SL, EltVT, Op, i));
  }
}

// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
  return getGlobal()->getType()->getAddressSpace();
}

Type *ConstantPoolSDNode::getType() const {
  if (isMachineConstantPoolEntry())
    return Val.MachineCPVal->getType();
  return Val.ConstVal->getType();
}

bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
                                        unsigned &SplatBitSize,
                                        bool &HasAnyUndefs,
                                        unsigned MinSplatBits,
                                        bool IsBigEndian) const {
  EVT VT = getValueType(0);
  assert(VT.isVector() && "Expected a vector type");
  unsigned VecWidth = VT.getSizeInBits();
  if (MinSplatBits > VecWidth)
    return false;

  // FIXME: The widths are based on this node's type, but build vectors can
  // truncate their operands.
  SplatValue = APInt(VecWidth, 0);
  SplatUndef = APInt(VecWidth, 0);

  // Get the bits. Bits with undefined values (when the corresponding element
  // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
  // in SplatValue. If any of the values are not constant, give up and return
  // false.
  unsigned int NumOps = getNumOperands();
  assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
  unsigned EltWidth = VT.getScalarSizeInBits();

  for (unsigned j = 0; j < NumOps; ++j) {
    unsigned i = IsBigEndian ? NumOps - 1 - j : j;
    SDValue OpVal = getOperand(i);
    unsigned BitPos = j * EltWidth;

    if (OpVal.isUndef())
      SplatUndef.setBits(BitPos, BitPos + EltWidth);
    else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal))
      SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
    else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal))
      SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
    else
      return false;
  }

  // The build_vector is all constants or undefs. Find the smallest element
  // size that splats the vector.
  HasAnyUndefs = (SplatUndef != 0);

  // FIXME: This does not work for vectors with elements less than 8 bits.
  while (VecWidth > 8) {
    // If we can't split in half, stop here.
    if (VecWidth & 1)
      break;

    unsigned HalfSize = VecWidth / 2;
    APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize);
    APInt LowValue = SplatValue.extractBits(HalfSize, 0);
    APInt HighUndef = SplatUndef.extractBits(HalfSize, HalfSize);
    APInt LowUndef = SplatUndef.extractBits(HalfSize, 0);

    // If the two halves do not match (ignoring undef bits), stop here.
    if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
        MinSplatBits > HalfSize)
      break;

    SplatValue = HighValue | LowValue;
    SplatUndef = HighUndef & LowUndef;

    VecWidth = HalfSize;
  }

  // FIXME: The loop above only tries to split in halves, so for an input
  // vector such as <3 x i16> it cannot detect a SplatBitSize of 16. It is
  // unclear whether this is a design flaw currently limiting optimizations;
  // presumably, back when this helper was created, vectors were normally
  // power-of-2 sized.

  SplatBitSize = VecWidth;
  return true;
}
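
// Editor's worked example (not upstream code) of the halving loop above, for
// a build_vector <2 x i16> = <0x00FF, 0x00FF> (little-endian, no undefs):
//   VecWidth = 32, SplatValue = 0x00FF00FF
//   32 -> 16: halves 0x00FF and 0x00FF match, so VecWidth becomes 16
//   16 ->  8: halves 0x00 and 0xFF differ, so the loop stops
// Result: SplatBitSize == 16, SplatValue == 0x00FF, HasAnyUndefs == false.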

SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
                                         BitVector *UndefElements) const {
  unsigned NumOps = getNumOperands();
  if (UndefElements) {
    UndefElements->clear();
    UndefElements->resize(NumOps);
  }
  assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size");
  if (!DemandedElts)
    return SDValue();
  SDValue Splatted;
  for (unsigned i = 0; i != NumOps; ++i) {
    if (!DemandedElts[i])
      continue;
    SDValue Op = getOperand(i);
    if (Op.isUndef()) {
      if (UndefElements)
        (*UndefElements)[i] = true;
    } else if (!Splatted) {
      Splatted = Op;
    } else if (Splatted != Op) {
      return SDValue();
    }
  }

  if (!Splatted) {
    unsigned FirstDemandedIdx = DemandedElts.countr_zero();
    assert(getOperand(FirstDemandedIdx).isUndef() &&
           "Can only have a splat without a constant for all undefs.");
    return getOperand(FirstDemandedIdx);
  }

  return Splatted;
}

SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
  APInt DemandedElts = APInt::getAllOnes(getNumOperands());
  return getSplatValue(DemandedElts, UndefElements);
}

bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts,
                                            SmallVectorImpl<SDValue> &Sequence,
                                            BitVector *UndefElements) const {
  unsigned NumOps = getNumOperands();
  Sequence.clear();
  if (UndefElements) {
    UndefElements->clear();
    UndefElements->resize(NumOps);
  }
  assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size");
  if (!DemandedElts || NumOps < 2 || !isPowerOf2_32(NumOps))
    return false;

  // Set the undefs even if we don't find a sequence (like getSplatValue).
  if (UndefElements)
    for (unsigned I = 0; I != NumOps; ++I)
      if (DemandedElts[I] && getOperand(I).isUndef())
        (*UndefElements)[I] = true;

  // Iteratively widen the sequence length looking for repetitions.
  for (unsigned SeqLen = 1; SeqLen < NumOps; SeqLen *= 2) {
    Sequence.append(SeqLen, SDValue());
    for (unsigned I = 0; I != NumOps; ++I) {
      if (!DemandedElts[I])
        continue;
      SDValue &SeqOp = Sequence[I % SeqLen];
      SDValue Op = getOperand(I);
      if (Op.isUndef()) {
        if (!SeqOp)
          SeqOp = Op;
        continue;
      }
      if (SeqOp && !SeqOp.isUndef() && SeqOp != Op) {
        Sequence.clear();
        break;
      }
      SeqOp = Op;
    }
    if (!Sequence.empty())
      return true;
  }

  assert(Sequence.empty() && "Failed to empty non-repeating sequence pattern");
  return false;
}
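
// Editor's illustrative sketch (not upstream code): for a build_vector
// <a, b, a, b> the loop above fails at SeqLen == 1 (a != b) and succeeds at
// SeqLen == 2, returning true with Sequence == {a, b}. A build_vector
// <a, b, c, d> with all elements distinct exhausts SeqLen == 1 and 2 and
// returns false.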

bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
                                            BitVector *UndefElements) const {
  APInt DemandedElts = APInt::getAllOnes(getNumOperands());
  return getRepeatedSequence(DemandedElts, Sequence, UndefElements);
}

ConstantSDNode *
BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts,
                                        BitVector *UndefElements) const {
  return dyn_cast_or_null<ConstantSDNode>(
      getSplatValue(DemandedElts, UndefElements));
}

ConstantSDNode *
BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
  return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
}

ConstantFPSDNode *
BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts,
                                          BitVector *UndefElements) const {
  return dyn_cast_or_null<ConstantFPSDNode>(
      getSplatValue(DemandedElts, UndefElements));
}

ConstantFPSDNode *
BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
  return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
}

int32_t
BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
                                                   uint32_t BitWidth) const {
  if (ConstantFPSDNode *CN =
          dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
    bool IsExact;
    APSInt IntVal(BitWidth);
    const APFloat &APF = CN->getValueAPF();
    if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
            APFloat::opOK ||
        !IsExact)
      return -1;

    return IntVal.exactLogBase2();
  }
  return -1;
}

bool BuildVectorSDNode::getConstantRawBits(
    bool IsLittleEndian, unsigned DstEltSizeInBits,
    SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const {
  // Early-out if this contains anything but Undef/Constant/ConstantFP.
  if (!isConstant())
    return false;

  unsigned NumSrcOps = getNumOperands();
  unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits();
  assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
         "Invalid bitcast scale");

  // Extract raw src bits.
  SmallVector<APInt> SrcBitElements(NumSrcOps,
                                    APInt::getZero(SrcEltSizeInBits));
  BitVector SrcUndefElements(NumSrcOps, false);

  for (unsigned I = 0; I != NumSrcOps; ++I) {
    SDValue Op = getOperand(I);
    if (Op.isUndef()) {
      SrcUndefElements.set(I);
      continue;
    }
    auto *CInt = dyn_cast<ConstantSDNode>(Op);
    auto *CFP = dyn_cast<ConstantFPSDNode>(Op);
    assert((CInt || CFP) && "Unknown constant");
    SrcBitElements[I] = CInt ? CInt->getAPIntValue().trunc(SrcEltSizeInBits)
                             : CFP->getValueAPF().bitcastToAPInt();
  }

  // Recast to dst width.
  recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements,
                SrcBitElements, UndefElements, SrcUndefElements);
  return true;
}

void BuildVectorSDNode::recastRawBits(bool IsLittleEndian,
                                      unsigned DstEltSizeInBits,
                                      SmallVectorImpl<APInt> &DstBitElements,
                                      ArrayRef<APInt> SrcBitElements,
                                      BitVector &DstUndefElements,
                                      const BitVector &SrcUndefElements) {
  unsigned NumSrcOps = SrcBitElements.size();
  unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth();
  assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
         "Invalid bitcast scale");
  assert(NumSrcOps == SrcUndefElements.size() &&
         "Vector size mismatch");

  unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits;
  DstUndefElements.clear();
  DstUndefElements.resize(NumDstOps, false);
  DstBitElements.assign(NumDstOps, APInt::getZero(DstEltSizeInBits));

  // Concatenate src elements constant bits together into dst element.
  if (SrcEltSizeInBits <= DstEltSizeInBits) {
    unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits;
    for (unsigned I = 0; I != NumDstOps; ++I) {
      DstUndefElements.set(I);
      APInt &DstBits = DstBitElements[I];
      for (unsigned J = 0; J != Scale; ++J) {
        unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
        if (SrcUndefElements[Idx])
          continue;
        DstUndefElements.reset(I);
        const APInt &SrcBits = SrcBitElements[Idx];
        assert(SrcBits.getBitWidth() == SrcEltSizeInBits &&
               "Illegal constant bitwidths");
        DstBits.insertBits(SrcBits, J * SrcEltSizeInBits);
      }
    }
    return;
  }

  // Split src element constant bits into dst elements.
  unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits;
  for (unsigned I = 0; I != NumSrcOps; ++I) {
    if (SrcUndefElements[I]) {
      DstUndefElements.set(I * Scale, (I + 1) * Scale);
      continue;
    }
    const APInt &SrcBits = SrcBitElements[I];
    for (unsigned J = 0; J != Scale; ++J) {
      unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
      APInt &DstBits = DstBitElements[Idx];
      DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits);
    }
  }
}
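
// Editor's worked example (not upstream code): recasting two little-endian
// i32 constants into one i64 element (the "concatenate" path above, with
// Scale == 2):
//   Src = {0x11111111, 0x22222222}  ->  Dst = {0x2222222211111111}
// The split path runs in the opposite direction: an i64 source element
// 0x2222222211111111 becomes the two i32 elements {0x11111111, 0x22222222}.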

bool BuildVectorSDNode::isConstant() const {
  for (const SDValue &Op : op_values()) {
    unsigned Opc = Op.getOpcode();
    if (!Op.isUndef() && Opc != ISD::Constant && Opc != ISD::ConstantFP)
      return false;
  }
  return true;
}

std::optional<std::pair<APInt, APInt>>
BuildVectorSDNode::isConstantSequence() const {
  unsigned NumOps = getNumOperands();
  if (NumOps < 2)
    return std::nullopt;

  if (!isa<ConstantSDNode>(getOperand(0)) ||
      !isa<ConstantSDNode>(getOperand(1)))
    return std::nullopt;

  unsigned EltSize = getValueType(0).getScalarSizeInBits();
  APInt Start = getConstantOperandAPInt(0).trunc(EltSize);
  APInt Stride = getConstantOperandAPInt(1).trunc(EltSize) - Start;

  if (Stride.isZero())
    return std::nullopt;

  for (unsigned i = 2; i < NumOps; ++i) {
    if (!isa<ConstantSDNode>(getOperand(i)))
      return std::nullopt;

    APInt Val = getConstantOperandAPInt(i).trunc(EltSize);
    if (Val != (Start + (Stride * i)))
      return std::nullopt;
  }

  return std::make_pair(Start, Stride);
}
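
// Editor's illustrative sketch (not upstream code): a build_vector
// <i32 0, i32 2, i32 4, i32 6> matches Start + Stride * i with Start == 0 and
// Stride == 2, so isConstantSequence() returns {0, 2}. A zero stride (i.e. a
// splat) deliberately returns std::nullopt.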

bool ShuffleVectorSDNode::isSplatMask(ArrayRef<int> Mask) {
  // Find the first non-undef value in the shuffle mask.
  unsigned i, e;
  for (i = 0, e = Mask.size(); i != e && Mask[i] < 0; ++i)
    /* search */;

  // If all elements are undefined, this shuffle can be considered a splat
  // (although it should eventually get simplified away completely).
  if (i == e)
    return true;

  // Make sure all remaining elements are either undef or the same as the first
  // non-undef value.
  for (int Idx = Mask[i]; i != e; ++i)
    if (Mask[i] >= 0 && Mask[i] != Idx)
      return false;
  return true;
}

// Returns true if it is a constant integer BuildVector or constant integer,
// possibly hidden by a bitcast.
bool SelectionDAG::isConstantIntBuildVectorOrConstantInt(
    SDValue N, bool AllowOpaques) const {
  N = peekThroughBitcasts(N);

  if (auto *C = dyn_cast<ConstantSDNode>(N))
    return AllowOpaques || !C->isOpaque();

  if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
    return true;

  // Treat a GlobalAddress supporting constant offset folding as a
  // constant integer.
  if (auto *GA = dyn_cast<GlobalAddressSDNode>(N))
    if (GA->getOpcode() == ISD::GlobalAddress &&
        TLI->isOffsetFoldingLegal(GA))
      return true;

  if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
      isa<ConstantSDNode>(N.getOperand(0)))
    return true;
  return false;
}

// Returns true if it is a constant float BuildVector or constant float.
bool SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const {
  if (isa<ConstantFPSDNode>(N))
    return true;

  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    return true;

  if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
      isa<ConstantFPSDNode>(N.getOperand(0)))
    return true;

  return false;
}

std::optional<bool> SelectionDAG::isBoolConstant(SDValue N,
                                                 bool AllowTruncation) const {
  ConstantSDNode *Const = isConstOrConstSplat(N, false, AllowTruncation);
  if (!Const)
    return std::nullopt;

  const APInt &CVal = Const->getAPIntValue();
  switch (TLI->getBooleanContents(N.getValueType())) {
  case TargetLowering::ZeroOrOneBooleanContent:
    if (CVal.isOne())
      return true;
    if (CVal.isZero())
      return false;
    return std::nullopt;
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    if (CVal.isAllOnes())
      return true;
    if (CVal.isZero())
      return false;
    return std::nullopt;
  case TargetLowering::UndefinedBooleanContent:
    return CVal[0];
  }
  llvm_unreachable("Unknown BooleanContent enum");
}

void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
  assert(!Node->OperandList && "Node already has operands");
  assert(SDNode::getMaxNumOperands() >= Vals.size() &&
         "too many operands to fit into SDNode");
  SDUse *Ops = OperandRecycler.allocate(
      ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);

  bool IsDivergent = false;
  for (unsigned I = 0; I != Vals.size(); ++I) {
    Ops[I].setUser(Node);
    Ops[I].setInitial(Vals[I]);
    EVT VT = Ops[I].getValueType();

    // Skip Chain. It does not carry divergence.
    if (VT != MVT::Other &&
        (VT != MVT::Glue || gluePropagatesDivergence(Ops[I].getNode())) &&
        Ops[I].getNode()->isDivergent()) {
      IsDivergent = true;
    }
  }
  Node->NumOperands = Vals.size();
  Node->OperandList = Ops;
  if (!TLI->isSDNodeAlwaysUniform(Node)) {
    IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA);
    Node->SDNodeBits.IsDivergent = IsDivergent;
  }
  checkForCycles(Node);
}

SDValue SelectionDAG::getTokenFactor(const SDLoc &DL,
                                     SmallVectorImpl<SDValue> &Vals) {
  size_t Limit = SDNode::getMaxNumOperands();
  while (Vals.size() > Limit) {
    unsigned SliceIdx = Vals.size() - Limit;
    auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit);
    SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs);
    Vals.erase(Vals.begin() + SliceIdx, Vals.end());
    Vals.emplace_back(NewTF);
  }
  return getNode(ISD::TokenFactor, DL, MVT::Other, Vals);
}
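
// Editor's illustrative note (not upstream code): getTokenFactor folds an
// arbitrarily long list of chains into nested TokenFactor nodes, each within
// the SDNode operand limit. E.g., merging many store chains:
//   SmallVector<SDValue, 8> Chains = {Store0, Store1, /* ... */};
//   SDValue Chain = DAG.getTokenFactor(DL, Chains);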

SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
                                        EVT VT, SDNodeFlags Flags) {
  switch (Opcode) {
  default:
    return SDValue();
  case ISD::ADD:
  case ISD::OR:
  case ISD::XOR:
  case ISD::UMAX:
    return getConstant(0, DL, VT);
  case ISD::MUL:
    return getConstant(1, DL, VT);
  case ISD::AND:
  case ISD::UMIN:
    return getAllOnesConstant(DL, VT);
  case ISD::SMAX:
    return getConstant(APInt::getSignedMinValue(VT.getSizeInBits()), DL, VT);
  case ISD::SMIN:
    return getConstant(APInt::getSignedMaxValue(VT.getSizeInBits()), DL, VT);
  case ISD::FADD:
    // If flags allow, prefer positive zero since it's generally cheaper
    // to materialize on most targets.
    return getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, VT);
  case ISD::FMUL:
    return getConstantFP(1.0, DL, VT);
  case ISD::FMINNUM:
  case ISD::FMAXNUM: {
    // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF.
    const fltSemantics &Semantics = VT.getFltSemantics();
    APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) :
                        !Flags.hasNoInfs() ? APFloat::getInf(Semantics) :
                        APFloat::getLargest(Semantics);
    if (Opcode == ISD::FMAXNUM)
      NeutralAF.changeSign();

    return getConstantFP(NeutralAF, DL, VT);
  }
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM: {
    // Neutral element for fminimum is Inf or FLT_MAX, depending on FMF.
    const fltSemantics &Semantics = VT.getFltSemantics();
    APFloat NeutralAF = !Flags.hasNoInfs() ? APFloat::getInf(Semantics)
                                           : APFloat::getLargest(Semantics);
    if (Opcode == ISD::FMAXIMUM)
      NeutralAF.changeSign();

    return getConstantFP(NeutralAF, DL, VT);
  }
  }
}
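
// Editor's illustrative sketch (not upstream code): the neutral element is
// the value that leaves the operation's other operand unchanged, e.g. for
// padding the inactive lanes of a reduction:
//   SDValue IdAdd = DAG.getNeutralElement(ISD::ADD, DL, VT, Flags);  // 0
//   SDValue IdAnd = DAG.getNeutralElement(ISD::AND, DL, VT, Flags);  // all-ones
//   SDValue IdMin = DAG.getNeutralElement(ISD::SMIN, DL, VT, Flags); // INT_MAX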

/// Helper used to make a call to a library function that has one argument of
/// pointer type.
///
/// Such functions include 'fegetmode', 'fesetenv' and some others, which are
/// used to get or set floating-point state. They have one argument of pointer
/// type, which points to the memory region containing bits of the
/// floating-point state. The value returned by such a function is ignored in
/// the created call.
///
/// \param LibFunc Reference to library function (value of RTLIB::Libcall).
/// \param Ptr Pointer used to save/load state.
/// \param InChain Ingoing token chain.
/// \returns Outgoing chain token.
SDValue SelectionDAG::makeStateFunctionCall(unsigned LibFunc, SDValue Ptr,
                                            SDValue InChain,
                                            const SDLoc &DLoc) {
  assert(InChain.getValueType() == MVT::Other && "Expected token chain");
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Ptr;
  Entry.Ty = Ptr.getValueType().getTypeForEVT(*getContext());
  Args.push_back(Entry);
  RTLIB::Libcall LC = static_cast<RTLIB::Libcall>(LibFunc);
  SDValue Callee = getExternalSymbol(TLI->getLibcallName(LC),
                                     TLI->getPointerTy(getDataLayout()));
  TargetLowering::CallLoweringInfo CLI(*this);
  CLI.setDebugLoc(DLoc).setChain(InChain).setLibCallee(
      TLI->getLibcallCallingConv(LC), Type::getVoidTy(*getContext()), Callee,
      std::move(Args));
  return TLI->LowerCallTo(CLI).second;
}

void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
  assert(From && To && "Invalid SDNode; empty source SDValue?");
  auto I = SDEI.find(From);
  if (I == SDEI.end())
    return;

  // Use of operator[] on the DenseMap may cause an insertion, which invalidates
  // the iterator, hence the need to make a copy to prevent a use-after-free.
  NodeExtraInfo NEI = I->second;
  if (LLVM_LIKELY(!NEI.PCSections)) {
    // No deep copy required for the types of extra info set.
    //
    // FIXME: Investigate if other types of extra info also need deep copy. This
    // depends on the types of nodes they can be attached to: if some extra info
    // is only ever attached to nodes where a replacement To node is always the
    // node where later use and propagation of the extra info has the intended
    // semantics, no deep copy is required.
    SDEI[To] = std::move(NEI);
    return;
  }

  // We need to copy NodeExtraInfo to all _new_ nodes that are being introduced
  // through the replacement of From with To. Otherwise, replacements of a node
  // (From) with more complex nodes (To and its operands) may result in lost
  // extra info where the root node (To) is insignificant in further propagating
  // and using extra info when further lowering to MIR.
  //
  // In the first step pre-populate the visited set with the nodes reachable
  // from the old From node. This avoids copying NodeExtraInfo to parts of the
  // DAG that are not new and should be left untouched.
  SmallVector<const SDNode *> Leafs{From}; // Leafs reachable with VisitFrom.
  DenseSet<const SDNode *> FromReach; // The set of nodes reachable from From.
  auto VisitFrom = [&](auto &&Self, const SDNode *N, int MaxDepth) {
    if (MaxDepth == 0) {
      // Remember this node in case we need to increase MaxDepth and continue
      // populating FromReach from this node.
      Leafs.emplace_back(N);
      return;
    }
    if (!FromReach.insert(N).second)
      return;
    for (const SDValue &Op : N->op_values())
      Self(Self, Op.getNode(), MaxDepth - 1);
  };

  // Copy extra info to To and all its transitive operands (that are new).
  SmallPtrSet<const SDNode *, 8> Visited;
  auto DeepCopyTo = [&](auto &&Self, const SDNode *N) {
    if (FromReach.contains(N))
      return true;
    if (!Visited.insert(N).second)
      return true;
    if (getEntryNode().getNode() == N)
      return false;
    for (const SDValue &Op : N->op_values()) {
      if (!Self(Self, Op.getNode()))
        return false;
    }
    // Copy only if entry node was not reached.
    SDEI[N] = NEI;
    return true;
  };

  // We first try with a lower MaxDepth, assuming that the path to common
  // operands between From and To is relatively short. This significantly
  // improves performance in the common case. The initial MaxDepth is big
  // enough to avoid retry in the common case; the last MaxDepth is large
  // enough to avoid having to use the fallback below (and protects from
  // potential stack exhaustion from recursion).
  for (int PrevDepth = 0, MaxDepth = 16; MaxDepth <= 1024;
       PrevDepth = MaxDepth, MaxDepth *= 2, Visited.clear()) {
    // StartFrom is the previous (or initial) set of leafs reachable at the
    // previous maximum depth.
    SmallVector<const SDNode *> StartFrom;
    std::swap(StartFrom, Leafs);
    for (const SDNode *N : StartFrom)
      VisitFrom(VisitFrom, N, MaxDepth - PrevDepth);
    if (LLVM_LIKELY(DeepCopyTo(DeepCopyTo, To)))
      return;
    // This should happen very rarely (reached the entry node).
    LLVM_DEBUG(dbgs() << __func__ << ": MaxDepth=" << MaxDepth << " too low\n");
    assert(!Leafs.empty());
  }

  // This should not happen - but if it did, that means the subgraph reachable
  // from From has depth greater or equal to maximum MaxDepth, and VisitFrom()
  // could not visit all reachable common operands. Consequently, we were able
  // to reach the entry node.
  errs() << "warning: incomplete propagation of SelectionDAG::NodeExtraInfo\n";
  assert(false && "From subgraph too complex - increase max. MaxDepth?");
  // Best-effort fallback if assertions disabled.
  SDEI[To] = std::move(NEI);
}

#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
                                 SmallPtrSetImpl<const SDNode *> &Visited,
                                 SmallPtrSetImpl<const SDNode *> &Checked,
                                 const llvm::SelectionDAG *DAG) {
  // If this node has already been checked, don't check it again.
  if (Checked.count(N))
    return;

  // If a node has already been visited on this depth-first walk, reject it as
  // a cycle.
  if (!Visited.insert(N).second) {
    errs() << "Detected cycle in SelectionDAG\n";
    dbgs() << "Offending node:\n";
    N->dumprFull(DAG); dbgs() << "\n";
    abort();
  }

  for (const SDValue &Op : N->op_values())
    checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG);

  Checked.insert(N);
  Visited.erase(N);
}
#endif

void llvm::checkForCycles(const llvm::SDNode *N,
                          const llvm::SelectionDAG *DAG,
                          bool force) {
#ifndef NDEBUG
  bool check = force;
#ifdef EXPENSIVE_CHECKS
  check = true;
#endif // EXPENSIVE_CHECKS
  if (check) {
    assert(N && "Checking nonexistent SDNode");
    SmallPtrSet<const SDNode *, 32> visited;
    SmallPtrSet<const SDNode *, 32> checked;
    checkForCyclesHelper(N, visited, checked, DAG);
  }
#endif // !NDEBUG
}

void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
  checkForCycles(DAG->getRoot().getNode(), DAG, force);
}