1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/TargetLowering.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/Analysis/VectorUtils.h"
16#include "llvm/CodeGen/CallingConvLower.h"
17#include "llvm/CodeGen/CodeGenCommonISel.h"
18#include "llvm/CodeGen/MachineFrameInfo.h"
19#include "llvm/CodeGen/MachineFunction.h"
20#include "llvm/CodeGen/MachineJumpTableInfo.h"
21#include "llvm/CodeGen/MachineModuleInfoImpls.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/TargetRegisterInfo.h"
25#include "llvm/IR/DataLayout.h"
26#include "llvm/IR/DerivedTypes.h"
27#include "llvm/IR/GlobalVariable.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/Support/DivisionByConstantInfo.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/KnownBits.h"
34#include "llvm/Support/MathExtras.h"
35#include "llvm/Target/TargetMachine.h"
36#include <cctype>
37using namespace llvm;
38
39/// NOTE: The TargetMachine owns TLOF.
40TargetLowering::TargetLowering(const TargetMachine &tm)
41 : TargetLoweringBase(tm) {}
42
43const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
44 return nullptr;
45}
46
47bool TargetLowering::isPositionIndependent() const {
48 return getTargetMachine().isPositionIndependent();
49}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
53bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54 SDValue &Chain) const {
55 const Function &F = DAG.getMachineFunction().getFunction();
56
57 // First, check if tail calls have been disabled in this function.
58 if (F.getFnAttribute(Kind: "disable-tail-calls").getValueAsBool())
59 return false;
60
61 // Conservatively require the attributes of the call to match those of
62 // the return. Ignore following attributes because they don't affect the
63 // call sequence.
64 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66 Attribute::DereferenceableOrNull, Attribute::NoAlias,
67 Attribute::NonNull, Attribute::NoUndef})
68 CallerAttrs.removeAttribute(Attr);
69
70 if (CallerAttrs.hasAttributes())
71 return false;
72
73 // It's not safe to eliminate the sign / zero extension of the return value.
74 if (CallerAttrs.contains(Attribute::ZExt) ||
75 CallerAttrs.contains(Attribute::SExt))
76 return false;
77
78 // Check if the only use is a function return node.
79 return isUsedByReturnOnly(Node, Chain);
80}
81
/// Return true if every outgoing argument that lands in a callee-saved
/// register already carries the caller's incoming value for that same
/// register, i.e. the callee-saved registers are merely forwarded and a tail
/// call need not restore them.
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    // Stack-passed arguments cannot clobber a callee-saved register.
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee saved registers.
    if (MachineOperand::clobbersPhysReg(RegMask: CallerPreservedMask, PhysReg: Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    // for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    // Look through a zero-extension assertion; it does not change which
    // register value is being forwarded.
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(i: 0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Val: Value->getOperand(Num: 1))->getReg();
    // The virtual register must be the live-in copy of the same physreg.
    if (MRI.getLiveInPhysReg(VReg: ArgReg) != Reg)
      return false;
  }
  return true;
}
108
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror each relevant per-argument attribute from the call site into the
  // corresponding ArgListEntry flag.
  IsSExt = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: SExt);
  IsZExt = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: ZExt);
  IsInReg = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: InReg);
  IsSRet = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: StructRet);
  IsNest = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: Nest);
  IsByVal = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: ByVal);
  IsPreallocated = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: InAlloca);
  IsReturned = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: SwiftError);
  Alignment = Call->getParamStackAlign(ArgNo: ArgIdx);
  IndirectType = nullptr;
  // The ABI-indirection attributes are mutually exclusive: an argument may
  // carry at most one of byval/preallocated/inalloca/sret.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgNo: ArgIdx);
    // An explicit stack alignment wins; otherwise fall back to the byval
    // argument's own alignment.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgNo: ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgNo: ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgNo: ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgNo: ArgIdx);
}
141
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT. Returns the pair {call result, output chain}.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  // With no explicit incoming chain, hang the call off the DAG entry node.
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(n: Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
    // Each argument is either sign- or zero-extended, per target policy.
    Entry.IsSExt = shouldSignExtendTypeInLibCall(Type: NewOp.getValueType(),
                                                 IsSigned: CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

    // When softening FP operands, suppress extension entirely for operand
    // types the target says should not be extended in a libcall.
    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(Type: CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(x: Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error(reason: "Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(Sym: getLibcallName(Call: LC),
                                         VT: getPointerTy(DL: DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(Type: RetVT, IsSigned: CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  // The same soften exception applies to the return value.
  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(Type: CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(CC: getLibcallCallingConv(Call: LC), ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
197
/// Choose the sequence of value types used to lower a memory operation of
/// Op.size() bytes into at most Limit loads/stores. On success the chosen
/// types are appended to MemOps (one per emitted access, in order) and true
/// is returned; false means no lowering within Limit was found.
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // When an op-count limit is in force, reject fixed-destination memcpys
  // whose source is less aligned than the destination.
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type; MVT::Other means "no preference".
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, AddrSpace: DstAS, Alignment: Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(VT: LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(VT: LVT))
      VT = LVT;
  }

  // Greedily cover the remaining bytes, shrinking VT whenever it no longer
  // fits in what is left.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(Op: ISD::STORE, VT: NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(Op: ISD::STORE, MVT::VT: f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Step down through the integer types until a safe one is found
        // (i8 is always the floor).
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, AddrSpace: DstAS, Alignment: Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              Flags: MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    // Give up rather than exceed the target's operation budget.
    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(x: VT);
    Size -= VTSize;
  }

  return true;
}
287
288/// Soften the operands of a comparison. This code is shared among BR_CC,
289/// SELECT_CC, and SETCC handlers.
290void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
291 SDValue &NewLHS, SDValue &NewRHS,
292 ISD::CondCode &CCCode,
293 const SDLoc &dl, const SDValue OldLHS,
294 const SDValue OldRHS) const {
295 SDValue Chain;
296 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, DL: dl, OldLHS,
297 OldRHS, Chain);
298}
299
/// Chain-aware variant: rewrites a floating-point comparison into one or two
/// integer comparisons against soft-float libcall results. On return NewLHS /
/// NewRHS / CCCode describe the replacement integer setcc, and Chain (if
/// non-null on entry) is updated with the libcall chain(s).
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC2 is only used for the
  // predicates that need two calls (SETONE / SETUEQ below).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of the unordered test.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // Two libcalls: unordered test combined with ordered-equal.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, Value: true);
  auto Call = makeLibCall(DAG, LC: LC1, RetVT, Ops, CallOptions, dl, InChain: Chain);
  // The libcall result is compared against zero with the libcall's CC.
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(Val: 0, DL: dl, VT: RetVT);

  CCCode = getCmpLibcallCC(Call: LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-call predicate: combine both setcc results with AND (inverted
    // case) or OR, and merge the chains with a TokenFactor.
    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: RetVT);
    SDValue Tmp = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: NewLHS, RHS: NewRHS, Cond: CCCode);
    auto Call2 = makeLibCall(DAG, LC: LC2, RetVT, Ops, CallOptions, dl, InChain: Chain);
    CCCode = getCmpLibcallCC(Call: LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
    NewLHS = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Call2.first, RHS: NewRHS, Cond: CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(Opcode: ShouldInvertCC ? ISD::AND : ISD::OR, DL: dl,
                         VT: Tmp.getValueType(), N1: Tmp, N2: NewLHS);
    NewRHS = SDValue();
  }
}
439
440/// Return the entry encoding for a jump table in the current function. The
441/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
442unsigned TargetLowering::getJumpTableEncoding() const {
443 // In non-pic modes, just use the address of a block.
444 if (!isPositionIndependent())
445 return MachineJumpTableInfo::EK_BlockAddress;
446
447 // In PIC mode, if the target supports a GPRel32 directive, use it.
448 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
449 return MachineJumpTableInfo::EK_GPRel32BlockAddress;
450
451 // Otherwise, use a label difference.
452 return MachineJumpTableInfo::EK_LabelDifference32;
453}
454
455SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
456 SelectionDAG &DAG) const {
457 // If our PIC model is GP relative, use the global offset table as the base.
458 unsigned JTEncoding = getJumpTableEncoding();
459
460 if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
461 (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
462 return DAG.getGLOBAL_OFFSET_TABLE(VT: getPointerTy(DL: DAG.getDataLayout()));
463
464 return Table;
465}
466
467/// This returns the relocation base for the given PIC jumptable, the same as
468/// getPICJumpTableRelocBase, but as an MCExpr.
469const MCExpr *
470TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
471 unsigned JTI,MCContext &Ctx) const{
472 // The normal PIC reloc base is the label at the start of the jump table.
473 return MCSymbolRefExpr::create(Symbol: MF->getJTISymbol(JTI, Ctx), Ctx);
474}
475
476SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
477 SDValue Addr, int JTI,
478 SelectionDAG &DAG) const {
479 SDValue Chain = Value;
480 // Jump table debug info is only needed if CodeView is enabled.
481 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
482 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, DL: dl);
483 }
484 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
485}
486
487bool
488TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
489 const TargetMachine &TM = getTargetMachine();
490 const GlobalValue *GV = GA->getGlobal();
491
492 // If the address is not even local to this DSO we will have to load it from
493 // a got and then add the offset.
494 if (!TM.shouldAssumeDSOLocal(GV))
495 return false;
496
497 // If the code is position independent we will have to add a base register.
498 if (isPositionIndependent())
499 return false;
500
501 // Otherwise we can do it.
502 return true;
503}
504
505//===----------------------------------------------------------------------===//
506// Optimization Methods
507//===----------------------------------------------------------------------===//
508
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only a non-opaque constant RHS can be shrunk.
    auto *Op1C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(RHS: C))
      return false;

    // Clear any constant bits outside the demanded set and rebuild the node
    // with the narrowed constant.
    if (!C.isSubsetOf(RHS: DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(Val: DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, N1: Op.getOperand(i: 0), N2: NewC,
                                      Flags: Op->getFlags());
      return TLO.CombineTo(O: Op, N: NewOp);
    }

    break;
  }
  }

  return false;
}
558
559bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
560 const APInt &DemandedBits,
561 TargetLoweringOpt &TLO) const {
562 EVT VT = Op.getValueType();
563 APInt DemandedElts = VT.isVector()
564 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
565 : APInt(1, 1);
566 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
567}
568
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SmallVTBits);
    if (TLI.isTruncateFree(FromVT: VT, ToVT: SmallVT) && TLI.isZExtFree(FromTy: SmallVT, ToTy: VT)) {
      // We found a type with free casts: truncate both operands, perform the
      // operation narrow, then widen the result back with ANY_EXTEND.
      SDValue X = DAG.getNode(
          Opcode: Op.getOpcode(), DL: dl, VT: SmallVT,
          N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 0)),
          N2: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: X);
      return TLO.CombineTo(O: Op, N: Z);
    }
  }
  return false;
}
614
615bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
616 DAGCombinerInfo &DCI) const {
617 SelectionDAG &DAG = DCI.DAG;
618 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
619 !DCI.isBeforeLegalizeOps());
620 KnownBits Known;
621
622 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
623 if (Simplified) {
624 DCI.AddToWorklist(N: Op.getNode());
625 DCI.CommitTargetLoweringOpt(TLO);
626 }
627 return Simplified;
628}
629
630bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
631 const APInt &DemandedElts,
632 DAGCombinerInfo &DCI) const {
633 SelectionDAG &DAG = DCI.DAG;
634 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
635 !DCI.isBeforeLegalizeOps());
636 KnownBits Known;
637
638 bool Simplified =
639 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
640 if (Simplified) {
641 DCI.AddToWorklist(N: Op.getNode());
642 DCI.CommitTargetLoweringOpt(TLO);
643 }
644 return Simplified;
645}
646
647bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
648 KnownBits &Known,
649 TargetLoweringOpt &TLO,
650 unsigned Depth,
651 bool AssumeSingleUse) const {
652 EVT VT = Op.getValueType();
653
654 // Since the number of lanes in a scalable vector is unknown at compile time,
655 // we track one bit which is implicitly broadcast to all lanes. This means
656 // that all lanes in a scalable vector are considered demanded.
657 APInt DemandedElts = VT.isFixedLengthVector()
658 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
659 : APInt(1, 1);
660 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
661 AssumeSingleUse);
662}
663
664// TODO: Under what circumstances can we create nodes? Constant folding?
665SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
666 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
667 SelectionDAG &DAG, unsigned Depth) const {
668 EVT VT = Op.getValueType();
669
670 // Limit search depth.
671 if (Depth >= SelectionDAG::MaxRecursionDepth)
672 return SDValue();
673
674 // Ignore UNDEFs.
675 if (Op.isUndef())
676 return SDValue();
677
678 // Not demanding any bits/elts from Op.
679 if (DemandedBits == 0 || DemandedElts == 0)
680 return DAG.getUNDEF(VT);
681
682 bool IsLE = DAG.getDataLayout().isLittleEndian();
683 unsigned NumElts = DemandedElts.getBitWidth();
684 unsigned BitWidth = DemandedBits.getBitWidth();
685 KnownBits LHSKnown, RHSKnown;
686 switch (Op.getOpcode()) {
687 case ISD::BITCAST: {
688 if (VT.isScalableVector())
689 return SDValue();
690
691 SDValue Src = peekThroughBitcasts(V: Op.getOperand(i: 0));
692 EVT SrcVT = Src.getValueType();
693 EVT DstVT = Op.getValueType();
694 if (SrcVT == DstVT)
695 return Src;
696
697 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
698 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
699 if (NumSrcEltBits == NumDstEltBits)
700 if (SDValue V = SimplifyMultipleUseDemandedBits(
701 Op: Src, DemandedBits, DemandedElts, DAG, Depth: Depth + 1))
702 return DAG.getBitcast(VT: DstVT, V);
703
704 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
705 unsigned Scale = NumDstEltBits / NumSrcEltBits;
706 unsigned NumSrcElts = SrcVT.getVectorNumElements();
707 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
708 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
709 for (unsigned i = 0; i != Scale; ++i) {
710 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
711 unsigned BitOffset = EltOffset * NumSrcEltBits;
712 APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
713 if (!Sub.isZero()) {
714 DemandedSrcBits |= Sub;
715 for (unsigned j = 0; j != NumElts; ++j)
716 if (DemandedElts[j])
717 DemandedSrcElts.setBit((j * Scale) + i);
718 }
719 }
720
721 if (SDValue V = SimplifyMultipleUseDemandedBits(
722 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + 1))
723 return DAG.getBitcast(VT: DstVT, V);
724 }
725
726 // TODO - bigendian once we have test coverage.
727 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
728 unsigned Scale = NumSrcEltBits / NumDstEltBits;
729 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
730 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
731 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
732 for (unsigned i = 0; i != NumElts; ++i)
733 if (DemandedElts[i]) {
734 unsigned Offset = (i % Scale) * NumDstEltBits;
735 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
736 DemandedSrcElts.setBit(i / Scale);
737 }
738
739 if (SDValue V = SimplifyMultipleUseDemandedBits(
740 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + 1))
741 return DAG.getBitcast(VT: DstVT, V);
742 }
743
744 break;
745 }
746 case ISD::FREEZE: {
747 SDValue N0 = Op.getOperand(i: 0);
748 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
749 /*PoisonOnly=*/false))
750 return N0;
751 break;
752 }
753 case ISD::AND: {
754 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
755 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
756
757 // If all of the demanded bits are known 1 on one side, return the other.
758 // These bits cannot contribute to the result of the 'and' in this
759 // context.
760 if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One))
761 return Op.getOperand(i: 0);
762 if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One))
763 return Op.getOperand(i: 1);
764 break;
765 }
766 case ISD::OR: {
767 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
768 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
769
770 // If all of the demanded bits are known zero on one side, return the
771 // other. These bits cannot contribute to the result of the 'or' in this
772 // context.
773 if (DemandedBits.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero))
774 return Op.getOperand(i: 0);
775 if (DemandedBits.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero))
776 return Op.getOperand(i: 1);
777 break;
778 }
779 case ISD::XOR: {
780 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
781 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
782
783 // If all of the demanded bits are known zero on one side, return the
784 // other.
785 if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero))
786 return Op.getOperand(i: 0);
787 if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero))
788 return Op.getOperand(i: 1);
789 break;
790 }
791 case ISD::SHL: {
792 // If we are only demanding sign bits then we can use the shift source
793 // directly.
794 if (const APInt *MaxSA =
795 DAG.getValidMaximumShiftAmountConstant(V: Op, DemandedElts)) {
796 SDValue Op0 = Op.getOperand(i: 0);
797 unsigned ShAmt = MaxSA->getZExtValue();
798 unsigned NumSignBits =
799 DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
800 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
801 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
802 return Op0;
803 }
804 break;
805 }
806 case ISD::SETCC: {
807 SDValue Op0 = Op.getOperand(i: 0);
808 SDValue Op1 = Op.getOperand(i: 1);
809 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
810 // If (1) we only need the sign-bit, (2) the setcc operands are the same
811 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
812 // -1, we may be able to bypass the setcc.
813 if (DemandedBits.isSignMask() &&
814 Op0.getScalarValueSizeInBits() == BitWidth &&
815 getBooleanContents(Type: Op0.getValueType()) ==
816 BooleanContent::ZeroOrNegativeOneBooleanContent) {
817 // If we're testing X < 0, then this compare isn't needed - just use X!
818 // FIXME: We're limiting to integer types here, but this should also work
819 // if we don't care about FP signed-zero. The use of SETLT with FP means
820 // that we don't care about NaNs.
821 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
822 (isNullConstant(V: Op1) || ISD::isBuildVectorAllZeros(N: Op1.getNode())))
823 return Op0;
824 }
825 break;
826 }
827 case ISD::SIGN_EXTEND_INREG: {
828 // If none of the extended bits are demanded, eliminate the sextinreg.
829 SDValue Op0 = Op.getOperand(i: 0);
830 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
831 unsigned ExBits = ExVT.getScalarSizeInBits();
832 if (DemandedBits.getActiveBits() <= ExBits &&
833 shouldRemoveRedundantExtend(Op))
834 return Op0;
835 // If the input is already sign extended, just drop the extension.
836 unsigned NumSignBits = DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
837 if (NumSignBits >= (BitWidth - ExBits + 1))
838 return Op0;
839 break;
840 }
841 case ISD::ANY_EXTEND_VECTOR_INREG:
842 case ISD::SIGN_EXTEND_VECTOR_INREG:
843 case ISD::ZERO_EXTEND_VECTOR_INREG: {
844 if (VT.isScalableVector())
845 return SDValue();
846
847 // If we only want the lowest element and none of extended bits, then we can
848 // return the bitcasted source vector.
849 SDValue Src = Op.getOperand(i: 0);
850 EVT SrcVT = Src.getValueType();
851 EVT DstVT = Op.getValueType();
852 if (IsLE && DemandedElts == 1 &&
853 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
854 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
855 return DAG.getBitcast(VT: DstVT, V: Src);
856 }
857 break;
858 }
859 case ISD::INSERT_VECTOR_ELT: {
860 if (VT.isScalableVector())
861 return SDValue();
862
863 // If we don't demand the inserted element, return the base vector.
864 SDValue Vec = Op.getOperand(i: 0);
865 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
866 EVT VecVT = Vec.getValueType();
867 if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements()) &&
868 !DemandedElts[CIdx->getZExtValue()])
869 return Vec;
870 break;
871 }
872 case ISD::INSERT_SUBVECTOR: {
873 if (VT.isScalableVector())
874 return SDValue();
875
876 SDValue Vec = Op.getOperand(i: 0);
877 SDValue Sub = Op.getOperand(i: 1);
878 uint64_t Idx = Op.getConstantOperandVal(i: 2);
879 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
880 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
881 // If we don't demand the inserted subvector, return the base vector.
882 if (DemandedSubElts == 0)
883 return Vec;
884 break;
885 }
886 case ISD::VECTOR_SHUFFLE: {
887 assert(!VT.isScalableVector());
888 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
889
890 // If all the demanded elts are from one operand and are inline,
891 // then we can use the operand directly.
892 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
893 for (unsigned i = 0; i != NumElts; ++i) {
894 int M = ShuffleMask[i];
895 if (M < 0 || !DemandedElts[i])
896 continue;
897 AllUndef = false;
898 IdentityLHS &= (M == (int)i);
899 IdentityRHS &= ((M - NumElts) == i);
900 }
901
902 if (AllUndef)
903 return DAG.getUNDEF(VT: Op.getValueType());
904 if (IdentityLHS)
905 return Op.getOperand(i: 0);
906 if (IdentityRHS)
907 return Op.getOperand(i: 1);
908 break;
909 }
910 default:
911 // TODO: Probably okay to remove after audit; here to reduce change size
912 // in initial enablement patch for scalable vectors
913 if (VT.isScalableVector())
914 return SDValue();
915
916 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
917 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
918 Op, DemandedBits, DemandedElts, DAG, Depth))
919 return V;
920 break;
921 }
922 return SDValue();
923}
924
925SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
926 SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
927 unsigned Depth) const {
928 EVT VT = Op.getValueType();
929 // Since the number of lanes in a scalable vector is unknown at compile time,
930 // we track one bit which is implicitly broadcast to all lanes. This means
931 // that all lanes in a scalable vector are considered demanded.
932 APInt DemandedElts = VT.isFixedLengthVector()
933 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
934 : APInt(1, 1);
935 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
936 Depth);
937}
938
939SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
940 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
941 unsigned Depth) const {
942 APInt DemandedBits = APInt::getAllOnes(numBits: Op.getScalarValueSizeInBits());
943 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
944 Depth);
945}
946
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// The add is performed in an extended type so it cannot overflow; the
// known sign/zero bits of the operands are used to pick the narrowest
// power-of-2 element type in which a legal AVG node can be formed, and the
// result is extended/truncated back to the original type.
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(i: 0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(i: 0);
  SDValue ExtOpB = Add.getOperand(i: 1);
  SDValue Add2;
  // Match the avgceil "+1" commutatively: given the three non-shift operands
  // (Op1, Op2, Op3) and the inner add A, succeed if either Op2 or Op3 is the
  // constant 1, recording the other two as the AVG operands and A as the
  // inner add (checked for nuw/nsw later if we must widen).
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(N: Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(N: Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  // Ceil if we found a nested add carrying the +1 on either side; otherwise
  // ExtOpA/ExtOpB keep the plain add's operands and this is a floor average.
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(i: 0), ExtOpA.getOperand(i: 1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(i: 0), ExtOpB.getOperand(i: 1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  // Number of known high bits (sign or zero) usable for narrowing; note this
  // local intentionally shadows nothing but shares its name with the
  // llvm::KnownBits type.
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(Op: ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(Op: ExtOpB, DemandedElts, Depth);
  // Subtract one: the addition may consume one of the shared sign bits.
  unsigned NumSigned = std::min(a: NumSignedA, b: NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(Op: ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(Op: ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(a: NumZeroA, b: NumZeroB);

  // Pick the signed/unsigned flavour and record how many high bits are
  // redundant. Prefer the unsigned form when it proves strictly more bits.
  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(a: VT.getScalarSizeInBits() - KnownBits, b: 8);
  EVT NVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: llvm::bit_ceil(Value: MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NVT, EC: VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(Op: AVGOpc, VT: NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (!TLI.isOperationLegalOrCustom(Op: AVGOpc, VT))
      return SDValue();
    // Without no-overflow flags, doing the add in the original (non-extended)
    // width could wrap, so bail out rather than widen.
    if (DAG.willNotOverflowAdd(IsSigned, N0: Add.getOperand(i: 0),
                               N1: Add.getOperand(i: 1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, N0: Add2.getOperand(i: 0),
                                         N1: Add2.getOperand(i: 1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Build the AVG node in the narrow type and convert back to VT.
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(Opcode: AVGOpc, DL, VT: NVT, N1: DAG.getExtOrTrunc(IsSigned, Op: ExtOpA, DL, VT: NVT),
                  N2: DAG.getExtOrTrunc(IsSigned, Op: ExtOpB, DL, VT: NVT));
  return DAG.getExtOrTrunc(IsSigned, Op: ResultAVG, DL, VT);
}
1079
1080/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1081/// result of Op are ever used downstream. If we can use this information to
1082/// simplify Op, create a new simplified DAG node and return true, returning the
1083/// original and new nodes in Old and New. Otherwise, analyze the expression and
1084/// return a mask of Known bits for the expression (used to simplify the
1085/// caller). The Known bits may only be accurate for those bits in the
1086/// OriginalDemandedBits and OriginalDemandedElts.
1087bool TargetLowering::SimplifyDemandedBits(
1088 SDValue Op, const APInt &OriginalDemandedBits,
1089 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1090 unsigned Depth, bool AssumeSingleUse) const {
1091 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1092 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1093 "Mask size mismatches value type size!");
1094
1095 // Don't know anything.
1096 Known = KnownBits(BitWidth);
1097
1098 EVT VT = Op.getValueType();
1099 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1100 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1101 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1102 "Unexpected vector size");
1103
1104 APInt DemandedBits = OriginalDemandedBits;
1105 APInt DemandedElts = OriginalDemandedElts;
1106 SDLoc dl(Op);
1107
1108 // Undef operand.
1109 if (Op.isUndef())
1110 return false;
1111
1112 // We can't simplify target constants.
1113 if (Op.getOpcode() == ISD::TargetConstant)
1114 return false;
1115
1116 if (Op.getOpcode() == ISD::Constant) {
1117 // We know all of the bits for a constant!
1118 Known = KnownBits::makeConstant(C: Op->getAsAPIntVal());
1119 return false;
1120 }
1121
1122 if (Op.getOpcode() == ISD::ConstantFP) {
1123 // We know all of the bits for a floating point constant!
1124 Known = KnownBits::makeConstant(
1125 C: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF().bitcastToAPInt());
1126 return false;
1127 }
1128
1129 // Other users may use these bits.
1130 bool HasMultiUse = false;
1131 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1132 if (Depth >= SelectionDAG::MaxRecursionDepth) {
1133 // Limit search depth.
1134 return false;
1135 }
1136 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1137 DemandedBits = APInt::getAllOnes(numBits: BitWidth);
1138 DemandedElts = APInt::getAllOnes(numBits: NumElts);
1139 HasMultiUse = true;
1140 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1141 // Not demanding any bits/elts from Op.
1142 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1143 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1144 // Limit search depth.
1145 return false;
1146 }
1147
1148 KnownBits Known2;
1149 switch (Op.getOpcode()) {
1150 case ISD::SCALAR_TO_VECTOR: {
1151 if (VT.isScalableVector())
1152 return false;
1153 if (!DemandedElts[0])
1154 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1155
1156 KnownBits SrcKnown;
1157 SDValue Src = Op.getOperand(i: 0);
1158 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1159 APInt SrcDemandedBits = DemandedBits.zext(width: SrcBitWidth);
1160 if (SimplifyDemandedBits(Op: Src, DemandedBits: SrcDemandedBits, Known&: SrcKnown, TLO, Depth: Depth + 1))
1161 return true;
1162
1163 // Upper elements are undef, so only get the knownbits if we just demand
1164 // the bottom element.
1165 if (DemandedElts == 1)
1166 Known = SrcKnown.anyextOrTrunc(BitWidth);
1167 break;
1168 }
1169 case ISD::BUILD_VECTOR:
1170 // Collect the known bits that are shared by every demanded element.
1171 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1172 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1173 return false; // Don't fall through, will infinitely loop.
1174 case ISD::SPLAT_VECTOR: {
1175 SDValue Scl = Op.getOperand(i: 0);
1176 APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: Scl.getValueSizeInBits());
1177 KnownBits KnownScl;
1178 if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + 1))
1179 return true;
1180
1181 // Implicitly truncate the bits to match the official semantics of
1182 // SPLAT_VECTOR.
1183 Known = KnownScl.trunc(BitWidth);
1184 break;
1185 }
1186 case ISD::LOAD: {
1187 auto *LD = cast<LoadSDNode>(Val&: Op);
1188 if (getTargetConstantFromLoad(LD)) {
1189 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1190 return false; // Don't fall through, will infinitely loop.
1191 }
1192 if (ISD::isZEXTLoad(N: Op.getNode()) && Op.getResNo() == 0) {
1193 // If this is a ZEXTLoad and we are looking at the loaded value.
1194 EVT MemVT = LD->getMemoryVT();
1195 unsigned MemBits = MemVT.getScalarSizeInBits();
1196 Known.Zero.setBitsFrom(MemBits);
1197 return false; // Don't fall through, will infinitely loop.
1198 }
1199 break;
1200 }
1201 case ISD::INSERT_VECTOR_ELT: {
1202 if (VT.isScalableVector())
1203 return false;
1204 SDValue Vec = Op.getOperand(i: 0);
1205 SDValue Scl = Op.getOperand(i: 1);
1206 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
1207 EVT VecVT = Vec.getValueType();
1208
1209 // If index isn't constant, assume we need all vector elements AND the
1210 // inserted element.
1211 APInt DemandedVecElts(DemandedElts);
1212 if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements())) {
1213 unsigned Idx = CIdx->getZExtValue();
1214 DemandedVecElts.clearBit(BitPosition: Idx);
1215
1216 // Inserted element is not required.
1217 if (!DemandedElts[Idx])
1218 return TLO.CombineTo(O: Op, N: Vec);
1219 }
1220
1221 KnownBits KnownScl;
1222 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1223 APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: NumSclBits);
1224 if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + 1))
1225 return true;
1226
1227 Known = KnownScl.anyextOrTrunc(BitWidth);
1228
1229 KnownBits KnownVec;
1230 if (SimplifyDemandedBits(Op: Vec, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedVecElts, Known&: KnownVec, TLO,
1231 Depth: Depth + 1))
1232 return true;
1233
1234 if (!!DemandedVecElts)
1235 Known = Known.intersectWith(RHS: KnownVec);
1236
1237 return false;
1238 }
1239 case ISD::INSERT_SUBVECTOR: {
1240 if (VT.isScalableVector())
1241 return false;
1242 // Demand any elements from the subvector and the remainder from the src its
1243 // inserted into.
1244 SDValue Src = Op.getOperand(i: 0);
1245 SDValue Sub = Op.getOperand(i: 1);
1246 uint64_t Idx = Op.getConstantOperandVal(i: 2);
1247 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1248 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
1249 APInt DemandedSrcElts = DemandedElts;
1250 DemandedSrcElts.insertBits(SubBits: APInt::getZero(numBits: NumSubElts), bitPosition: Idx);
1251
1252 KnownBits KnownSub, KnownSrc;
1253 if (SimplifyDemandedBits(Op: Sub, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts, Known&: KnownSub, TLO,
1254 Depth: Depth + 1))
1255 return true;
1256 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known&: KnownSrc, TLO,
1257 Depth: Depth + 1))
1258 return true;
1259
1260 Known.Zero.setAllBits();
1261 Known.One.setAllBits();
1262 if (!!DemandedSubElts)
1263 Known = Known.intersectWith(RHS: KnownSub);
1264 if (!!DemandedSrcElts)
1265 Known = Known.intersectWith(RHS: KnownSrc);
1266
1267 // Attempt to avoid multi-use src if we don't need anything from it.
1268 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1269 !DemandedSrcElts.isAllOnes()) {
1270 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1271 Op: Sub, DemandedBits, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
1272 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1273 Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
1274 if (NewSub || NewSrc) {
1275 NewSub = NewSub ? NewSub : Sub;
1276 NewSrc = NewSrc ? NewSrc : Src;
1277 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: NewSrc, N2: NewSub,
1278 N3: Op.getOperand(i: 2));
1279 return TLO.CombineTo(O: Op, N: NewOp);
1280 }
1281 }
1282 break;
1283 }
1284 case ISD::EXTRACT_SUBVECTOR: {
1285 if (VT.isScalableVector())
1286 return false;
1287 // Offset the demanded elts by the subvector index.
1288 SDValue Src = Op.getOperand(i: 0);
1289 if (Src.getValueType().isScalableVector())
1290 break;
1291 uint64_t Idx = Op.getConstantOperandVal(i: 1);
1292 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1293 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
1294
1295 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known, TLO,
1296 Depth: Depth + 1))
1297 return true;
1298
1299 // Attempt to avoid multi-use src if we don't need anything from it.
1300 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1301 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1302 Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
1303 if (DemandedSrc) {
1304 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc,
1305 N2: Op.getOperand(i: 1));
1306 return TLO.CombineTo(O: Op, N: NewOp);
1307 }
1308 }
1309 break;
1310 }
1311 case ISD::CONCAT_VECTORS: {
1312 if (VT.isScalableVector())
1313 return false;
1314 Known.Zero.setAllBits();
1315 Known.One.setAllBits();
1316 EVT SubVT = Op.getOperand(i: 0).getValueType();
1317 unsigned NumSubVecs = Op.getNumOperands();
1318 unsigned NumSubElts = SubVT.getVectorNumElements();
1319 for (unsigned i = 0; i != NumSubVecs; ++i) {
1320 APInt DemandedSubElts =
1321 DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
1322 if (SimplifyDemandedBits(Op: Op.getOperand(i), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts,
1323 Known&: Known2, TLO, Depth: Depth + 1))
1324 return true;
1325 // Known bits are shared by every demanded subvector element.
1326 if (!!DemandedSubElts)
1327 Known = Known.intersectWith(RHS: Known2);
1328 }
1329 break;
1330 }
1331 case ISD::VECTOR_SHUFFLE: {
1332 assert(!VT.isScalableVector());
1333 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
1334
1335 // Collect demanded elements from shuffle operands..
1336 APInt DemandedLHS, DemandedRHS;
1337 if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: ShuffleMask, DemandedElts, DemandedLHS,
1338 DemandedRHS))
1339 break;
1340
1341 if (!!DemandedLHS || !!DemandedRHS) {
1342 SDValue Op0 = Op.getOperand(i: 0);
1343 SDValue Op1 = Op.getOperand(i: 1);
1344
1345 Known.Zero.setAllBits();
1346 Known.One.setAllBits();
1347 if (!!DemandedLHS) {
1348 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedLHS, Known&: Known2, TLO,
1349 Depth: Depth + 1))
1350 return true;
1351 Known = Known.intersectWith(RHS: Known2);
1352 }
1353 if (!!DemandedRHS) {
1354 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedRHS, Known&: Known2, TLO,
1355 Depth: Depth + 1))
1356 return true;
1357 Known = Known.intersectWith(RHS: Known2);
1358 }
1359
1360 // Attempt to avoid multi-use ops if we don't need anything from them.
1361 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1362 Op: Op0, DemandedBits, DemandedElts: DemandedLHS, DAG&: TLO.DAG, Depth: Depth + 1);
1363 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1364 Op: Op1, DemandedBits, DemandedElts: DemandedRHS, DAG&: TLO.DAG, Depth: Depth + 1);
1365 if (DemandedOp0 || DemandedOp1) {
1366 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1367 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1368 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, N1: Op0, N2: Op1, Mask: ShuffleMask);
1369 return TLO.CombineTo(O: Op, N: NewOp);
1370 }
1371 }
1372 break;
1373 }
1374 case ISD::AND: {
1375 SDValue Op0 = Op.getOperand(i: 0);
1376 SDValue Op1 = Op.getOperand(i: 1);
1377
1378 // If the RHS is a constant, check to see if the LHS would be zero without
1379 // using the bits from the RHS. Below, we use knowledge about the RHS to
1380 // simplify the LHS, here we're using information from the LHS to simplify
1381 // the RHS.
1382 if (ConstantSDNode *RHSC = isConstOrConstSplat(N: Op1)) {
1383 // Do not increment Depth here; that can cause an infinite loop.
1384 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth);
1385 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1386 if ((LHSKnown.Zero & DemandedBits) ==
1387 (~RHSC->getAPIntValue() & DemandedBits))
1388 return TLO.CombineTo(O: Op, N: Op0);
1389
1390 // If any of the set bits in the RHS are known zero on the LHS, shrink
1391 // the constant.
1392 if (ShrinkDemandedConstant(Op, DemandedBits: ~LHSKnown.Zero & DemandedBits,
1393 DemandedElts, TLO))
1394 return true;
1395
1396 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1397 // constant, but if this 'and' is only clearing bits that were just set by
1398 // the xor, then this 'and' can be eliminated by shrinking the mask of
1399 // the xor. For example, for a 32-bit X:
1400 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1401 if (isBitwiseNot(V: Op0) && Op0.hasOneUse() &&
1402 LHSKnown.One == ~RHSC->getAPIntValue()) {
1403 SDValue Xor = TLO.DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: Op1);
1404 return TLO.CombineTo(O: Op, N: Xor);
1405 }
1406 }
1407
1408 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1409 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1410 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1411 (Op0.getOperand(i: 0).isUndef() ||
1412 ISD::isBuildVectorOfConstantSDNodes(N: Op0.getOperand(i: 0).getNode())) &&
1413 Op0->hasOneUse()) {
1414 unsigned NumSubElts =
1415 Op0.getOperand(i: 1).getValueType().getVectorNumElements();
1416 unsigned SubIdx = Op0.getConstantOperandVal(i: 2);
1417 APInt DemandedSub =
1418 APInt::getBitsSet(numBits: NumElts, loBit: SubIdx, hiBit: SubIdx + NumSubElts);
1419 KnownBits KnownSubMask =
1420 TLO.DAG.computeKnownBits(Op: Op1, DemandedElts: DemandedSub & DemandedElts, Depth: Depth + 1);
1421 if (DemandedBits.isSubsetOf(RHS: KnownSubMask.One)) {
1422 SDValue NewAnd =
1423 TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: Op1);
1424 SDValue NewInsert =
1425 TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: NewAnd,
1426 N2: Op0.getOperand(i: 1), N3: Op0.getOperand(i: 2));
1427 return TLO.CombineTo(O: Op, N: NewInsert);
1428 }
1429 }
1430
1431 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1432 Depth: Depth + 1))
1433 return true;
1434 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1435 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.Zero & DemandedBits, OriginalDemandedElts: DemandedElts,
1436 Known&: Known2, TLO, Depth: Depth + 1))
1437 return true;
1438 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1439
1440 // If all of the demanded bits are known one on one side, return the other.
1441 // These bits cannot contribute to the result of the 'and'.
1442 if (DemandedBits.isSubsetOf(RHS: Known2.Zero | Known.One))
1443 return TLO.CombineTo(O: Op, N: Op0);
1444 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.One))
1445 return TLO.CombineTo(O: Op, N: Op1);
1446 // If all of the demanded bits in the inputs are known zeros, return zero.
1447 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1448 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: dl, VT));
1449 // If the RHS is a constant, see if we can simplify it.
1450 if (ShrinkDemandedConstant(Op, DemandedBits: ~Known2.Zero & DemandedBits, DemandedElts,
1451 TLO))
1452 return true;
1453 // If the operation can be done in a smaller type, do so.
1454 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1455 return true;
1456
1457 // Attempt to avoid multi-use ops if we don't need anything from them.
1458 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1459 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1460 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1461 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1462 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1463 if (DemandedOp0 || DemandedOp1) {
1464 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1465 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1466 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1467 return TLO.CombineTo(O: Op, N: NewOp);
1468 }
1469 }
1470
1471 Known &= Known2;
1472 break;
1473 }
1474 case ISD::OR: {
1475 SDValue Op0 = Op.getOperand(i: 0);
1476 SDValue Op1 = Op.getOperand(i: 1);
1477 SDNodeFlags Flags = Op.getNode()->getFlags();
1478 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1479 Depth: Depth + 1)) {
1480 if (Flags.hasDisjoint()) {
1481 Flags.setDisjoint(false);
1482 Op->setFlags(Flags);
1483 }
1484 return true;
1485 }
1486 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1487 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1488 Known&: Known2, TLO, Depth: Depth + 1)) {
1489 if (Flags.hasDisjoint()) {
1490 Flags.setDisjoint(false);
1491 Op->setFlags(Flags);
1492 }
1493 return true;
1494 }
1495 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1496
1497 // If all of the demanded bits are known zero on one side, return the other.
1498 // These bits cannot contribute to the result of the 'or'.
1499 if (DemandedBits.isSubsetOf(RHS: Known2.One | Known.Zero))
1500 return TLO.CombineTo(O: Op, N: Op0);
1501 if (DemandedBits.isSubsetOf(RHS: Known.One | Known2.Zero))
1502 return TLO.CombineTo(O: Op, N: Op1);
1503 // If the RHS is a constant, see if we can simplify it.
1504 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1505 return true;
1506 // If the operation can be done in a smaller type, do so.
1507 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1508 return true;
1509
1510 // Attempt to avoid multi-use ops if we don't need anything from them.
1511 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1512 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1513 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1514 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1515 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1516 if (DemandedOp0 || DemandedOp1) {
1517 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1518 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1519 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1520 return TLO.CombineTo(O: Op, N: NewOp);
1521 }
1522 }
1523
1524 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1525 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1526 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1527 Op0->hasOneUse() && Op1->hasOneUse()) {
1528 // Attempt to match all commutations - m_c_Or would've been useful!
1529 for (int I = 0; I != 2; ++I) {
1530 SDValue X = Op.getOperand(i: I).getOperand(i: 0);
1531 SDValue C1 = Op.getOperand(i: I).getOperand(i: 1);
1532 SDValue Alt = Op.getOperand(i: 1 - I).getOperand(i: 0);
1533 SDValue C2 = Op.getOperand(i: 1 - I).getOperand(i: 1);
1534 if (Alt.getOpcode() == ISD::OR) {
1535 for (int J = 0; J != 2; ++J) {
1536 if (X == Alt.getOperand(i: J)) {
1537 SDValue Y = Alt.getOperand(i: 1 - J);
1538 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1539 Ops: {C1, C2})) {
1540 SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1541 SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1542 return TLO.CombineTo(
1543 O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1544 }
1545 }
1546 }
1547 }
1548 }
1549 }
1550
1551 Known |= Known2;
1552 break;
1553 }
1554 case ISD::XOR: {
1555 SDValue Op0 = Op.getOperand(i: 0);
1556 SDValue Op1 = Op.getOperand(i: 1);
1557
1558 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1559 Depth: Depth + 1))
1560 return true;
1561 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1562 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1563 Depth: Depth + 1))
1564 return true;
1565 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1566
1567 // If all of the demanded bits are known zero on one side, return the other.
1568 // These bits cannot contribute to the result of the 'xor'.
1569 if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1570 return TLO.CombineTo(O: Op, N: Op0);
1571 if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1572 return TLO.CombineTo(O: Op, N: Op1);
1573 // If the operation can be done in a smaller type, do so.
1574 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1575 return true;
1576
1577 // If all of the unknown bits are known to be zero on one side or the other
1578 // turn this into an *inclusive* or.
1579 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1580 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1581 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1582
1583 ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1584 if (C) {
1585 // If one side is a constant, and all of the set bits in the constant are
1586 // also known set on the other side, turn this into an AND, as we know
1587 // the bits will be cleared.
1588 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1589 // NB: it is okay if more bits are known than are requested
1590 if (C->getAPIntValue() == Known2.One) {
1591 SDValue ANDC =
1592 TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1593 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1594 }
1595
1596 // If the RHS is a constant, see if we can change it. Don't alter a -1
1597 // constant because that's a 'not' op, and that is better for combining
1598 // and codegen.
1599 if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1600 // We're flipping all demanded bits. Flip the undemanded bits too.
1601 SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1602 return TLO.CombineTo(O: Op, N: New);
1603 }
1604
1605 unsigned Op0Opcode = Op0.getOpcode();
1606 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1607 if (ConstantSDNode *ShiftC =
1608 isConstOrConstSplat(N: Op0.getOperand(i: 1), DemandedElts)) {
1609 // Don't crash on an oversized shift. We can not guarantee that a
1610 // bogus shift has been simplified to undef.
1611 if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1612 uint64_t ShiftAmt = ShiftC->getZExtValue();
1613 APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1614 Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1615 : Ones.lshr(shiftAmt: ShiftAmt);
1616 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1617 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1618 TLI.isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1619 // If the xor constant is a demanded mask, do a 'not' before the
1620 // shift:
1621 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1622 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1623 SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: 0), VT);
1624 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1625 N2: Op0.getOperand(i: 1)));
1626 }
1627 }
1628 }
1629 }
1630 }
1631
1632 // If we can't turn this into a 'not', try to shrink the constant.
1633 if (!C || !C->isAllOnes())
1634 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1635 return true;
1636
1637 // Attempt to avoid multi-use ops if we don't need anything from them.
1638 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1639 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1640 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1641 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1642 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1643 if (DemandedOp0 || DemandedOp1) {
1644 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1645 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1646 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1647 return TLO.CombineTo(O: Op, N: NewOp);
1648 }
1649 }
1650
1651 Known ^= Known2;
1652 break;
1653 }
1654 case ISD::SELECT:
1655 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1656 Known, TLO, Depth: Depth + 1))
1657 return true;
1658 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1659 Known&: Known2, TLO, Depth: Depth + 1))
1660 return true;
1661 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1662 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1663
1664 // If the operands are constants, see if we can simplify them.
1665 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1666 return true;
1667
1668 // Only known if known in both the LHS and RHS.
1669 Known = Known.intersectWith(RHS: Known2);
1670 break;
1671 case ISD::VSELECT:
1672 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1673 Known, TLO, Depth: Depth + 1))
1674 return true;
1675 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1676 Known&: Known2, TLO, Depth: Depth + 1))
1677 return true;
1678 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1679 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1680
1681 // Only known if known in both the LHS and RHS.
1682 Known = Known.intersectWith(RHS: Known2);
1683 break;
1684 case ISD::SELECT_CC:
1685 if (SimplifyDemandedBits(Op: Op.getOperand(i: 3), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1686 Known, TLO, Depth: Depth + 1))
1687 return true;
1688 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1689 Known&: Known2, TLO, Depth: Depth + 1))
1690 return true;
1691 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1692 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1693
1694 // If the operands are constants, see if we can simplify them.
1695 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1696 return true;
1697
1698 // Only known if known in both the LHS and RHS.
1699 Known = Known.intersectWith(RHS: Known2);
1700 break;
1701 case ISD::SETCC: {
1702 SDValue Op0 = Op.getOperand(i: 0);
1703 SDValue Op1 = Op.getOperand(i: 1);
1704 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
1705 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1706 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1707 // -1, we may be able to bypass the setcc.
1708 if (DemandedBits.isSignMask() &&
1709 Op0.getScalarValueSizeInBits() == BitWidth &&
1710 getBooleanContents(Type: Op0.getValueType()) ==
1711 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1712 // If we're testing X < 0, then this compare isn't needed - just use X!
1713 // FIXME: We're limiting to integer types here, but this should also work
1714 // if we don't care about FP signed-zero. The use of SETLT with FP means
1715 // that we don't care about NaNs.
1716 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1717 (isNullConstant(V: Op1) || ISD::isBuildVectorAllZeros(N: Op1.getNode())))
1718 return TLO.CombineTo(O: Op, N: Op0);
1719
1720 // TODO: Should we check for other forms of sign-bit comparisons?
1721 // Examples: X <= -1, X >= 0
1722 }
1723 if (getBooleanContents(Type: Op0.getValueType()) ==
1724 TargetLowering::ZeroOrOneBooleanContent &&
1725 BitWidth > 1)
1726 Known.Zero.setBitsFrom(1);
1727 break;
1728 }
1729 case ISD::SHL: {
1730 SDValue Op0 = Op.getOperand(i: 0);
1731 SDValue Op1 = Op.getOperand(i: 1);
1732 EVT ShiftVT = Op1.getValueType();
1733
1734 if (const APInt *SA =
1735 TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
1736 unsigned ShAmt = SA->getZExtValue();
1737 if (ShAmt == 0)
1738 return TLO.CombineTo(O: Op, N: Op0);
1739
1740 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1741 // single shift. We can do this if the bottom bits (which are shifted
1742 // out) are never demanded.
1743 // TODO - support non-uniform vector amounts.
1744 if (Op0.getOpcode() == ISD::SRL) {
1745 if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1746 if (const APInt *SA2 =
1747 TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
1748 unsigned C1 = SA2->getZExtValue();
1749 unsigned Opc = ISD::SHL;
1750 int Diff = ShAmt - C1;
1751 if (Diff < 0) {
1752 Diff = -Diff;
1753 Opc = ISD::SRL;
1754 }
1755 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1756 return TLO.CombineTo(
1757 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1758 }
1759 }
1760 }
1761
1762 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1763 // are not demanded. This will likely allow the anyext to be folded away.
1764 // TODO - support non-uniform vector amounts.
1765 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1766 SDValue InnerOp = Op0.getOperand(i: 0);
1767 EVT InnerVT = InnerOp.getValueType();
1768 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1769 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1770 isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1771 SDValue NarrowShl = TLO.DAG.getNode(
1772 Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1773 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1774 return TLO.CombineTo(
1775 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1776 }
1777
1778 // Repeat the SHL optimization above in cases where an extension
1779 // intervenes: (shl (anyext (shr x, c1)), c2) to
1780 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1781 // aren't demanded (as above) and that the shifted upper c1 bits of
1782 // x aren't demanded.
1783 // TODO - support non-uniform vector amounts.
1784 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1785 InnerOp.hasOneUse()) {
1786 if (const APInt *SA2 =
1787 TLO.DAG.getValidShiftAmountConstant(V: InnerOp, DemandedElts)) {
1788 unsigned InnerShAmt = SA2->getZExtValue();
1789 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1790 DemandedBits.getActiveBits() <=
1791 (InnerBits - InnerShAmt + ShAmt) &&
1792 DemandedBits.countr_zero() >= ShAmt) {
1793 SDValue NewSA =
1794 TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1795 SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1796 Operand: InnerOp.getOperand(i: 0));
1797 return TLO.CombineTo(
1798 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1799 }
1800 }
1801 }
1802 }
1803
1804 APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1805 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1806 Depth: Depth + 1)) {
1807 SDNodeFlags Flags = Op.getNode()->getFlags();
1808 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1809 // Disable the nsw and nuw flags. We can no longer guarantee that we
1810 // won't wrap after simplification.
1811 Flags.setNoSignedWrap(false);
1812 Flags.setNoUnsignedWrap(false);
1813 Op->setFlags(Flags);
1814 }
1815 return true;
1816 }
1817 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1818 Known.Zero <<= ShAmt;
1819 Known.One <<= ShAmt;
1820 // low bits known zero.
1821 Known.Zero.setLowBits(ShAmt);
1822
1823 // Attempt to avoid multi-use ops if we don't need anything from them.
1824 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1825 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1826 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1827 if (DemandedOp0) {
1828 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1829 return TLO.CombineTo(O: Op, N: NewOp);
1830 }
1831 }
1832
1833 // Try shrinking the operation as long as the shift amount will still be
1834 // in range.
1835 if ((ShAmt < DemandedBits.getActiveBits()) &&
1836 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1837 return true;
1838
1839 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1840 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1841 // Only do this if we demand the upper half so the knownbits are correct.
1842 unsigned HalfWidth = BitWidth / 2;
1843 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1844 DemandedBits.countLeadingOnes() >= HalfWidth) {
1845 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1846 if (isNarrowingProfitable(SrcVT: VT, DestVT: HalfVT) &&
1847 isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1848 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1849 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1850 // If we're demanding the upper bits at all, we must ensure
1851 // that the upper bits of the shift result are known to be zero,
1852 // which is equivalent to the narrow shift being NUW.
1853 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1854 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1855 SDNodeFlags Flags;
1856 Flags.setNoSignedWrap(IsNSW);
1857 Flags.setNoUnsignedWrap(IsNUW);
1858 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1859 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1860 Val: ShAmt, VT: HalfVT, DL: dl, LegalTypes: TLO.LegalTypes());
1861 SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1862 N2: NewShiftAmt, Flags);
1863 SDValue NewExt =
1864 TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1865 return TLO.CombineTo(O: Op, N: NewExt);
1866 }
1867 }
1868 }
1869 } else {
1870 // This is a variable shift, so we can't shift the demand mask by a known
1871 // amount. But if we are not demanding high bits, then we are not
1872 // demanding those bits from the pre-shifted operand either.
1873 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1874 APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1875 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1876 Depth: Depth + 1)) {
1877 SDNodeFlags Flags = Op.getNode()->getFlags();
1878 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1879 // Disable the nsw and nuw flags. We can no longer guarantee that we
1880 // won't wrap after simplification.
1881 Flags.setNoSignedWrap(false);
1882 Flags.setNoUnsignedWrap(false);
1883 Op->setFlags(Flags);
1884 }
1885 return true;
1886 }
1887 Known.resetAll();
1888 }
1889 }
1890
1891 // If we are only demanding sign bits then we can use the shift source
1892 // directly.
1893 if (const APInt *MaxSA =
1894 TLO.DAG.getValidMaximumShiftAmountConstant(V: Op, DemandedElts)) {
1895 unsigned ShAmt = MaxSA->getZExtValue();
1896 unsigned NumSignBits =
1897 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
1898 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1899 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1900 return TLO.CombineTo(O: Op, N: Op0);
1901 }
1902 break;
1903 }
1904 case ISD::SRL: {
1905 SDValue Op0 = Op.getOperand(i: 0);
1906 SDValue Op1 = Op.getOperand(i: 1);
1907 EVT ShiftVT = Op1.getValueType();
1908
1909 // Try to match AVG patterns.
1910 if (SDValue AVG = combineShiftToAVG(Op, DAG&: TLO.DAG, TLI: *this, DemandedBits,
1911 DemandedElts, Depth: Depth + 1))
1912 return TLO.CombineTo(O: Op, N: AVG);
1913
1914 if (const APInt *SA =
1915 TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
1916 unsigned ShAmt = SA->getZExtValue();
1917 if (ShAmt == 0)
1918 return TLO.CombineTo(O: Op, N: Op0);
1919
1920 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1921 // single shift. We can do this if the top bits (which are shifted out)
1922 // are never demanded.
1923 // TODO - support non-uniform vector amounts.
1924 if (Op0.getOpcode() == ISD::SHL) {
1925 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
1926 if (const APInt *SA2 =
1927 TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
1928 unsigned C1 = SA2->getZExtValue();
1929 unsigned Opc = ISD::SRL;
1930 int Diff = ShAmt - C1;
1931 if (Diff < 0) {
1932 Diff = -Diff;
1933 Opc = ISD::SHL;
1934 }
1935 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1936 return TLO.CombineTo(
1937 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1938 }
1939 }
1940 }
1941
1942 APInt InDemandedMask = (DemandedBits << ShAmt);
1943
1944 // If the shift is exact, then it does demand the low bits (and knows that
1945 // they are zero).
1946 if (Op->getFlags().hasExact())
1947 InDemandedMask.setLowBits(ShAmt);
1948
1949 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1950 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1951 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1952 APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / 2);
1953 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / 2);
1954 if (isNarrowingProfitable(SrcVT: VT, DestVT: HalfVT) &&
1955 isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
1956 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1957 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
1958 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1959 TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
1960 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1961 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1962 Val: ShAmt, VT: HalfVT, DL: dl, LegalTypes: TLO.LegalTypes());
1963 SDValue NewShift =
1964 TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
1965 return TLO.CombineTo(
1966 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
1967 }
1968 }
1969
1970 // Compute the new bits that are at the top now.
1971 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1972 Depth: Depth + 1))
1973 return true;
1974 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1975 Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
1976 Known.One.lshrInPlace(ShiftAmt: ShAmt);
1977 // High bits known zero.
1978 Known.Zero.setHighBits(ShAmt);
1979
1980 // Attempt to avoid multi-use ops if we don't need anything from them.
1981 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1982 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1983 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1984 if (DemandedOp0) {
1985 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1986 return TLO.CombineTo(O: Op, N: NewOp);
1987 }
1988 }
1989 } else {
1990 // Use generic knownbits computation as it has support for non-uniform
1991 // shift amounts.
1992 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1993 }
1994 break;
1995 }
1996 case ISD::SRA: {
1997 SDValue Op0 = Op.getOperand(i: 0);
1998 SDValue Op1 = Op.getOperand(i: 1);
1999 EVT ShiftVT = Op1.getValueType();
2000
2001 // If we only want bits that already match the signbit then we don't need
2002 // to shift.
2003 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2004 if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1) >=
2005 NumHiDemandedBits)
2006 return TLO.CombineTo(O: Op, N: Op0);
2007
2008 // If this is an arithmetic shift right and only the low-bit is set, we can
2009 // always convert this into a logical shr, even if the shift amount is
2010 // variable. The low bit of the shift cannot be an input sign bit unless
2011 // the shift amount is >= the size of the datatype, which is undefined.
2012 if (DemandedBits.isOne())
2013 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2014
2015 // Try to match AVG patterns.
2016 if (SDValue AVG = combineShiftToAVG(Op, DAG&: TLO.DAG, TLI: *this, DemandedBits,
2017 DemandedElts, Depth: Depth + 1))
2018 return TLO.CombineTo(O: Op, N: AVG);
2019
2020 if (const APInt *SA =
2021 TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
2022 unsigned ShAmt = SA->getZExtValue();
2023 if (ShAmt == 0)
2024 return TLO.CombineTo(O: Op, N: Op0);
2025
2026 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2027 // supports sext_inreg.
2028 if (Op0.getOpcode() == ISD::SHL) {
2029 if (const APInt *InnerSA =
2030 TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
2031 unsigned LowBits = BitWidth - ShAmt;
2032 EVT ExtVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits);
2033 if (VT.isVector())
2034 ExtVT = EVT::getVectorVT(Context&: *TLO.DAG.getContext(), VT: ExtVT,
2035 EC: VT.getVectorElementCount());
2036
2037 if (*InnerSA == ShAmt) {
2038 if (!TLO.LegalOperations() ||
2039 getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2040 return TLO.CombineTo(
2041 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2042 N1: Op0.getOperand(i: 0),
2043 N2: TLO.DAG.getValueType(ExtVT)));
2044
2045 // Even if we can't convert to sext_inreg, we might be able to
2046 // remove this shift pair if the input is already sign extended.
2047 unsigned NumSignBits =
2048 TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: 0), DemandedElts);
2049 if (NumSignBits > ShAmt)
2050 return TLO.CombineTo(O: Op, N: Op0.getOperand(i: 0));
2051 }
2052 }
2053 }
2054
2055 APInt InDemandedMask = (DemandedBits << ShAmt);
2056
2057 // If the shift is exact, then it does demand the low bits (and knows that
2058 // they are zero).
2059 if (Op->getFlags().hasExact())
2060 InDemandedMask.setLowBits(ShAmt);
2061
2062 // If any of the demanded bits are produced by the sign extension, we also
2063 // demand the input sign bit.
2064 if (DemandedBits.countl_zero() < ShAmt)
2065 InDemandedMask.setSignBit();
2066
2067 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2068 Depth: Depth + 1))
2069 return true;
2070 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2071 Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
2072 Known.One.lshrInPlace(ShiftAmt: ShAmt);
2073
2074 // If the input sign bit is known to be zero, or if none of the top bits
2075 // are demanded, turn this into an unsigned shift right.
2076 if (Known.Zero[BitWidth - ShAmt - 1] ||
2077 DemandedBits.countl_zero() >= ShAmt) {
2078 SDNodeFlags Flags;
2079 Flags.setExact(Op->getFlags().hasExact());
2080 return TLO.CombineTo(
2081 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2082 }
2083
2084 int Log2 = DemandedBits.exactLogBase2();
2085 if (Log2 >= 0) {
2086 // The bit must come from the sign.
2087 SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - 1 - Log2, DL: dl, VT: ShiftVT);
2088 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2089 }
2090
2091 if (Known.One[BitWidth - ShAmt - 1])
2092 // New bits are known one.
2093 Known.One.setHighBits(ShAmt);
2094
2095 // Attempt to avoid multi-use ops if we don't need anything from them.
2096 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2097 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2098 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2099 if (DemandedOp0) {
2100 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2101 return TLO.CombineTo(O: Op, N: NewOp);
2102 }
2103 }
2104 }
2105 break;
2106 }
2107 case ISD::FSHL:
2108 case ISD::FSHR: {
2109 SDValue Op0 = Op.getOperand(i: 0);
2110 SDValue Op1 = Op.getOperand(i: 1);
2111 SDValue Op2 = Op.getOperand(i: 2);
2112 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2113
2114 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2115 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2116
2117 // For fshl, 0-shift returns the 1st arg.
2118 // For fshr, 0-shift returns the 2nd arg.
2119 if (Amt == 0) {
2120 if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2121 Known, TLO, Depth: Depth + 1))
2122 return true;
2123 break;
2124 }
2125
2126 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2127 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2128 APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2129 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2130 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2131 Depth: Depth + 1))
2132 return true;
2133 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2134 Depth: Depth + 1))
2135 return true;
2136
2137 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2138 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2139 Known.One.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2140 Known.Zero.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2141 Known = Known.unionWith(RHS: Known2);
2142
2143 // Attempt to avoid multi-use ops if we don't need anything from them.
2144 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2145 !DemandedElts.isAllOnes()) {
2146 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2147 Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2148 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2149 Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2150 if (DemandedOp0 || DemandedOp1) {
2151 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2152 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2153 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2154 N2: DemandedOp1, N3: Op2);
2155 return TLO.CombineTo(O: Op, N: NewOp);
2156 }
2157 }
2158 }
2159
2160 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2161 if (isPowerOf2_32(Value: BitWidth)) {
2162 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2163 if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts,
2164 Known&: Known2, TLO, Depth: Depth + 1))
2165 return true;
2166 }
2167 break;
2168 }
2169 case ISD::ROTL:
2170 case ISD::ROTR: {
2171 SDValue Op0 = Op.getOperand(i: 0);
2172 SDValue Op1 = Op.getOperand(i: 1);
2173 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2174
2175 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2176 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1))
2177 return TLO.CombineTo(O: Op, N: Op0);
2178
2179 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2180 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2181 unsigned RevAmt = BitWidth - Amt;
2182
2183 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2184 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2185 APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2186 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2187 Depth: Depth + 1))
2188 return true;
2189
2190 // rot*(x, 0) --> x
2191 if (Amt == 0)
2192 return TLO.CombineTo(O: Op, N: Op0);
2193
2194 // See if we don't demand either half of the rotated bits.
2195 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT)) &&
2196 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2197 Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2198 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2199 }
2200 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT)) &&
2201 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2202 Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2203 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2204 }
2205 }
2206
2207 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2208 if (isPowerOf2_32(Value: BitWidth)) {
2209 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2210 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2211 Depth: Depth + 1))
2212 return true;
2213 }
2214 break;
2215 }
2216 case ISD::SMIN:
2217 case ISD::SMAX:
2218 case ISD::UMIN:
2219 case ISD::UMAX: {
2220 unsigned Opc = Op.getOpcode();
2221 SDValue Op0 = Op.getOperand(i: 0);
2222 SDValue Op1 = Op.getOperand(i: 1);
2223
2224 // If we're only demanding signbits, then we can simplify to OR/AND node.
2225 unsigned BitOp =
2226 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2227 unsigned NumSignBits =
2228 std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1),
2229 b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + 1));
2230 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2231 if (NumSignBits >= NumDemandedUpperBits)
2232 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc(Op), VT, N1: Op0, N2: Op1));
2233
2234 // Check if one arg is always less/greater than (or equal) to the other arg.
2235 KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2236 KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + 1);
2237 switch (Opc) {
2238 case ISD::SMIN:
2239 if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2240 return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2241 if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2242 return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2243 Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2244 break;
2245 case ISD::SMAX:
2246 if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2247 return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2248 if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2249 return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2250 Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2251 break;
2252 case ISD::UMIN:
2253 if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2254 return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2255 if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2256 return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2257 Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2258 break;
2259 case ISD::UMAX:
2260 if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2261 return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2262 if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2263 return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2264 Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2265 break;
2266 }
2267 break;
2268 }
2269 case ISD::BITREVERSE: {
2270 SDValue Src = Op.getOperand(i: 0);
2271 APInt DemandedSrcBits = DemandedBits.reverseBits();
2272 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2273 Depth: Depth + 1))
2274 return true;
2275 Known.One = Known2.One.reverseBits();
2276 Known.Zero = Known2.Zero.reverseBits();
2277 break;
2278 }
2279 case ISD::BSWAP: {
2280 SDValue Src = Op.getOperand(i: 0);
2281
2282 // If the only bits demanded come from one byte of the bswap result,
2283 // just shift the input byte into position to eliminate the bswap.
2284 unsigned NLZ = DemandedBits.countl_zero();
2285 unsigned NTZ = DemandedBits.countr_zero();
2286
2287 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2288 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2289 // have 14 leading zeros, round to 8.
2290 NLZ = alignDown(Value: NLZ, Align: 8);
2291 NTZ = alignDown(Value: NTZ, Align: 8);
2292 // If we need exactly one byte, we can do this transformation.
2293 if (BitWidth - NLZ - NTZ == 8) {
2294 // Replace this with either a left or right shift to get the byte into
2295 // the right place.
2296 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2297 if (!TLO.LegalOperations() || isOperationLegal(Op: ShiftOpcode, VT)) {
2298 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2299 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2300 SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2301 return TLO.CombineTo(O: Op, N: NewOp);
2302 }
2303 }
2304
2305 APInt DemandedSrcBits = DemandedBits.byteSwap();
2306 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2307 Depth: Depth + 1))
2308 return true;
2309 Known.One = Known2.One.byteSwap();
2310 Known.Zero = Known2.Zero.byteSwap();
2311 break;
2312 }
2313 case ISD::CTPOP: {
2314 // If only 1 bit is demanded, replace with PARITY as long as we're before
2315 // op legalization.
2316 // FIXME: Limit to scalars for now.
2317 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2318 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2319 Operand: Op.getOperand(i: 0)));
2320
2321 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2322 break;
2323 }
2324 case ISD::SIGN_EXTEND_INREG: {
2325 SDValue Op0 = Op.getOperand(i: 0);
2326 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2327 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2328
2329 // If we only care about the highest bit, don't bother shifting right.
2330 if (DemandedBits.isSignMask()) {
2331 unsigned MinSignedBits =
2332 TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2333 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2334 // However if the input is already sign extended we expect the sign
2335 // extension to be dropped altogether later and do not simplify.
2336 if (!AlreadySignExtended) {
2337 // Compute the correct shift amount type, which must be getShiftAmountTy
2338 // for scalar types after legalization.
2339 SDValue ShiftAmt =
2340 TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2341 return TLO.CombineTo(O: Op,
2342 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2343 }
2344 }
2345
2346 // If none of the extended bits are demanded, eliminate the sextinreg.
2347 if (DemandedBits.getActiveBits() <= ExVTBits)
2348 return TLO.CombineTo(O: Op, N: Op0);
2349
2350 APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2351
2352 // Since the sign extended bits are demanded, we know that the sign
2353 // bit is demanded.
2354 InputDemandedBits.setBit(ExVTBits - 1);
2355
2356 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2357 Depth: Depth + 1))
2358 return true;
2359 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2360
2361 // If the sign bit of the input is known set or clear, then we know the
2362 // top bits of the result.
2363
2364 // If the input sign bit is known zero, convert this into a zero extension.
2365 if (Known.Zero[ExVTBits - 1])
2366 return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2367
2368 APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2369 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2370 Known.One.setBitsFrom(ExVTBits);
2371 Known.Zero &= Mask;
2372 } else { // Input sign bit unknown
2373 Known.Zero &= Mask;
2374 Known.One &= Mask;
2375 }
2376 break;
2377 }
2378 case ISD::BUILD_PAIR: {
2379 EVT HalfVT = Op.getOperand(i: 0).getValueType();
2380 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2381
2382 APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2383 APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2384
2385 KnownBits KnownLo, KnownHi;
2386
2387 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + 1))
2388 return true;
2389
2390 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + 1))
2391 return true;
2392
2393 Known = KnownHi.concat(Lo: KnownLo);
2394 break;
2395 }
2396 case ISD::ZERO_EXTEND_VECTOR_INREG:
2397 if (VT.isScalableVector())
2398 return false;
2399 [[fallthrough]];
2400 case ISD::ZERO_EXTEND: {
2401 SDValue Src = Op.getOperand(i: 0);
2402 EVT SrcVT = Src.getValueType();
2403 unsigned InBits = SrcVT.getScalarSizeInBits();
2404 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2405 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2406
2407 // If none of the top bits are demanded, convert this into an any_extend.
2408 if (DemandedBits.getActiveBits() <= InBits) {
2409 // If we only need the non-extended bits of the bottom element
2410 // then we can just bitcast to the result.
2411 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2412 VT.getSizeInBits() == SrcVT.getSizeInBits())
2413 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2414
2415 unsigned Opc =
2416 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2417 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2418 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2419 }
2420
2421 SDNodeFlags Flags = Op->getFlags();
2422 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2423 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2424 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2425 Depth: Depth + 1)) {
2426 if (Flags.hasNonNeg()) {
2427 Flags.setNonNeg(false);
2428 Op->setFlags(Flags);
2429 }
2430 return true;
2431 }
2432 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2433 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2434 Known = Known.zext(BitWidth);
2435
2436 // Attempt to avoid multi-use ops if we don't need anything from them.
2437 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2438 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2439 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2440 break;
2441 }
2442 case ISD::SIGN_EXTEND_VECTOR_INREG:
2443 if (VT.isScalableVector())
2444 return false;
2445 [[fallthrough]];
2446 case ISD::SIGN_EXTEND: {
2447 SDValue Src = Op.getOperand(i: 0);
2448 EVT SrcVT = Src.getValueType();
2449 unsigned InBits = SrcVT.getScalarSizeInBits();
2450 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2451 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2452
2453 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2454 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2455
2456 // Since some of the sign extended bits are demanded, we know that the sign
2457 // bit is demanded.
2458 InDemandedBits.setBit(InBits - 1);
2459
2460 // If none of the top bits are demanded, convert this into an any_extend.
2461 if (DemandedBits.getActiveBits() <= InBits) {
2462 // If we only need the non-extended bits of the bottom element
2463 // then we can just bitcast to the result.
2464 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2465 VT.getSizeInBits() == SrcVT.getSizeInBits())
2466 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2467
2468 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2469 if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent ||
2470 TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + 1) !=
2471 InBits) {
2472 unsigned Opc =
2473 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2474 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2475 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2476 }
2477 }
2478
2479 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2480 Depth: Depth + 1))
2481 return true;
2482 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2483 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2484
2485 // If the sign bit is known one, the top bits match.
2486 Known = Known.sext(BitWidth);
2487
2488 // If the sign bit is known zero, convert this to a zero extend.
2489 if (Known.isNonNegative()) {
2490 unsigned Opc =
2491 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2492 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT)) {
2493 SDNodeFlags Flags;
2494 if (!IsVecInReg)
2495 Flags.setNonNeg(true);
2496 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2497 }
2498 }
2499
2500 // Attempt to avoid multi-use ops if we don't need anything from them.
2501 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2502 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2503 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2504 break;
2505 }
2506 case ISD::ANY_EXTEND_VECTOR_INREG:
2507 if (VT.isScalableVector())
2508 return false;
2509 [[fallthrough]];
2510 case ISD::ANY_EXTEND: {
2511 SDValue Src = Op.getOperand(i: 0);
2512 EVT SrcVT = Src.getValueType();
2513 unsigned InBits = SrcVT.getScalarSizeInBits();
2514 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2515 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2516
2517 // If we only need the bottom element then we can just bitcast.
2518 // TODO: Handle ANY_EXTEND?
2519 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2520 VT.getSizeInBits() == SrcVT.getSizeInBits())
2521 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2522
2523 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2524 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2525 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2526 Depth: Depth + 1))
2527 return true;
2528 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2529 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2530 Known = Known.anyext(BitWidth);
2531
2532 // Attempt to avoid multi-use ops if we don't need anything from them.
2533 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2534 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2535 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2536 break;
2537 }
2538 case ISD::TRUNCATE: {
2539 SDValue Src = Op.getOperand(i: 0);
2540
2541 // Simplify the input, using demanded bit information, and compute the known
2542 // zero/one bits live out.
2543 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2544 APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2545 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2546 Depth: Depth + 1))
2547 return true;
2548 Known = Known.trunc(BitWidth);
2549
2550 // Attempt to avoid multi-use ops if we don't need anything from them.
2551 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2552 Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2553 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2554
2555 // If the input is only used by this truncate, see if we can shrink it based
2556 // on the known demanded bits.
2557 switch (Src.getOpcode()) {
2558 default:
2559 break;
2560 case ISD::SRL:
2561 // Shrink SRL by a constant if none of the high bits shifted in are
2562 // demanded.
2563 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2564 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2565 // undesirable.
2566 break;
2567
2568 if (Src.getNode()->hasOneUse()) {
2569 const APInt *ShAmtC =
2570 TLO.DAG.getValidShiftAmountConstant(V: Src, DemandedElts);
2571 if (!ShAmtC || ShAmtC->uge(RHS: BitWidth))
2572 break;
2573 uint64_t ShVal = ShAmtC->getZExtValue();
2574
2575 APInt HighBits =
2576 APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2577 HighBits.lshrInPlace(ShiftAmt: ShVal);
2578 HighBits = HighBits.trunc(width: BitWidth);
2579
2580 if (!(HighBits & DemandedBits)) {
2581 // None of the shifted in bits are needed. Add a truncate of the
2582 // shift input, then shift it.
2583 SDValue NewShAmt =
2584 TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl, LegalTypes: TLO.LegalTypes());
2585 SDValue NewTrunc =
2586 TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: 0));
2587 return TLO.CombineTo(
2588 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2589 }
2590 }
2591 break;
2592 }
2593
2594 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2595 break;
2596 }
2597 case ISD::AssertZext: {
2598 // AssertZext demands all of the high bits, plus any of the low bits
2599 // demanded by its users.
2600 EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2601 APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2602 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: ~InMask | DemandedBits, Known,
2603 TLO, Depth: Depth + 1))
2604 return true;
2605 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2606
2607 Known.Zero |= ~InMask;
2608 Known.One &= (~Known.Zero);
2609 break;
2610 }
2611 case ISD::EXTRACT_VECTOR_ELT: {
2612 SDValue Src = Op.getOperand(i: 0);
2613 SDValue Idx = Op.getOperand(i: 1);
2614 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2615 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2616
2617 if (SrcEltCnt.isScalable())
2618 return false;
2619
2620 // Demand the bits from every vector element without a constant index.
2621 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2622 APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2623 if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2624 if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2625 DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2626
2627 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2628 // anything about the extended bits.
2629 APInt DemandedSrcBits = DemandedBits;
2630 if (BitWidth > EltBitWidth)
2631 DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2632
2633 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2634 Depth: Depth + 1))
2635 return true;
2636
2637 // Attempt to avoid multi-use ops if we don't need anything from them.
2638 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2639 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2640 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2641 SDValue NewOp =
2642 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2643 return TLO.CombineTo(O: Op, N: NewOp);
2644 }
2645 }
2646
2647 Known = Known2;
2648 if (BitWidth > EltBitWidth)
2649 Known = Known.anyext(BitWidth);
2650 break;
2651 }
2652 case ISD::BITCAST: {
2653 if (VT.isScalableVector())
2654 return false;
2655 SDValue Src = Op.getOperand(i: 0);
2656 EVT SrcVT = Src.getValueType();
2657 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2658
2659 // If this is an FP->Int bitcast and if the sign bit is the only
2660 // thing demanded, turn this into a FGETSIGN.
2661 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2662 DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2663 SrcVT.isFloatingPoint()) {
2664 bool OpVTLegal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT);
2665 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2666 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2667 SrcVT != MVT::f128) {
2668 // Cannot eliminate/lower SHL for f128 yet.
2669 EVT Ty = OpVTLegal ? VT : MVT::i32;
2670 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2671 // place. We expect the SHL to be eliminated by other optimizations.
2672 SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT: Ty, Operand: Src);
2673 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2674 if (!OpVTLegal && OpVTSizeInBits > 32)
2675 Sign = TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Sign);
2676 unsigned ShVal = Op.getValueSizeInBits() - 1;
2677 SDValue ShAmt = TLO.DAG.getConstant(Val: ShVal, DL: dl, VT);
2678 return TLO.CombineTo(O: Op,
2679 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2680 }
2681 }
2682
2683 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2684 // Demand the elt/bit if any of the original elts/bits are demanded.
2685 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2686 unsigned Scale = BitWidth / NumSrcEltBits;
2687 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2688 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2689 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2690 for (unsigned i = 0; i != Scale; ++i) {
2691 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2692 unsigned BitOffset = EltOffset * NumSrcEltBits;
2693 APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2694 if (!Sub.isZero()) {
2695 DemandedSrcBits |= Sub;
2696 for (unsigned j = 0; j != NumElts; ++j)
2697 if (DemandedElts[j])
2698 DemandedSrcElts.setBit((j * Scale) + i);
2699 }
2700 }
2701
2702 APInt KnownSrcUndef, KnownSrcZero;
2703 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2704 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2705 return true;
2706
2707 KnownBits KnownSrcBits;
2708 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2709 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2710 return true;
2711 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2712 // TODO - bigendian once we have test coverage.
2713 unsigned Scale = NumSrcEltBits / BitWidth;
2714 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2715 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2716 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2717 for (unsigned i = 0; i != NumElts; ++i)
2718 if (DemandedElts[i]) {
2719 unsigned Offset = (i % Scale) * BitWidth;
2720 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2721 DemandedSrcElts.setBit(i / Scale);
2722 }
2723
2724 if (SrcVT.isVector()) {
2725 APInt KnownSrcUndef, KnownSrcZero;
2726 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2727 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2728 return true;
2729 }
2730
2731 KnownBits KnownSrcBits;
2732 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2733 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2734 return true;
2735
2736 // Attempt to avoid multi-use ops if we don't need anything from them.
2737 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2738 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2739 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2740 SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2741 return TLO.CombineTo(O: Op, N: NewOp);
2742 }
2743 }
2744 }
2745
2746 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2747 // recursive call where Known may be useful to the caller.
2748 if (Depth > 0) {
2749 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2750 return false;
2751 }
2752 break;
2753 }
2754 case ISD::MUL:
2755 if (DemandedBits.isPowerOf2()) {
2756 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2757 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2758 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2759 unsigned CTZ = DemandedBits.countr_zero();
2760 ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
2761 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2762 SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2763 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: 0), N2: AmtC);
2764 return TLO.CombineTo(O: Op, N: Shl);
2765 }
2766 }
2767 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2768 // X * X is odd iff X is odd.
2769 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2770 if (Op.getOperand(i: 0) == Op.getOperand(i: 1) && DemandedBits.ult(RHS: 4)) {
2771 SDValue One = TLO.DAG.getConstant(Val: 1, DL: dl, VT);
2772 SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: 0), N2: One);
2773 return TLO.CombineTo(O: Op, N: And1);
2774 }
2775 [[fallthrough]];
2776 case ISD::ADD:
2777 case ISD::SUB: {
2778 // Add, Sub, and Mul don't demand any bits in positions beyond that
2779 // of the highest bit demanded of them.
2780 SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
2781 SDNodeFlags Flags = Op.getNode()->getFlags();
2782 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2783 APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2784 KnownBits KnownOp0, KnownOp1;
2785 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO,
2786 Depth: Depth + 1) ||
2787 SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2788 Depth: Depth + 1) ||
2789 // See if the operation should be performed at a smaller bit width.
2790 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2791 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2792 // Disable the nsw and nuw flags. We can no longer guarantee that we
2793 // won't wrap after simplification.
2794 Flags.setNoSignedWrap(false);
2795 Flags.setNoUnsignedWrap(false);
2796 Op->setFlags(Flags);
2797 }
2798 return true;
2799 }
2800
2801 // neg x with only low bit demanded is simply x.
2802 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2803 isNullConstant(V: Op0))
2804 return TLO.CombineTo(O: Op, N: Op1);
2805
2806 // Attempt to avoid multi-use ops if we don't need anything from them.
2807 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2808 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2809 Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2810 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2811 Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2812 if (DemandedOp0 || DemandedOp1) {
2813 Flags.setNoSignedWrap(false);
2814 Flags.setNoUnsignedWrap(false);
2815 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2816 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2817 SDValue NewOp =
2818 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1, Flags);
2819 return TLO.CombineTo(O: Op, N: NewOp);
2820 }
2821 }
2822
2823 // If we have a constant operand, we may be able to turn it into -1 if we
2824 // do not demand the high bits. This can make the constant smaller to
2825 // encode, allow more general folding, or match specialized instruction
2826 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2827 // is probably not useful (and could be detrimental).
2828 ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2829 APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2830 if (C && !C->isAllOnes() && !C->isOne() &&
2831 (C->getAPIntValue() | HighMask).isAllOnes()) {
2832 SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
2833 // Disable the nsw and nuw flags. We can no longer guarantee that we
2834 // won't wrap after simplification.
2835 Flags.setNoSignedWrap(false);
2836 Flags.setNoUnsignedWrap(false);
2837 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1, Flags);
2838 return TLO.CombineTo(O: Op, N: NewOp);
2839 }
2840
2841 // Match a multiply with a disguised negated-power-of-2 and convert to a
2842 // an equivalent shift-left amount.
2843 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2844 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2845 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2846 return 0;
2847
2848 // Don't touch opaque constants. Also, ignore zero and power-of-2
2849 // multiplies. Those will get folded later.
2850 ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: 1));
2851 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2852 !MulC->getAPIntValue().isPowerOf2()) {
2853 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2854 if (UnmaskedC.isNegatedPowerOf2())
2855 return (-UnmaskedC).logBase2();
2856 }
2857 return 0;
2858 };
2859
2860 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2861 unsigned ShlAmt) {
2862 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
2863 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
2864 SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
2865 return TLO.CombineTo(O: Op, N: Res);
2866 };
2867
2868 if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
2869 if (Op.getOpcode() == ISD::ADD) {
2870 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2871 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2872 return foldMul(ISD::SUB, Op0.getOperand(i: 0), Op1, ShAmt);
2873 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2874 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2875 return foldMul(ISD::SUB, Op1.getOperand(i: 0), Op0, ShAmt);
2876 }
2877 if (Op.getOpcode() == ISD::SUB) {
2878 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2879 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2880 return foldMul(ISD::ADD, Op1.getOperand(i: 0), Op0, ShAmt);
2881 }
2882 }
2883
2884 if (Op.getOpcode() == ISD::MUL) {
2885 Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
2886 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2887 Known = KnownBits::computeForAddSub(
2888 Add: Op.getOpcode() == ISD::ADD, NSW: Flags.hasNoSignedWrap(),
2889 NUW: Flags.hasNoUnsignedWrap(), LHS: KnownOp0, RHS: KnownOp1);
2890 }
2891 break;
2892 }
2893 default:
2894 // We also ask the target about intrinsics (which could be specific to it).
2895 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2896 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2897 // TODO: Probably okay to remove after audit; here to reduce change size
2898 // in initial enablement patch for scalable vectors
2899 if (Op.getValueType().isScalableVector())
2900 break;
2901 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2902 Known, TLO, Depth))
2903 return true;
2904 break;
2905 }
2906
2907 // Just use computeKnownBits to compute output bits.
2908 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2909 break;
2910 }
2911
2912 // If we know the value of all of the demanded bits, return this as a
2913 // constant.
2914 if (!isTargetCanonicalConstantNode(Op) &&
2915 DemandedBits.isSubsetOf(RHS: Known.Zero | Known.One)) {
2916 // Avoid folding to a constant if any OpaqueConstant is involved.
2917 const SDNode *N = Op.getNode();
2918 for (SDNode *Op :
2919 llvm::make_range(x: SDNodeIterator::begin(N), y: SDNodeIterator::end(N))) {
2920 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op))
2921 if (C->isOpaque())
2922 return false;
2923 }
2924 if (VT.isInteger())
2925 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
2926 if (VT.isFloatingPoint())
2927 return TLO.CombineTo(
2928 O: Op,
2929 N: TLO.DAG.getConstantFP(
2930 Val: APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), DL: dl, VT));
2931 }
2932
2933 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2934 // Try again just for the original demanded elts.
2935 // Ensure we do this AFTER constant folding above.
2936 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2937 Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
2938
2939 return false;
2940}
2941
2942bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2943 const APInt &DemandedElts,
2944 DAGCombinerInfo &DCI) const {
2945 SelectionDAG &DAG = DCI.DAG;
2946 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2947 !DCI.isBeforeLegalizeOps());
2948
2949 APInt KnownUndef, KnownZero;
2950 bool Simplified =
2951 SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
2952 if (Simplified) {
2953 DCI.AddToWorklist(N: Op.getNode());
2954 DCI.CommitTargetLoweringOpt(TLO);
2955 }
2956
2957 return Simplified;
2958}
2959
2960/// Given a vector binary operation and known undefined elements for each input
2961/// operand, compute whether each element of the output is undefined.
2962static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2963 const APInt &UndefOp0,
2964 const APInt &UndefOp1) {
2965 EVT VT = BO.getValueType();
2966 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2967 "Vector binop only");
2968
2969 EVT EltVT = VT.getVectorElementType();
2970 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2971 assert(UndefOp0.getBitWidth() == NumElts &&
2972 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2973
2974 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2975 const APInt &UndefVals) {
2976 if (UndefVals[Index])
2977 return DAG.getUNDEF(VT: EltVT);
2978
2979 if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
2980 // Try hard to make sure that the getNode() call is not creating temporary
2981 // nodes. Ignore opaque integers because they do not constant fold.
2982 SDValue Elt = BV->getOperand(Num: Index);
2983 auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
2984 if (isa<ConstantFPSDNode>(Val: Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2985 return Elt;
2986 }
2987
2988 return SDValue();
2989 };
2990
2991 APInt KnownUndef = APInt::getZero(numBits: NumElts);
2992 for (unsigned i = 0; i != NumElts; ++i) {
2993 // If both inputs for this element are either constant or undef and match
2994 // the element type, compute the constant/undef result for this element of
2995 // the vector.
2996 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2997 // not handle FP constants. The code within getNode() should be refactored
2998 // to avoid the danger of creating a bogus temporary node here.
2999 SDValue C0 = getUndefOrConstantElt(BO.getOperand(i: 0), i, UndefOp0);
3000 SDValue C1 = getUndefOrConstantElt(BO.getOperand(i: 1), i, UndefOp1);
3001 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3002 if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc(BO), VT: EltVT, N1: C0, N2: C1).isUndef())
3003 KnownUndef.setBit(i);
3004 }
3005 return KnownUndef;
3006}
3007
/// Try to simplify \p Op given that only \p OriginalDemandedElts of its
/// vector result elements are actually used. Elements proven to be UNDEF or
/// zero are reported through \p KnownUndef / \p KnownZero (bit i of each mask
/// corresponds to element i). When a cheaper equivalent node is found it is
/// committed through \p TLO.CombineTo and true is returned (the caller must
/// stop using Op); otherwise false is returned and only the Known* masks are
/// updated. \p AssumeSingleUse lets a caller demand a subset of the elements
/// even when Op has other users (otherwise all elements are demanded).
bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  APInt DemandedElts = OriginalDemandedElts;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");

  KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);

  const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
  if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
    return false;

  // TODO: For now we assume we know nothing about scalable vectors.
  if (VT.isScalableVector())
    return false;

  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();

  // Helper for demanding the specified elements and all the bits of both binary
  // operands. Returns true iff a simplified node was committed via
  // TLO.CombineTo (in which case the caller must return true as well).
  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
    SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
                                                           DAG&: TLO.DAG, Depth: Depth + 1);
    SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
                                                           DAG&: TLO.DAG, Depth: Depth + 1);
    if (NewOp0 || NewOp1) {
      SDValue NewOp =
          TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, N1: NewOp0 ? NewOp0 : Op0,
                          N2: NewOp1 ? NewOp1 : Op1, Flags: Op->getFlags());
      return TLO.CombineTo(O: Op, N: NewOp);
    }
    return false;
  };

  switch (Opcode) {
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
    }
    SDValue ScalarSrc = Op.getOperand(i: 0);
    if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      SDValue Src = ScalarSrc.getOperand(i: 0);
      SDValue Idx = ScalarSrc.getOperand(i: 1);
      EVT SrcVT = Src.getValueType();

      ElementCount SrcEltCnt = SrcVT.getVectorElementCount();

      if (SrcEltCnt.isScalable())
        return false;

      unsigned NumSrcElts = SrcEltCnt.getFixedValue();
      if (isNullConstant(V: Idx)) {
        APInt SrcDemandedElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: 0);
        APInt SrcUndef = KnownUndef.zextOrTrunc(width: NumSrcElts);
        APInt SrcZero = KnownZero.zextOrTrunc(width: NumSrcElts);
        if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
                                       TLO, Depth: Depth + 1))
          return true;
      }
    }
    // Only element 0 carries the scalar; the rest are undefined.
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(i: 0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth: Depth + 1);

    APInt SrcDemandedElts, SrcZero, SrcUndef;

    // Bitcast from 'large element' src vector to 'small element' vector, we
    // must demand a source element if any DemandedElt maps to it.
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
      if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
                                     TLO, Depth: Depth + 1))
        return true;

      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
      // of the large element.
      // TODO - bigendian once we have test coverage.
      if (IsLE) {
        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
        APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
        for (unsigned i = 0; i != NumElts; ++i)
          if (DemandedElts[i]) {
            unsigned Ofs = (i % Scale) * EltSizeInBits;
            SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
          }

        KnownBits Known;
        if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
                                 TLO, Depth: Depth + 1))
          return true;

        // The bitcast has split each wide element into a number of
        // narrow subelements. We have just computed the Known bits
        // for wide elements. See if element splitting results in
        // some subelements being zero. Only for demanded elements!
        for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
          if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
                   .isAllOnes())
            continue;
          for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
            unsigned Elt = Scale * SrcElt + SubElt;
            if (DemandedElts[Elt])
              KnownZero.setBit(Elt);
          }
        }
      }

      // If the src element is zero/undef then all the output elements will be -
      // only demanded elements are guaranteed to be correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
        }
      }
    }

    // Bitcast from 'small element' src vector to 'large element' vector, we
    // demand all smaller source elements covered by the larger demanded element
    // of this vector.
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
      if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
                                     TLO, Depth: Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef, then
      // the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
  case ISD::FREEZE: {
    SDValue N0 = Op.getOperand(i: 0);
    // freeze(x) -> x if x is already known not undef/poison.
    if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
                                                 /*PoisonOnly=*/false))
      return TLO.CombineTo(O: Op, N: N0);

    // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
    // freeze(op(x, ...)) -> op(freeze(x), ...).
    if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
      return TLO.CombineTo(
          O: Op, N: TLO.DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT,
                               Operand: TLO.DAG.getFreeze(V: N0.getOperand(i: 0))));
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnes()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Range: Op->op_values(),
                       P: [&](SDValue Elt) { return Op.getOperand(i: 0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(VT: Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(V: SrcOp) || isNullFPConstant(V: SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    EVT SubVT = Op.getOperand(i: 0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
                                     Depth: Depth + 1))
        return true;
      KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
      KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnes()) {
      bool FoundNewSub = false;
      SmallVector<SDValue, 2> DemandedSubOps;
      for (unsigned i = 0; i != NumSubVecs; ++i) {
        SDValue SubOp = Op.getOperand(i);
        APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
        SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
            Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + 1);
        DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
        FoundNewSub = NewSubOp ? true : FoundNewSub;
      }
      if (FoundNewSub) {
        SDValue NewOp =
            TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, Ops: DemandedSubOps);
        return TLO.CombineTo(O: Op, N: NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src it
    // is inserted into.
    SDValue Src = Op.getOperand(i: 0);
    SDValue Sub = Op.getOperand(i: 1);
    uint64_t Idx = Op.getConstantOperandVal(i: 2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(SubBits: APInt::getZero(numBits: NumSubElts), bitPosition: Idx);

    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
                                   Depth: Depth + 1))
      return true;

    // If none of the src operand elements are demanded, replace it with undef.
    if (!DemandedSrcElts && !Src.isUndef())
      return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
                                               N1: TLO.DAG.getUNDEF(VT), N2: Sub,
                                               N3: Op.getOperand(i: 2)));

    if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
                                   TLO, Depth: Depth + 1))
      return true;
    KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
    KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
      SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
          Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
      if (NewSrc || NewSub) {
        NewSrc = NewSrc ? NewSrc : Src;
        NewSub = NewSub ? NewSub : Sub;
        SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
                                        N2: NewSub, N3: Op.getOperand(i: 2));
        return TLO.CombineTo(O: Op, N: NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(i: 0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(i: 1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
                                   Depth: Depth + 1))
      return true;
    KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
    KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnes()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
      if (NewSrc) {
        SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
                                        N2: Op.getOperand(i: 1));
        return TLO.CombineTo(O: Op, N: NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(i: 0);
    SDValue Scl = Op.getOperand(i: 1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));

    // For a legal, constant insertion index, if we don't need this insertion
    // then strip it, else remove it from the demanded elts.
    if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
      unsigned Idx = CIdx->getZExtValue();
      if (!DemandedElts[Idx])
        return TLO.CombineTo(O: Op, N: Vec);

      APInt DemandedVecElts(DemandedElts);
      DemandedVecElts.clearBit(BitPosition: Idx);
      if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
                                     KnownZero, TLO, Depth: Depth + 1))
        return true;

      KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());

      KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) || isNullFPConstant(V: Scl));
      break;
    }

    APInt VecUndef, VecZero;
    if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
                                   Depth: Depth + 1))
      return true;
    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
    break;
  }
  case ISD::VSELECT: {
    SDValue Sel = Op.getOperand(i: 0);
    SDValue LHS = Op.getOperand(i: 1);
    SDValue RHS = Op.getOperand(i: 2);

    // Try to transform the select condition based on the current demanded
    // elements.
    APInt UndefSel, ZeroSel;
    if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
                                   Depth: Depth + 1))
      return true;

    // See if we can simplify either vselect operand.
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
                                   Depth: Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
                                   Depth: Depth + 1))
      return true;

    // An element is undef/zero only if both select arms agree.
    KnownUndef = UndefLHS & UndefRHS;
    KnownZero = ZeroLHS & ZeroRHS;

    // If we know that the selected element is always zero, we don't need the
    // select value element.
    APInt DemandedSel = DemandedElts & ~KnownZero;
    if (DemandedSel != DemandedElts)
      if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
                                     Depth: Depth + 1))
        return true;

    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    SDValue LHS = Op.getOperand(i: 0);
    SDValue RHS = Op.getOperand(i: 1);
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    // See if we can simplify either shuffle operand.
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
                                   Depth: Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
                                   Depth: Depth + 1))
      return true;

    // Simplify mask using undef elements from LHS/RHS.
    bool Updated = false;
    bool IdentityLHS = true, IdentityRHS = true;
    SmallVector<int, 32> NewMask(ShuffleMask);
    for (unsigned i = 0; i != NumElts; ++i) {
      int &M = NewMask[i];
      if (M < 0)
        continue;
      if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
          (M >= (int)NumElts && UndefRHS[M - NumElts])) {
        Updated = true;
        M = -1;
      }
      IdentityLHS &= (M < 0) || (M == (int)i);
      IdentityRHS &= (M < 0) || ((M - NumElts) == i);
    }

    // Update legal shuffle masks based on demanded elements if it won't reduce
    // to Identity which can cause premature removal of the shuffle mask.
    if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
      SDValue LegalShuffle =
          buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
      if (LegalShuffle)
        return TLO.CombineTo(O: Op, N: LegalShuffle);
    }

    // Propagate undef/zero elements from LHS/RHS.
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0) {
        KnownUndef.setBit(i);
      } else if (M < (int)NumElts) {
        if (UndefLHS[M])
          KnownUndef.setBit(i);
        if (ZeroLHS[M])
          KnownZero.setBit(i);
      } else {
        if (UndefRHS[M - NumElts])
          KnownUndef.setBit(i);
        if (ZeroRHS[M - NumElts])
          KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    APInt SrcUndef, SrcZero;
    SDValue Src = Op.getOperand(i: 0);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
    if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
                                   Depth: Depth + 1))
      return true;
    KnownZero = SrcZero.zextOrTrunc(width: NumElts);
    KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);

    if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
        Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
        DemandedSrcElts == 1) {
      // aext - if we just need the bottom element then we can bitcast.
      return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
    }

    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(RHS: KnownUndef))
        return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
      KnownUndef.clearAllBits();

      // zext - if we just need the bottom element then we can mask:
      // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
      if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
          Op->isOnlyUserOf(N: Src.getNode()) &&
          Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
        SDLoc DL(Op);
        EVT SrcVT = Src.getValueType();
        EVT SrcSVT = SrcVT.getScalarType();
        SmallVector<SDValue> MaskElts;
        MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
        MaskElts.append(NumInputs: NumSrcElts - 1, Elt: TLO.DAG.getConstant(Val: 0, DL, VT: SrcSVT));
        SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
        if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
                Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: 1), Mask})) {
          Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: 0), N2: Fold);
          return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
        }
      }
    }
    break;
  }

  // TODO: There are more binop opcodes that could be handled here - MIN,
  // MAX, saturated math, etc.
  case ISD::ADD: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);
    // add(x,x): both operands are the same node, so we may simplify assuming
    // a single use even when the node technically has two uses here.
    if (Op0 == Op1 && Op->isOnlyUserOf(N: Op0.getNode())) {
      APInt UndefLHS, ZeroLHS;
      if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
                                     Depth: Depth + 1, /*AssumeSingleUse*/ true))
        return true;
    }
    [[fallthrough]];
  }
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SUB:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
                                   Depth: Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
                                   Depth: Depth + 1))
      return true;

    KnownZero = ZeroLHS & ZeroRHS;
    KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
                                   Depth: Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
                                   Depth: Depth + 1))
      return true;

    // Shifting a zero element always yields zero, regardless of the amount.
    KnownZero = ZeroLHS;
    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::MUL:
  case ISD::MULHU:
  case ISD::MULHS:
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
                                   Depth: Depth + 1))
      return true;
    // If we know that a demanded element was zero in Op1 we don't need to
    // demand it in Op0 - it's guaranteed to be zero.
    APInt DemandedElts0 = DemandedElts & ~SrcZero;
    if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts0, KnownUndef, KnownZero,
                                   TLO, Depth: Depth + 1))
      return true;

    KnownUndef &= DemandedElts0;
    KnownZero &= DemandedElts0;

    // If every element pair has a zero/undef then just fold to zero.
    // fold (and x, undef) -> 0  /  (and x, 0) -> 0
    // fold (mul x, undef) -> 0  /  (mul x, 0) -> 0
    if (DemandedElts.isSubsetOf(RHS: SrcZero | KnownZero | SrcUndef | KnownUndef))
      return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));

    // If either side has a zero element, then the result element is zero, even
    // if the other is an UNDEF.
    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
    // and then handle 'and' nodes with the rest of the binop opcodes.
    KnownZero |= SrcZero;
    KnownUndef &= SrcUndef;
    KnownUndef &= ~KnownZero;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
                                   KnownZero, TLO, Depth: Depth + 1))
      return true;

    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(RHS: KnownUndef))
        return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  default: {
    // Target-specific nodes get a target hook; for generic nodes fall back to
    // SimplifyDemandedBits over all bits of each demanded element.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
                                                  KnownZero, TLO, Depth))
        return true;
    } else {
      KnownBits Known;
      APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
      if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
                               TLO, Depth, AssumeSingleUse))
        return true;
    }
    break;
  }
  }
  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");

  // Constant fold all undef cases.
  // TODO: Handle zero cases as well.
  if (DemandedElts.isSubsetOf(RHS: KnownUndef))
    return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));

  return false;
}
3689
3690/// Determine which of the bits specified in Mask are known to be either zero or
3691/// one and return them in the Known.
3692void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3693 KnownBits &Known,
3694 const APInt &DemandedElts,
3695 const SelectionDAG &DAG,
3696 unsigned Depth) const {
3697 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3698 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3699 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3700 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3701 "Should use MaskedValueIsZero if you don't know whether Op"
3702 " is a target node!");
3703 Known.resetAll();
3704}
3705
/// GlobalISel counterpart of computeKnownBitsForTargetNode: report known
/// zero/one bits of register \p R via \p Known. The base implementation
/// knows nothing; targets override this for their generic instructions.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  // Conservative default: no bits of R are known.
  Known.resetAll();
}
3712
3713void TargetLowering::computeKnownBitsForFrameIndex(
3714 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3715 // The low bits are known zero if the pointer is aligned.
3716 Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3717}
3718
/// Return the minimum known alignment of register \p R for a target generic
/// instruction. The base implementation conservatively returns byte
/// alignment; targets override this when they can prove more.
Align TargetLowering::computeKnownAlignForTargetInstr(
    GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  return Align(1);
}
3724
3725/// This method can be implemented by targets that want to expose additional
3726/// information about sign bits to the DAG Combiner.
3727unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3728 const APInt &,
3729 const SelectionDAG &,
3730 unsigned Depth) const {
3731 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3732 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3733 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3734 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3735 "Should use ComputeNumSignBits if you don't know whether Op"
3736 " is a target node!");
3737 return 1;
3738}
3739
/// GlobalISel counterpart of ComputeNumSignBitsForTargetNode. The base
/// implementation returns the minimum possible answer; targets override
/// this for their generic instructions.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  // Every value has at least one sign bit.
  return 1;
}
3745
3746bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3747 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3748 TargetLoweringOpt &TLO, unsigned Depth) const {
3749 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3750 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3751 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3752 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3753 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3754 " is a target node!");
3755 return false;
3756}
3757
3758bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3759 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3760 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3761 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3762 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3763 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3764 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3765 "Should use SimplifyDemandedBits if you don't know whether Op"
3766 " is a target node!");
3767 computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
3768 return false;
3769}
3770
3771SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3772 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3773 SelectionDAG &DAG, unsigned Depth) const {
3774 assert(
3775 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3776 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3777 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3778 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3779 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3780 " is a target node!");
3781 return SDValue();
3782}
3783
3784SDValue
3785TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3786 SDValue N1, MutableArrayRef<int> Mask,
3787 SelectionDAG &DAG) const {
3788 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3789 if (!LegalMask) {
3790 std::swap(a&: N0, b&: N1);
3791 ShuffleVectorSDNode::commuteMask(Mask);
3792 LegalMask = isShuffleMaskLegal(Mask, VT);
3793 }
3794
3795 if (!LegalMask)
3796 return SDValue();
3797
3798 return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
3799}
3800
/// Return the constant value a target-specific load is known to produce, or
/// null if unknown. The base implementation never recognizes one; targets
/// that materialize constants through loads (e.g. constant pools) override
/// this.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
3804
3805bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3806 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3807 bool PoisonOnly, unsigned Depth) const {
3808 assert(
3809 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3810 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3811 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3812 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3813 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3814 " is a target node!");
3815
3816 // If Op can't create undef/poison and none of its operands are undef/poison
3817 // then Op is never undef/poison.
3818 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3819 /*ConsiderFlags*/ true, Depth) &&
3820 all_of(Range: Op->ops(), P: [&](SDValue V) {
3821 return DAG.isGuaranteedNotToBeUndefOrPoison(Op: V, PoisonOnly,
3822 Depth: Depth + 1);
3823 });
3824}
3825
3826bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3827 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3828 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3829 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3830 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3831 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3832 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3833 "Should use canCreateUndefOrPoison if you don't know whether Op"
3834 " is a target node!");
3835 // Be conservative and return true.
3836 return true;
3837}
3838
3839bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3840 const SelectionDAG &DAG,
3841 bool SNaN,
3842 unsigned Depth) const {
3843 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3844 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3845 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3846 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3847 "Should use isKnownNeverNaN if you don't know whether Op"
3848 " is a target node!");
3849 return false;
3850}
3851
3852bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3853 const APInt &DemandedElts,
3854 APInt &UndefElts,
3855 const SelectionDAG &DAG,
3856 unsigned Depth) const {
3857 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3858 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3859 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3860 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3861 "Should use isSplatValue if you don't know whether Op"
3862 " is a target node!");
3863 return false;
3864}
3865
3866// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3867// work with truncating build vectors and vectors with elements of less than
3868// 8 bits.
3869bool TargetLowering::isConstTrueVal(SDValue N) const {
3870 if (!N)
3871 return false;
3872
3873 unsigned EltWidth;
3874 APInt CVal;
3875 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3876 /*AllowTruncation=*/true)) {
3877 CVal = CN->getAPIntValue();
3878 EltWidth = N.getValueType().getScalarSizeInBits();
3879 } else
3880 return false;
3881
3882 // If this is a truncating splat, truncate the splat value.
3883 // Otherwise, we may fail to match the expected values below.
3884 if (EltWidth < CVal.getBitWidth())
3885 CVal = CVal.trunc(width: EltWidth);
3886
3887 switch (getBooleanContents(Type: N.getValueType())) {
3888 case UndefinedBooleanContent:
3889 return CVal[0];
3890 case ZeroOrOneBooleanContent:
3891 return CVal.isOne();
3892 case ZeroOrNegativeOneBooleanContent:
3893 return CVal.isAllOnes();
3894 }
3895
3896 llvm_unreachable("Invalid boolean contents");
3897}
3898
3899bool TargetLowering::isConstFalseVal(SDValue N) const {
3900 if (!N)
3901 return false;
3902
3903 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
3904 if (!CN) {
3905 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
3906 if (!BV)
3907 return false;
3908
3909 // Only interested in constant splats, we don't care about undef
3910 // elements in identifying boolean constants and getConstantSplatNode
3911 // returns NULL if all ops are undef;
3912 CN = BV->getConstantSplatNode();
3913 if (!CN)
3914 return false;
3915 }
3916
3917 if (getBooleanContents(Type: N->getValueType(ResNo: 0)) == UndefinedBooleanContent)
3918 return !CN->getAPIntValue()[0];
3919
3920 return CN->isZero();
3921}
3922
3923bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3924 bool SExt) const {
3925 if (VT == MVT::i1)
3926 return N->isOne();
3927
3928 TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
3929 switch (Cnt) {
3930 case TargetLowering::ZeroOrOneBooleanContent:
3931 // An extended value of 1 is always true, unless its original type is i1,
3932 // in which case it will be sign extended to -1.
3933 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3934 case TargetLowering::UndefinedBooleanContent:
3935 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3936 return N->isAllOnes() && SExt;
3937 }
3938 llvm_unreachable("Unexpected enumeration.");
3939}
3940
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
/// Returns the simplified setcc on success, or a null SDValue if no fold
/// applies.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize: put the AND on the LHS (N0) so the matching below only has
  // to look at one side.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(a&: N0, b&: N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // All folds below require an integer AND compared with eq/ne.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
      (getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - 1);
    if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
      return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  // destination types. That may inhibit optimizations, but it also
  // allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
  if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
    // getActiveBits() of a power-of-2 is the position of its single set bit
    // plus one, so the sign bit of NarrowVT is exactly the mask bit.
    EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
                                     BitWidth: AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 0), DL, VT: NarrowVT);
      SDValue Zero = DAG.getConstant(Val: 0, DL, VT: NarrowVT);
      return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
                          Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(i: 0) == N1) {
    X = N0.getOperand(i: 1);
    Y = N0.getOperand(i: 0);
  } else if (N0.getOperand(i: 1) == N1) {
    X = N0.getOperand(i: 0);
    Y = N0.getOperand(i: 1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // it's liable to create an infinite loop.
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
    // Only emit the inverted condition if it is (or will become) legal.
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(V: Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(DL: SDLoc(X), Val: X, VT: OpVT);
    SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: NotX, N2: Y);
    return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
  }

  return SDValue();
}
4035
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
    return SDValue();

  // N0 should be: add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1))))
    return SDValue();

  SDValue X = N0->getOperand(Num: 0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Normalize the unsigned range check to eq/ne against the power-of-two
  // boundary: ult/ule map to SETEQ, ugt/uge map to SETNE. The non-strict
  // variants adjust the constant by one to reach the canonical boundary.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic! The add constant must be exactly half of the setcc constant,
  // i.e. 2^(KeptBits-1) vs 2^KeptBits, or the pattern does not match.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  // Unfold into: sext_inreg(%x) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue SExtInReg = DAG.getNode(
      Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: XVT, N1: X,
      N2: DAG.getValueType(EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: KeptBits)));
  return DAG.getSetCC(DL, VT: SCCVT, LHS: SExtInReg, RHS: X, Cond: NewCond);
}
4128
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
//
// Hoisting the constant out of the shift turns the AND mask into a plain
// constant, which is usually cheaper to materialize. The target gets the
// final say via shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd.
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Look for '(C l>>/<< Y)'.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    // The hoisted shift uses the opposite direction of the matched one.
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(i: 0);
    ConstantSDNode *CC =
        isConstOrConstSplat(N: C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(i: 1);

    // NOTE: X is bound by the caller (to the other AND operand) before Match
    // is invoked, so it can be passed to the target hook here.
    ConstantSDNode *XC =
        isConstOrConstSplat(N: X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(i: 0);
  SDValue Mask = N0.getOperand(i: 1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(a&: X, b&: Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
  SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
  SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
  return T2;
}
4198
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
/// Returns the simplified setcc, or a null SDValue if no fold applies.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(i: 0);
  SDValue Y = N0.getOperand(i: 1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);

  // The remaining folds require the second binop operand to match the RHS.
  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);

  // Only SUB remains. The shift below would not be valid if the operands are
  // boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  SDValue One =
      DAG.getShiftAmountConstant(Val: 1, VT: OpVT, DL, LegalTypes: !DCI.isBeforeLegalize());
  SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(N: YShl1.getNode());
  return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
}
4240
// Fold a setcc of a CTPOP against small constants into cheaper bit tricks,
// using the identity that x & (x-1) clears the lowest set bit of x.
// Returns the replacement setcc, or a null SDValue if no fold applies.
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(Value: N0.getOperand(i: 0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(i: 0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(i: 0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
      return SDValue();

    // Each "clear lowest set bit" pass costs an ADD+AND; bail out if the
    // constant would require more passes than the target is willing to pay.
    unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
    if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    // Repeatedly clear the lowest set bit: x &= (x - 1).
    SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
      Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: 0, DL: dl, VT: CTVT), Cond: CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(VT: CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
    assert(CTVT.isInteger());
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);

    // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1,
    // so check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(Op: CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
      SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
      return RHS;
    }

    // (ctpop x) == 1 --> (x ^ x-1) > x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
    ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
    return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
  }

  return SDValue();
}
4314
// Fold equality comparisons of a rotated value against 0 or -1: a rotation
// only permutes bits, so it preserves "all bits clear" and "all bits set".
// Returns the replacement setcc, or a null SDValue if no fold applies.
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &dl,
                                   SelectionDAG &DAG) {
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  // Only the values 0 and -1 survive an arbitrary rotation unchanged.
  auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
    return SDValue();

  auto getRotateSource = [](SDValue X) {
    if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
      return X.getOperand(i: 0);
    return SDValue();
  };

  // Peek through a rotated value compared against 0 or -1:
  // (rot X, Y) == 0/-1 --> X == 0/-1
  // (rot X, Y) != 0/-1 --> X != 0/-1
  if (SDValue R = getRotateSource(N0))
    return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);

  // Peek through an 'or' of a rotated value compared against 0:
  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
  //
  // TODO: Add the 'and' with -1 sibling.
  // TODO: Recurse through a series of 'or' ops to find the rotate.
  EVT OpVT = N0.getValueType();
  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
    if (SDValue R = getRotateSource(N0.getOperand(i: 0))) {
      SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 1));
      return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
    }
    if (SDValue R = getRotateSource(N0.getOperand(i: 1))) {
      SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 0));
      return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
    }
  }

  return SDValue();
}
4357
// Fold an all-bits-clear test of a funnel shift into a cheaper form.
// Returns the replacement setcc, or a null SDValue if no fold applies.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
                                        ISD::CondCode Cond, const SDLoc &dl,
                                        SelectionDAG &DAG) {
  // If we are testing for all-bits-clear, we might be able to do that with
  // less shifting since bit-order does not matter.
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
  if (!C1 || !C1->isZero())
    return SDValue();

  if (!N0.hasOneUse() ||
      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
    return SDValue();

  // The shift amount must be a constant strictly less than the bit width.
  unsigned BitWidth = N0.getScalarValueSizeInBits();
  auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: 2));
  if (!ShAmtC || ShAmtC->getAPIntValue().uge(RHS: BitWidth))
    return SDValue();

  // Canonicalize fshr as fshl to reduce pattern-matching.
  unsigned ShAmt = ShAmtC->getZExtValue();
  if (N0.getOpcode() == ISD::FSHR)
    ShAmt = BitWidth - ShAmt;

  // Match an 'or' with a specific operand 'Other' in either commuted variant.
  SDValue X, Y;
  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
      return false;
    if (Or.getOperand(i: 0) == Other) {
      X = Or.getOperand(i: 0);
      Y = Or.getOperand(i: 1);
      return true;
    }
    if (Or.getOperand(i: 1) == Other) {
      X = Or.getOperand(i: 1);
      Y = Or.getOperand(i: 0);
      return true;
    }
    return false;
  };

  EVT OpVT = N0.getValueType();
  EVT ShAmtVT = N0.getOperand(i: 2).getValueType();
  SDValue F0 = N0.getOperand(i: 0);
  SDValue F1 = N0.getOperand(i: 1);
  if (matchOr(F0, F1)) {
    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
    SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
    SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
    return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
  }
  if (matchOr(F1, F0)) {
    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
    SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
    SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
    return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
  }

  return SDValue();
}
4423
4424/// Try to simplify a setcc built with the specified operands and cc. If it is
4425/// unable to simplify it, return a null SDValue.
4426SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4427 ISD::CondCode Cond, bool foldBooleans,
4428 DAGCombinerInfo &DCI,
4429 const SDLoc &dl) const {
4430 SelectionDAG &DAG = DCI.DAG;
4431 const DataLayout &Layout = DAG.getDataLayout();
4432 EVT OpVT = N0.getValueType();
4433 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4434
4435 // Constant fold or commute setcc.
4436 if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4437 return Fold;
4438
4439 bool N0ConstOrSplat =
4440 isConstOrConstSplat(N: N0, /*AllowUndefs*/ false, /*AllowTruncate*/ AllowTruncation: true);
4441 bool N1ConstOrSplat =
4442 isConstOrConstSplat(N: N1, /*AllowUndefs*/ false, /*AllowTruncate*/ AllowTruncation: true);
4443
4444 // Canonicalize toward having the constant on the RHS.
4445 // TODO: Handle non-splat vector constants. All undef causes trouble.
4446 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4447 // infinite loop here when we encounter one.
4448 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4449 if (N0ConstOrSplat && !N1ConstOrSplat &&
4450 (DCI.isBeforeLegalizeOps() ||
4451 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4452 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4453
4454 // If we have a subtract with the same 2 non-constant operands as this setcc
4455 // -- but in reverse order -- then try to commute the operands of this setcc
4456 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4457 // instruction on some targets.
4458 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4459 (DCI.isBeforeLegalizeOps() ||
4460 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4461 DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4462 !DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4463 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4464
4465 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4466 return V;
4467
4468 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4469 return V;
4470
4471 if (auto *N1C = isConstOrConstSplat(N: N1)) {
4472 const APInt &C1 = N1C->getAPIntValue();
4473
4474 // Optimize some CTPOP cases.
4475 if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4476 return V;
4477
4478 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4479 // X * Y == 0 --> (X == 0) || (Y == 0)
4480 // X * Y != 0 --> (X != 0) && (Y != 0)
4481 // TODO: This bails out if minsize is set, but if the target doesn't have a
4482 // single instruction multiply for this type, it would likely be
4483 // smaller to decompose.
4484 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4485 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4486 (N0->getFlags().hasNoUnsignedWrap() ||
4487 N0->getFlags().hasNoSignedWrap()) &&
4488 !Attr.hasFnAttr(Attribute::MinSize)) {
4489 SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
4490 SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1, Cond);
4491 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4492 return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4493 }
4494
4495 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4496 // equality comparison, then we're just comparing whether X itself is
4497 // zero.
4498 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4499 N0.getOperand(i: 0).getOpcode() == ISD::CTLZ &&
4500 llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4501 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: 1))) {
4502 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4503 ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4504 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4505 // (srl (ctlz x), 5) == 0 -> X != 0
4506 // (srl (ctlz x), 5) != 1 -> X != 0
4507 Cond = ISD::SETNE;
4508 } else {
4509 // (srl (ctlz x), 5) != 0 -> X == 0
4510 // (srl (ctlz x), 5) == 1 -> X == 0
4511 Cond = ISD::SETEQ;
4512 }
4513 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: N0.getValueType());
4514 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0).getOperand(i: 0), RHS: Zero,
4515 Cond);
4516 }
4517 }
4518 }
4519 }
4520
4521 // FIXME: Support vectors.
4522 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4523 const APInt &C1 = N1C->getAPIntValue();
4524
4525 // (zext x) == C --> x == (trunc C)
4526 // (sext x) == C --> x == (trunc C)
4527 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4528 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4529 unsigned MinBits = N0.getValueSizeInBits();
4530 SDValue PreExt;
4531 bool Signed = false;
4532 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4533 // ZExt
4534 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4535 PreExt = N0->getOperand(Num: 0);
4536 } else if (N0->getOpcode() == ISD::AND) {
4537 // DAGCombine turns costly ZExts into ANDs
4538 if (auto *C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)))
4539 if ((C->getAPIntValue()+1).isPowerOf2()) {
4540 MinBits = C->getAPIntValue().countr_one();
4541 PreExt = N0->getOperand(Num: 0);
4542 }
4543 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4544 // SExt
4545 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4546 PreExt = N0->getOperand(Num: 0);
4547 Signed = true;
4548 } else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4549 // ZEXTLOAD / SEXTLOAD
4550 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4551 MinBits = LN0->getMemoryVT().getSizeInBits();
4552 PreExt = N0;
4553 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4554 Signed = true;
4555 MinBits = LN0->getMemoryVT().getSizeInBits();
4556 PreExt = N0;
4557 }
4558 }
4559
4560 // Figure out how many bits we need to preserve this constant.
4561 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4562
4563 // Make sure we're not losing bits from the constant.
4564 if (MinBits > 0 &&
4565 MinBits < C1.getBitWidth() &&
4566 MinBits >= ReqdBits) {
4567 EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4568 if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4569 // Will get folded away.
4570 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4571 if (MinBits == 1 && C1 == 1)
4572 // Invert the condition.
4573 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4574 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4575 SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4576 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4577 }
4578
4579 // If truncating the setcc operands is not desirable, we can still
4580 // simplify the expression in some cases:
4581 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4582 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4583 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4584 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4585 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4586 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4587 SDValue TopSetCC = N0->getOperand(Num: 0);
4588 unsigned N0Opc = N0->getOpcode();
4589 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4590 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4591 TopSetCC.getOpcode() == ISD::SETCC &&
4592 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4593 (isConstFalseVal(N1) ||
4594 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4595
4596 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4597 (!N1C->isZero() && Cond == ISD::SETNE);
4598
4599 if (!Inverse)
4600 return TopSetCC;
4601
4602 ISD::CondCode InvCond = ISD::getSetCCInverse(
4603 Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: 2))->get(),
4604 Type: TopSetCC.getOperand(i: 0).getValueType());
4605 return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: 0),
4606 RHS: TopSetCC.getOperand(i: 1),
4607 Cond: InvCond);
4608 }
4609 }
4610 }
4611
4612 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4613 // equality or unsigned, and all 1 bits of the const are in the same
4614 // partial word, see if we can shorten the load.
4615 if (DCI.isBeforeLegalize() &&
4616 !ISD::isSignedIntSetCC(Code: Cond) &&
4617 N0.getOpcode() == ISD::AND && C1 == 0 &&
4618 N0.getNode()->hasOneUse() &&
4619 isa<LoadSDNode>(Val: N0.getOperand(i: 0)) &&
4620 N0.getOperand(i: 0).getNode()->hasOneUse() &&
4621 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
4622 LoadSDNode *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: 0));
4623 APInt bestMask;
4624 unsigned bestWidth = 0, bestOffset = 0;
4625 if (Lod->isSimple() && Lod->isUnindexed() &&
4626 (Lod->getMemoryVT().isByteSized() ||
4627 isPaddedAtMostSignificantBitsWhenStored(VT: Lod->getMemoryVT()))) {
4628 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4629 unsigned origWidth = N0.getValueSizeInBits();
4630 unsigned maskWidth = origWidth;
4631 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4632 // 8 bits, but have to be careful...
4633 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4634 origWidth = Lod->getMemoryVT().getSizeInBits();
4635 const APInt &Mask = N0.getConstantOperandAPInt(i: 1);
4636        // Only consider power-of-2 widths (and at least one byte) as candidates
4637 // for the narrowed load.
4638 for (unsigned width = 8; width < origWidth; width *= 2) {
4639 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: width);
4640 if (!shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT))
4641 continue;
4642 APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4643 // Avoid accessing any padding here for now (we could use memWidth
4644 // instead of origWidth here otherwise).
4645 unsigned maxOffset = origWidth - width;
4646 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4647 if (Mask.isSubsetOf(RHS: newMask)) {
4648 unsigned ptrOffset =
4649 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4650 unsigned IsFast = 0;
4651 Align NewAlign = commonAlignment(A: Lod->getAlign(), Offset: ptrOffset / 8);
4652 if (allowsMemoryAccess(
4653 Context&: *DAG.getContext(), DL: Layout, VT: newVT, AddrSpace: Lod->getAddressSpace(),
4654 Alignment: NewAlign, Flags: Lod->getMemOperand()->getFlags(), Fast: &IsFast) &&
4655 IsFast) {
4656 bestOffset = ptrOffset / 8;
4657 bestMask = Mask.lshr(shiftAmt: offset);
4658 bestWidth = width;
4659 break;
4660 }
4661 }
4662 newMask <<= 8;
4663 }
4664 if (bestWidth)
4665 break;
4666 }
4667 }
4668 if (bestWidth) {
4669 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
4670 SDValue Ptr = Lod->getBasePtr();
4671 if (bestOffset != 0)
4672 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset));
4673 SDValue NewLoad =
4674 DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
4675 PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
4676 Alignment: Lod->getOriginalAlign());
4677 SDValue And =
4678 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
4679 N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth), DL: dl, VT: newVT));
4680 return DAG.getSetCC(DL: dl, VT, LHS: And, RHS: DAG.getConstant(Val: 0LL, DL: dl, VT: newVT), Cond);
4681 }
4682 }
4683
4684 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4685 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4686 unsigned InSize = N0.getOperand(i: 0).getValueSizeInBits();
4687
4688 // If the comparison constant has bits in the upper part, the
4689 // zero-extended value could never match.
4690 if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
4691 hiBitsSet: C1.getBitWidth() - InSize))) {
4692 switch (Cond) {
4693 case ISD::SETUGT:
4694 case ISD::SETUGE:
4695 case ISD::SETEQ:
4696 return DAG.getConstant(Val: 0, DL: dl, VT);
4697 case ISD::SETULT:
4698 case ISD::SETULE:
4699 case ISD::SETNE:
4700 return DAG.getConstant(Val: 1, DL: dl, VT);
4701 case ISD::SETGT:
4702 case ISD::SETGE:
4703 // True if the sign bit of C1 is set.
4704 return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
4705 case ISD::SETLT:
4706 case ISD::SETLE:
4707 // True if the sign bit of C1 isn't set.
4708 return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
4709 default:
4710 break;
4711 }
4712 }
4713
4714 // Otherwise, we can perform the comparison with the low bits.
4715 switch (Cond) {
4716 case ISD::SETEQ:
4717 case ISD::SETNE:
4718 case ISD::SETUGT:
4719 case ISD::SETUGE:
4720 case ISD::SETULT:
4721 case ISD::SETULE: {
4722 EVT newVT = N0.getOperand(i: 0).getValueType();
4723 if (DCI.isBeforeLegalizeOps() ||
4724 (isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
4725 isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()))) {
4726 EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
4727 SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
4728
4729 SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: 0),
4730 RHS: NewConst, Cond);
4731 return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
4732 }
4733 break;
4734 }
4735 default:
4736 break; // todo, be more careful with signed comparisons
4737 }
4738 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4739 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4740 !isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT(),
4741 ToTy: OpVT)) {
4742 EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT();
4743 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4744 EVT ExtDstTy = N0.getValueType();
4745 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4746
4747 // If the constant doesn't fit into the number of bits for the source of
4748 // the sign extension, it is impossible for both sides to be equal.
4749 if (C1.getSignificantBits() > ExtSrcTyBits)
4750 return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
4751
4752 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4753 ExtDstTy != ExtSrcTy && "Unexpected types!");
4754 APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
4755 SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: 0),
4756 N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
4757 if (!DCI.isCalledByLegalizer())
4758 DCI.AddToWorklist(N: ZextOp.getNode());
4759 // Otherwise, make this a use of a zext.
4760 return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
4761 RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
4762 } else if ((N1C->isZero() || N1C->isOne()) &&
4763 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4764 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4765 // excluded as they are handled below whilst checking for foldBooleans.
4766 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4767 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4768 (N0.getValueType() == MVT::i1 ||
4769 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4770 DAG.MaskedValueIsZero(
4771 N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4772 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4773 if (TrueWhenTrue)
4774 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
4775 // Invert the condition.
4776 if (N0.getOpcode() == ISD::SETCC) {
4777 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
4778 CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: 0).getValueType());
4779 if (DCI.isBeforeLegalizeOps() ||
4780 isCondCodeLegal(CC, VT: N0.getOperand(i: 0).getSimpleValueType()))
4781 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond: CC);
4782 }
4783 }
4784
4785 if ((N0.getOpcode() == ISD::XOR ||
4786 (N0.getOpcode() == ISD::AND &&
4787 N0.getOperand(i: 0).getOpcode() == ISD::XOR &&
4788 N0.getOperand(i: 1) == N0.getOperand(i: 0).getOperand(i: 1))) &&
4789 isOneConstant(V: N0.getOperand(i: 1))) {
4790 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4791 // can only do this if the top bits are known zero.
4792 unsigned BitWidth = N0.getValueSizeInBits();
4793 if (DAG.MaskedValueIsZero(Op: N0,
4794 Mask: APInt::getHighBitsSet(numBits: BitWidth,
4795 hiBitsSet: BitWidth-1))) {
4796 // Okay, get the un-inverted input value.
4797 SDValue Val;
4798 if (N0.getOpcode() == ISD::XOR) {
4799 Val = N0.getOperand(i: 0);
4800 } else {
4801 assert(N0.getOpcode() == ISD::AND &&
4802 N0.getOperand(0).getOpcode() == ISD::XOR);
4803 // ((X^1)&1)^1 -> X & 1
4804 Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
4805 N1: N0.getOperand(i: 0).getOperand(i: 0),
4806 N2: N0.getOperand(i: 1));
4807 }
4808
4809 return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
4810 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4811 }
4812 } else if (N1C->isOne()) {
4813 SDValue Op0 = N0;
4814 if (Op0.getOpcode() == ISD::TRUNCATE)
4815 Op0 = Op0.getOperand(i: 0);
4816
4817 if ((Op0.getOpcode() == ISD::XOR) &&
4818 Op0.getOperand(i: 0).getOpcode() == ISD::SETCC &&
4819 Op0.getOperand(i: 1).getOpcode() == ISD::SETCC) {
4820 SDValue XorLHS = Op0.getOperand(i: 0);
4821 SDValue XorRHS = Op0.getOperand(i: 1);
4822 // Ensure that the input setccs return an i1 type or 0/1 value.
4823 if (Op0.getValueType() == MVT::i1 ||
4824 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4825 ZeroOrOneBooleanContent &&
4826 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4827 ZeroOrOneBooleanContent)) {
4828 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4829 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4830 return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
4831 }
4832 }
4833 if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: 1))) {
4834 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4835 if (Op0.getValueType().bitsGT(VT))
4836 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
4837 N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
4838 N2: DAG.getConstant(Val: 1, DL: dl, VT));
4839 else if (Op0.getValueType().bitsLT(VT))
4840 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
4841 N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
4842 N2: DAG.getConstant(Val: 1, DL: dl, VT));
4843
4844 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
4845 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
4846 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4847 }
4848 if (Op0.getOpcode() == ISD::AssertZext &&
4849 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4850 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
4851 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
4852 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4853 }
4854 }
4855
4856 // Given:
4857 // icmp eq/ne (urem %x, %y), 0
4858 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4859 // icmp eq/ne %x, 0
4860 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4861 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4862 KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 0));
4863 KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 1));
4864 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4865 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
4866 }
4867
4868 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4869 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4870 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4871 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
4872 N0.getConstantOperandAPInt(i: 1) == OpVT.getScalarSizeInBits() - 1 &&
4873 N1C && N1C->isAllOnes()) {
4874 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0),
4875 RHS: DAG.getConstant(Val: 0, DL: dl, VT: OpVT),
4876 Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4877 }
4878
4879 if (SDValue V =
4880 optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
4881 return V;
4882 }
4883
4884 // These simplifications apply to splat vectors as well.
4885 // TODO: Handle more splat vector cases.
4886 if (auto *N1C = isConstOrConstSplat(N: N1)) {
4887 const APInt &C1 = N1C->getAPIntValue();
4888
4889 APInt MinVal, MaxVal;
4890 unsigned OperandBitSize = N1C->getValueType(ResNo: 0).getScalarSizeInBits();
4891 if (ISD::isSignedIntSetCC(Code: Cond)) {
4892 MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
4893 MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
4894 } else {
4895 MinVal = APInt::getMinValue(numBits: OperandBitSize);
4896 MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
4897 }
4898
4899 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4900 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4901 // X >= MIN --> true
4902 if (C1 == MinVal)
4903 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
4904
4905 if (!VT.isVector()) { // TODO: Support this for vectors.
4906 // X >= C0 --> X > (C0 - 1)
4907 APInt C = C1 - 1;
4908 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4909 if ((DCI.isBeforeLegalizeOps() ||
4910 isCondCodeLegal(CC: NewCC, VT: VT.getSimpleVT())) &&
4911 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4912 isLegalICmpImmediate(C.getSExtValue())))) {
4913 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4914 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
4915 Cond: NewCC);
4916 }
4917 }
4918 }
4919
4920 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4921 // X <= MAX --> true
4922 if (C1 == MaxVal)
4923 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
4924
4925 // X <= C0 --> X < (C0 + 1)
4926 if (!VT.isVector()) { // TODO: Support this for vectors.
4927 APInt C = C1 + 1;
4928 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4929 if ((DCI.isBeforeLegalizeOps() ||
4930 isCondCodeLegal(CC: NewCC, VT: VT.getSimpleVT())) &&
4931 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4932 isLegalICmpImmediate(C.getSExtValue())))) {
4933 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4934 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
4935 Cond: NewCC);
4936 }
4937 }
4938 }
4939
4940 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4941 if (C1 == MinVal)
4942 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
4943
4944 // TODO: Support this for vectors after legalize ops.
4945 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4946 // Canonicalize setlt X, Max --> setne X, Max
4947 if (C1 == MaxVal)
4948 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
4949
4950 // If we have setult X, 1, turn it into seteq X, 0
4951 if (C1 == MinVal+1)
4952 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4953 RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
4954 Cond: ISD::SETEQ);
4955 }
4956 }
4957
4958 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4959 if (C1 == MaxVal)
4960 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
4961
4962 // TODO: Support this for vectors after legalize ops.
4963 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4964 // Canonicalize setgt X, Min --> setne X, Min
4965 if (C1 == MinVal)
4966 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
4967
4968 // If we have setugt X, Max-1, turn it into seteq X, Max
4969 if (C1 == MaxVal-1)
4970 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4971 RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
4972 Cond: ISD::SETEQ);
4973 }
4974 }
4975
4976 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4977 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4978 if (C1.isZero())
4979 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4980 SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
4981 return CC;
4982
4983 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4984 // For example, when high 32-bits of i64 X are known clear:
4985 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
4986 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
4987 bool CmpZero = N1C->isZero();
4988 bool CmpNegOne = N1C->isAllOnes();
4989 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
4990 // Match or(lo,shl(hi,bw/2)) pattern.
4991 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4992 unsigned EltBits = V.getScalarValueSizeInBits();
4993 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
4994 return false;
4995 SDValue LHS = V.getOperand(i: 0);
4996 SDValue RHS = V.getOperand(i: 1);
4997 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / 2);
4998            // Unshifted element must have zero upper bits.
4999 if (RHS.getOpcode() == ISD::SHL &&
5000 isa<ConstantSDNode>(Val: RHS.getOperand(i: 1)) &&
5001 RHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
5002 DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
5003 Lo = LHS;
5004 Hi = RHS.getOperand(i: 0);
5005 return true;
5006 }
5007 if (LHS.getOpcode() == ISD::SHL &&
5008 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
5009 LHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
5010 DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
5011 Lo = RHS;
5012 Hi = LHS.getOperand(i: 0);
5013 return true;
5014 }
5015 return false;
5016 };
5017
5018 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5019 unsigned EltBits = N0.getScalarValueSizeInBits();
5020 unsigned HalfBits = EltBits / 2;
5021 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
5022 SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
5023 SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
5024 SDValue NewN0 =
5025 DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
5026 SDValue NewN1 = CmpZero ? DAG.getConstant(Val: 0, DL: dl, VT: OpVT) : LoBits;
5027 return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
5028 };
5029
5030 SDValue Lo, Hi;
5031 if (IsConcat(N0, Lo, Hi))
5032 return MergeConcat(Lo, Hi);
5033
5034 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5035 SDValue Lo0, Lo1, Hi0, Hi1;
5036 if (IsConcat(N0.getOperand(i: 0), Lo0, Hi0) &&
5037 IsConcat(N0.getOperand(i: 1), Lo1, Hi1)) {
5038 return MergeConcat(DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
5039 DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
5040 }
5041 }
5042 }
5043 }
5044
5045 // If we have "setcc X, C0", check to see if we can shrink the immediate
5046 // by changing cc.
5047 // TODO: Support this for vectors after legalize ops.
5048 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5049 // SETUGT X, SINTMAX -> SETLT X, 0
5050 // SETUGE X, SINTMIN -> SETLT X, 0
5051 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5052 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5053 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5054 RHS: DAG.getConstant(Val: 0, DL: dl, VT: N1.getValueType()),
5055 Cond: ISD::SETLT);
5056
5057 // SETULT X, SINTMIN -> SETGT X, -1
5058 // SETULE X, SINTMAX -> SETGT X, -1
5059 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5060 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5061 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5062 RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5063 Cond: ISD::SETGT);
5064 }
5065 }
5066
5067 // Back to non-vector simplifications.
5068 // TODO: Can we do these for vector splats?
5069 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5070 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5071 const APInt &C1 = N1C->getAPIntValue();
5072 EVT ShValTy = N0.getValueType();
5073
5074 // Fold bit comparisons when we can. This will result in an
5075 // incorrect value when boolean false is negative one, unless
5076 // the bitsize is 1 in which case the false value is the same
5077 // in practice regardless of the representation.
5078 if ((VT.getSizeInBits() == 1 ||
5079 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5080 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5081 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5082 N0.getOpcode() == ISD::AND) {
5083 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5084 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5085 // Perform the xform if the AND RHS is a single bit.
5086 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5087 if (AndRHS->getAPIntValue().isPowerOf2() &&
5088 !TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5089 return DAG.getNode(
5090 Opcode: ISD::TRUNCATE, DL: dl, VT,
5091 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5092 N2: DAG.getShiftAmountConstant(
5093 Val: ShCt, VT: ShValTy, DL: dl, LegalTypes: !DCI.isBeforeLegalize())));
5094 }
5095 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5096 // (X & 8) == 8 --> (X & 8) >> 3
5097 // Perform the xform if C1 is a single bit.
5098 unsigned ShCt = C1.logBase2();
5099 if (C1.isPowerOf2() &&
5100 !TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5101 return DAG.getNode(
5102 Opcode: ISD::TRUNCATE, DL: dl, VT,
5103 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5104 N2: DAG.getShiftAmountConstant(
5105 Val: ShCt, VT: ShValTy, DL: dl, LegalTypes: !DCI.isBeforeLegalize())));
5106 }
5107 }
5108 }
5109 }
5110
5111 if (C1.getSignificantBits() <= 64 &&
5112 !isLegalICmpImmediate(C1.getSExtValue())) {
5113 // (X & -256) == 256 -> (X >> 8) == 1
5114 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5115 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5116 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5117 const APInt &AndRHSC = AndRHS->getAPIntValue();
5118 if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5119 unsigned ShiftBits = AndRHSC.countr_zero();
5120 if (!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5121 SDValue Shift = DAG.getNode(
5122 Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5123 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl,
5124 LegalTypes: !DCI.isBeforeLegalize()));
5125 SDValue CmpRHS = DAG.getConstant(Val: C1.lshr(shiftAmt: ShiftBits), DL: dl, VT: ShValTy);
5126 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5127 }
5128 }
5129 }
5130 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5131 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5132 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5133 // X < 0x100000000 -> (X >> 32) < 1
5134 // X >= 0x100000000 -> (X >> 32) >= 1
5135 // X <= 0x0ffffffff -> (X >> 32) < 1
5136 // X > 0x0ffffffff -> (X >> 32) >= 1
5137 unsigned ShiftBits;
5138 APInt NewC = C1;
5139 ISD::CondCode NewCond = Cond;
5140 if (AdjOne) {
5141 ShiftBits = C1.countr_one();
5142 NewC = NewC + 1;
5143 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5144 } else {
5145 ShiftBits = C1.countr_zero();
5146 }
5147 NewC.lshrInPlace(ShiftAmt: ShiftBits);
5148 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5149 isLegalICmpImmediate(NewC.getSExtValue()) &&
5150 !TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5151 SDValue Shift =
5152 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5153 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl,
5154 LegalTypes: !DCI.isBeforeLegalize()));
5155 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5156 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5157 }
5158 }
5159 }
5160 }
5161
5162 if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5163 auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5164 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5165
5166 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5167 // constant if knowing that the operand is non-nan is enough. We prefer to
5168 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5169 // materialize 0.0.
5170 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5171 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5172
5173 // setcc (fneg x), C -> setcc swap(pred) x, -C
5174 if (N0.getOpcode() == ISD::FNEG) {
5175 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5176 if (DCI.isBeforeLegalizeOps() ||
5177 isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5178 SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5179 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NegN1, Cond: SwapCond);
5180 }
5181 }
5182
5183 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5184 if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5185 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: 0))) {
5186 bool IsFabs = N0.getOpcode() == ISD::FABS;
5187 SDValue Op = IsFabs ? N0.getOperand(i: 0) : N0;
5188 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5189 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5190 : (IsFabs ? fcInf : fcPosInf);
5191 if (Cond == ISD::SETUEQ)
5192 Flag |= fcNan;
5193 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5194 DAG.getTargetConstant(Flag, dl, MVT::i32));
5195 }
5196 }
5197
5198 // If the condition is not legal, see if we can find an equivalent one
5199 // which is legal.
5200 if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5201 // If the comparison was an awkward floating-point == or != and one of
5202 // the comparison operands is infinity or negative infinity, convert the
5203 // condition to a less-awkward <= or >=.
5204 if (CFP->getValueAPF().isInfinity()) {
5205 bool IsNegInf = CFP->getValueAPF().isNegative();
5206 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5207 switch (Cond) {
5208 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5209 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5210 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5211 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5212 default: break;
5213 }
5214 if (NewCond != ISD::SETCC_INVALID &&
5215 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5216 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5217 }
5218 }
5219 }
5220
5221 if (N0 == N1) {
5222 // The sext(setcc()) => setcc() optimization relies on the appropriate
5223 // constant being emitted.
5224 assert(!N0.getValueType().isInteger() &&
5225 "Integer types should be handled by FoldSetCC");
5226
5227 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5228 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5229 if (UOF == 2) // FP operators that are undefined on NaNs.
5230 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5231 if (UOF == unsigned(EqTrue))
5232 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5233 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5234 // if it is not already.
5235 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5236 if (NewCond != Cond &&
5237 (DCI.isBeforeLegalizeOps() ||
5238 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5239 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5240 }
5241
5242 // ~X > ~Y --> Y > X
5243 // ~X < ~Y --> Y < X
5244 // ~X < C --> X > ~C
5245 // ~X > C --> X < ~C
5246 if ((isSignedIntSetCC(Code: Cond) || isUnsignedIntSetCC(Code: Cond)) &&
5247 N0.getValueType().isInteger()) {
5248 if (isBitwiseNot(V: N0)) {
5249 if (isBitwiseNot(V: N1))
5250 return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: 0), RHS: N0.getOperand(i: 0), Cond);
5251
5252 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5253 !DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: 0))) {
5254 SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5255 return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: 0), Cond);
5256 }
5257 }
5258 }
5259
5260 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5261 N0.getValueType().isInteger()) {
5262 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5263 N0.getOpcode() == ISD::XOR) {
5264 // Simplify (X+Y) == (X+Z) --> Y == Z
5265 if (N0.getOpcode() == N1.getOpcode()) {
5266 if (N0.getOperand(i: 0) == N1.getOperand(i: 0))
5267 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 1), Cond);
5268 if (N0.getOperand(i: 1) == N1.getOperand(i: 1))
5269 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5270 if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5271 // If X op Y == Y op X, try other combinations.
5272 if (N0.getOperand(i: 0) == N1.getOperand(i: 1))
5273 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 0),
5274 Cond);
5275 if (N0.getOperand(i: 1) == N1.getOperand(i: 0))
5276 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 1),
5277 Cond);
5278 }
5279 }
5280
5281 // If RHS is a legal immediate value for a compare instruction, we need
5282 // to be careful about increasing register pressure needlessly.
5283 bool LegalRHSImm = false;
5284
5285 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5286 if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5287 // Turn (X+C1) == C2 --> X == C2-C1
5288 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5289 return DAG.getSetCC(
5290 DL: dl, VT, LHS: N0.getOperand(i: 0),
5291 RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5292 DL: dl, VT: N0.getValueType()),
5293 Cond);
5294
5295 // Turn (X^C1) == C2 --> X == C1^C2
5296 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5297 return DAG.getSetCC(
5298 DL: dl, VT, LHS: N0.getOperand(i: 0),
5299 RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5300 DL: dl, VT: N0.getValueType()),
5301 Cond);
5302 }
5303
5304 // Turn (C1-X) == C2 --> X == C1-C2
5305 if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)))
5306 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5307 return DAG.getSetCC(
5308 DL: dl, VT, LHS: N0.getOperand(i: 1),
5309 RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5310 DL: dl, VT: N0.getValueType()),
5311 Cond);
5312
5313 // Could RHSC fold directly into a compare?
5314 if (RHSC->getValueType(ResNo: 0).getSizeInBits() <= 64)
5315 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5316 }
5317
5318 // (X+Y) == X --> Y == 0 and similar folds.
5319 // Don't do this if X is an immediate that can fold into a cmp
5320 // instruction and X+Y has other uses. It could be an induction variable
5321 // chain, and the transform would increase register pressure.
5322 if (!LegalRHSImm || N0.hasOneUse())
5323 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5324 return V;
5325 }
5326
5327 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5328 N1.getOpcode() == ISD::XOR)
5329 if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5330 return V;
5331
5332 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5333 return V;
5334 }
5335
5336 // Fold remainder of division by a constant.
5337 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5338 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5339 // When division is cheap or optimizing for minimum size,
5340 // fall through to DIVREM creation by skipping this fold.
5341 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5342 if (N0.getOpcode() == ISD::UREM) {
5343 if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5344 return Folded;
5345 } else if (N0.getOpcode() == ISD::SREM) {
5346 if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5347 return Folded;
5348 }
5349 }
5350 }
5351
5352 // Fold away ALL boolean setcc's.
5353 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5354 SDValue Temp;
5355 switch (Cond) {
5356 default: llvm_unreachable("Unknown integer setcc!");
5357 case ISD::SETEQ: // X == Y -> ~(X^Y)
5358 Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5359 N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5360 if (!DCI.isCalledByLegalizer())
5361 DCI.AddToWorklist(N: Temp.getNode());
5362 break;
5363 case ISD::SETNE: // X != Y --> (X^Y)
5364 N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5365 break;
5366 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5367 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5368 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5369 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5370 if (!DCI.isCalledByLegalizer())
5371 DCI.AddToWorklist(N: Temp.getNode());
5372 break;
5373 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5374 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5375 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5376 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5377 if (!DCI.isCalledByLegalizer())
5378 DCI.AddToWorklist(N: Temp.getNode());
5379 break;
5380 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5381 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5382 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5383 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5384 if (!DCI.isCalledByLegalizer())
5385 DCI.AddToWorklist(N: Temp.getNode());
5386 break;
5387 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5388 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5389 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5390 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5391 break;
5392 }
5393 if (VT.getScalarType() != MVT::i1) {
5394 if (!DCI.isCalledByLegalizer())
5395 DCI.AddToWorklist(N: N0.getNode());
5396 // FIXME: If running after legalize, we probably can't do this.
5397 ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5398 N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5399 }
5400 return N0;
5401 }
5402
5403 // Could not fold it.
5404 return SDValue();
5405}
5406
5407/// Returns true (and the GlobalValue and the offset) if the node is a
5408/// GlobalAddress + offset.
5409bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5410 int64_t &Offset) const {
5411
5412 SDNode *N = unwrapAddress(N: SDValue(WN, 0)).getNode();
5413
5414 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5415 GA = GASD->getGlobal();
5416 Offset += GASD->getOffset();
5417 return true;
5418 }
5419
5420 if (N->getOpcode() == ISD::ADD) {
5421 SDValue N1 = N->getOperand(Num: 0);
5422 SDValue N2 = N->getOperand(Num: 1);
5423 if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5424 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5425 Offset += V->getSExtValue();
5426 return true;
5427 }
5428 } else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5429 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5430 Offset += V->getSExtValue();
5431 return true;
5432 }
5433 }
5434 }
5435
5436 return false;
5437}
5438
/// Target hook invoked by the DAG combiner for target-specific node kinds;
/// this base implementation performs no combines and returns a null SDValue.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
5444
5445//===----------------------------------------------------------------------===//
5446// Inline Assembler Implementation Methods
5447//===----------------------------------------------------------------------===//
5448
5449TargetLowering::ConstraintType
5450TargetLowering::getConstraintType(StringRef Constraint) const {
5451 unsigned S = Constraint.size();
5452
5453 if (S == 1) {
5454 switch (Constraint[0]) {
5455 default: break;
5456 case 'r':
5457 return C_RegisterClass;
5458 case 'm': // memory
5459 case 'o': // offsetable
5460 case 'V': // not offsetable
5461 return C_Memory;
5462 case 'p': // Address.
5463 return C_Address;
5464 case 'n': // Simple Integer
5465 case 'E': // Floating Point Constant
5466 case 'F': // Floating Point Constant
5467 return C_Immediate;
5468 case 'i': // Simple Integer or Relocatable Constant
5469 case 's': // Relocatable Constant
5470 case 'X': // Allow ANY value.
5471 case 'I': // Target registers.
5472 case 'J':
5473 case 'K':
5474 case 'L':
5475 case 'M':
5476 case 'N':
5477 case 'O':
5478 case 'P':
5479 case '<':
5480 case '>':
5481 return C_Other;
5482 }
5483 }
5484
5485 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5486 if (S == 8 && Constraint.substr(Start: 1, N: 6) == "memory") // "{memory}"
5487 return C_Memory;
5488 return C_Register;
5489 }
5490 return C_Unknown;
5491}
5492
5493/// Try to replace an X constraint, which matches anything, with another that
5494/// has more specific requirements based on the type of the corresponding
5495/// operand.
5496const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5497 if (ConstraintVT.isInteger())
5498 return "r";
5499 if (ConstraintVT.isFloatingPoint())
5500 return "f"; // works for many targets
5501 return nullptr;
5502}
5503
/// Target hook for lowering an inline-asm output operand with a nonstandard
/// constraint (e.g. flag outputs); the base implementation handles nothing
/// and returns a null SDValue.
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  return SDValue();
}
5509
5510/// Lower the specified operand into the Ops vector.
5511/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Multi-letter constraints are target-specific; this generic code only
  // understands the single-letter ones below.
  // NOTE(review): an empty Constraint would pass this check and index
  // Constraint[0] below — presumably callers never pass one; confirm.
  if (Constraint.size() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X': // Allows any operand
  case 'i': // Simple Integer or Relocatable Constant
  case 'n': // Simple Integer
  case 's': { // Relocatable Constant

    ConstantSDNode *C;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementptr is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // A constant terminates the walk for every letter except 's', which
      // demands a relocatable (symbolic) value.
      if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended. Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        // i1 values follow the target's boolean-contents convention instead.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // Symbolic endpoints (global, block address, basic block) are allowed
      // for every letter except 'n', which requires a plain integer.
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
          Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(Op),
                                                   VT: GA->getValueType(ResNo: 0),
                                                   offset: Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
          Ops.push_back(x: DAG.getTargetBlockAddress(
              BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: 0),
              Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Val: Op)) {
          Ops.push_back(x: Op);
          return;
        }
      }
      // Peel one ADD/SUB-with-constant layer, accumulating the constant into
      // Offset, and keep walking toward the symbolic root.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 0))))
          Op = Op.getOperand(i: 1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))))
          Op = Op.getOperand(i: 0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      // Anything else means the operand is invalid for this constraint.
      return;
    }
    break;
  }
  }
}
5588
/// Target hook allowing extra SDValue operands to be appended when lowering a
/// target intrinsic call; the base implementation adds nothing.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5592
5593std::pair<unsigned, const TargetRegisterClass *>
5594TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5595 StringRef Constraint,
5596 MVT VT) const {
5597 if (!Constraint.starts_with(Prefix: "{"))
5598 return std::make_pair(x: 0u, y: static_cast<TargetRegisterClass *>(nullptr));
5599 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5600
5601 // Remove the braces from around the name.
5602 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5603
5604 std::pair<unsigned, const TargetRegisterClass *> R =
5605 std::make_pair(x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr));
5606
5607 // Figure out which register class contains this reg.
5608 for (const TargetRegisterClass *RC : RI->regclasses()) {
5609 // If none of the value types for this register class are valid, we
5610 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5611 if (!isLegalRC(TRI: *RI, RC: *RC))
5612 continue;
5613
5614 for (const MCPhysReg &PR : *RC) {
5615 if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5616 std::pair<unsigned, const TargetRegisterClass *> S =
5617 std::make_pair(x: PR, y&: RC);
5618
5619 // If this register class has the requested value type, return it,
5620 // otherwise keep searching and return the first class found
5621 // if no other is found which explicitly has the requested type.
5622 if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5623 return S;
5624 if (!R.second)
5625 R = S;
5626 }
5627 }
5628 }
5629
5630 return R;
5631}
5632
5633//===----------------------------------------------------------------------===//
5634// Constraint Selection.
5635
/// Return true if this is an input operand that is a matching constraint like
/// "4".
5638bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5639 assert(!ConstraintCode.empty() && "No known constraint!");
5640 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5641}
5642
5643/// If this is an input matching constraint, this method returns the output
5644/// operand it matches.
5645unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5646 assert(!ConstraintCode.empty() && "No known constraint!");
5647 return atoi(nptr: ConstraintCode.c_str());
5648}
5649
5650/// Split up the constraint string from the inline assembly value into the
5651/// specific constraints and their prefixes, and also tie in the associated
5652/// operand values.
5653/// If this returns an empty vector, and if the constraint string itself
5654/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(args: std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      // Multiple direct outputs come back as a struct; each one is an
      // element of that struct, indexed by ResNo.
      if (StructType *STy = dyn_cast<StructType>(Val: Call.getType())) {
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, Ty: STy->getElementType(N: ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Ty: Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
      break;
    case InlineAsm::isLabel:
      // Labels consume a CallBr indirect destination rather than an argument.
      OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
      ++LabelNo;
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Derive the constraint's MVT from the IR operand's type.
    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(N: 0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
          // One invalid operand invalidates the whole alternative.
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(index: bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Tied operands may differ in VT only if they resolve to the same
        // register class and agree on integer-ness.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
                                         VT: OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
                                         VT: Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error(reason: "Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5836
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
/// stronger preference for one constraint type relative to another.
/// FIXME: We should prefer registers over memory but doing so may lead to
/// unrecoverable register exhaustion later.
/// https://github.com/llvm/llvm-project/issues/20571
5844static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5845 switch (CT) {
5846 case TargetLowering::C_Immediate:
5847 case TargetLowering::C_Other:
5848 return 4;
5849 case TargetLowering::C_Memory:
5850 case TargetLowering::C_Address:
5851 return 3;
5852 case TargetLowering::C_RegisterClass:
5853 return 2;
5854 case TargetLowering::C_Register:
5855 return 1;
5856 case TargetLowering::C_Unknown:
5857 return 0;
5858 }
5859 llvm_unreachable("Invalid constraint type");
5860}
5861
5862/// Examine constraint type and operand type and determine a weight value.
5863/// This object must already have been set up with the operand type
5864/// and the current alternative constraint selected.
5865TargetLowering::ConstraintWeight
5866 TargetLowering::getMultipleConstraintMatchWeight(
5867 AsmOperandInfo &info, int maIndex) const {
5868 InlineAsm::ConstraintCodeVector *rCodes;
5869 if (maIndex >= (int)info.multipleAlternatives.size())
5870 rCodes = &info.Codes;
5871 else
5872 rCodes = &info.multipleAlternatives[maIndex].Codes;
5873 ConstraintWeight BestWeight = CW_Invalid;
5874
5875 // Loop over the options, keeping track of the most general one.
5876 for (const std::string &rCode : *rCodes) {
5877 ConstraintWeight weight =
5878 getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
5879 if (weight > BestWeight)
5880 BestWeight = weight;
5881 }
5882
5883 return BestWeight;
5884}
5885
5886/// Examine constraint type and operand type and determine a weight value.
5887/// This object must already have been set up with the operand type
5888/// and the current alternative constraint selected.
5889TargetLowering::ConstraintWeight
5890 TargetLowering::getSingleConstraintMatchWeight(
5891 AsmOperandInfo &info, const char *constraint) const {
5892 ConstraintWeight weight = CW_Invalid;
5893 Value *CallOperandVal = info.CallOperandVal;
5894 // If we don't have a value, we can't do a match,
5895 // but allow it at the lowest weight.
5896 if (!CallOperandVal)
5897 return CW_Default;
5898 // Look at the constraint type.
5899 switch (*constraint) {
5900 case 'i': // immediate integer.
5901 case 'n': // immediate integer with a known value.
5902 if (isa<ConstantInt>(Val: CallOperandVal))
5903 weight = CW_Constant;
5904 break;
5905 case 's': // non-explicit intregal immediate.
5906 if (isa<GlobalValue>(Val: CallOperandVal))
5907 weight = CW_Constant;
5908 break;
5909 case 'E': // immediate float if host format.
5910 case 'F': // immediate float.
5911 if (isa<ConstantFP>(Val: CallOperandVal))
5912 weight = CW_Constant;
5913 break;
5914 case '<': // memory operand with autodecrement.
5915 case '>': // memory operand with autoincrement.
5916 case 'm': // memory operand.
5917 case 'o': // offsettable memory operand
5918 case 'V': // non-offsettable memory operand
5919 weight = CW_Memory;
5920 break;
5921 case 'r': // general register.
5922 case 'g': // general register, memory operand or immediate integer.
5923 // note: Clang converts "g" to "imr".
5924 if (CallOperandVal->getType()->isIntegerTy())
5925 weight = CW_Register;
5926 break;
5927 case 'X': // any operand.
5928 default:
5929 weight = CW_Default;
5930 break;
5931 }
5932 return weight;
5933}
5934
5935/// If there are multiple different constraints that we could pick for this
5936/// operand (e.g. "imr") try to pick the 'best' one.
5937/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5938/// into seven classes:
5939/// Register -> one specific register
5940/// RegisterClass -> a group of regs
5941/// Memory -> memory
5942/// Address -> a symbolic memory reference
5943/// Immediate -> immediate values
5944/// Other -> magic values (such as "Flag Output Operands")
5945/// Unknown -> something we don't recognize yet and can't handle
5946/// Ideally, we would pick the most specific constraint possible: if we have
5947/// something that fits into a register, we would pick it. The problem here
5948/// is that if we have something that could either be in a register or in
5949/// memory that use of the register could cause selection of *other*
5950/// operands to fail: they might only succeed if we pick memory. Because of
5951/// this the heuristic we use is:
5952///
5953/// 1) If there is an 'other' constraint, and if the operand is valid for
5954/// that constraint, use it. This makes us take advantage of 'i'
5955/// constraints when available.
5956/// 2) Otherwise, pick the most general constraint present. This prefers
5957/// 'm' over 'r', for example.
5958///
5959TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5960 TargetLowering::AsmOperandInfo &OpInfo) const {
5961 ConstraintGroup Ret;
5962
5963 Ret.reserve(N: OpInfo.Codes.size());
5964 for (StringRef Code : OpInfo.Codes) {
5965 TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
5966
5967 // Indirect 'other' or 'immediate' constraints are not allowed.
5968 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5969 CType == TargetLowering::C_Register ||
5970 CType == TargetLowering::C_RegisterClass))
5971 continue;
5972
5973 // Things with matching constraints can only be registers, per gcc
5974 // documentation. This mainly affects "g" constraints.
5975 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5976 continue;
5977
5978 Ret.emplace_back(Args&: Code, Args&: CType);
5979 }
5980
5981 std::stable_sort(
5982 first: Ret.begin(), last: Ret.end(), comp: [](ConstraintPair a, ConstraintPair b) {
5983 return getConstraintPiority(CT: a.second) > getConstraintPiority(CT: b.second);
5984 });
5985
5986 return Ret;
5987}
5988
5989/// If we have an immediate, see if we can lower it. Return true if we can,
5990/// false otherwise.
5991static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
5992 SDValue Op, SelectionDAG *DAG,
5993 const TargetLowering &TLI) {
5994
5995 assert((P.second == TargetLowering::C_Other ||
5996 P.second == TargetLowering::C_Immediate) &&
5997 "need immediate or other");
5998
5999 if (!Op.getNode())
6000 return false;
6001
6002 std::vector<SDValue> ResultOps;
6003 TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
6004 return !ResultOps.empty();
6005}
6006
6007/// Determines the constraint code and constraint type to use for the specific
6008/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
  } else {
    // Multiple options: rank them, then walk the immediate/other prefix of
    // the ranked list looking for the first one that actually lowers.
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    unsigned BestIdx = 0;
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(P&: G[BestIdx], Op, DAG, TLI: *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere. For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(Val: v) || isa<Function>(Val: v)) {
      return;
    }

    // Basic blocks and block addresses lower like immediates.
    if (isa<BasicBlock>(Val: v) || isa<BlockAddress>(Val: v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
    }
  }
}
6064
6065/// Given an exact SDIV by a constant, create a multiplication
6066/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(Num: 0);
  SDValue Op1 = N->getOperand(Num: 1);
  EVT VT = N->getValueType(ResNo: 0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // For each divisor d = 2^Shift * d' with d' odd, record Shift and the
  // multiplicative inverse of d' modulo 2^bitwidth. Because the division is
  // exact, (x >> Shift) * inverse(d') reproduces the quotient.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.ashrInPlace(ShiftAmt: Shift);
      UseSRA = true;
    }
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
    Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern))
    return SDValue();

  // Rebuild shift/factor operands with the same shape as the divisor operand
  // (build_vector, splat_vector, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
    Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
    Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags);
    Created.push_back(Elt: Res.getNode());
  }

  return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
}
6128
6129SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6130 SelectionDAG &DAG,
6131 SmallVectorImpl<SDNode *> &Created) const {
6132 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6133 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6134 if (TLI.isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6135 return SDValue(N, 0); // Lower SDIV as SDIV
6136 return SDValue();
6137}
6138
6139SDValue
6140TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6141 SelectionDAG &DAG,
6142 SmallVectorImpl<SDNode *> &Created) const {
6143 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6144 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6145 if (TLI.isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6146 return SDValue(N, 0); // Lower SREM as SREM
6147 return SDValue();
6148}
6149
6150/// Build sdiv by power-of-2 with conditional move instructions
6151/// Ref: "Hacker's Delight" by Henry Warren 10-1
6152/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6153/// bgez x, label
6154/// add x, x, 2**k-1
6155/// label:
6156/// sra res, x, k
6157/// neg res, res (when the divisor is negative)
6158SDValue TargetLowering::buildSDIVPow2WithCMov(
6159 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6160 SmallVectorImpl<SDNode *> &Created) const {
6161 unsigned Lg2 = Divisor.countr_zero();
6162 EVT VT = N->getValueType(ResNo: 0);
6163
6164 SDLoc DL(N);
6165 SDValue N0 = N->getOperand(Num: 0);
6166 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
6167 APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6168 SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6169
6170 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6171 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6172 SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6173 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6174 SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6175
6176 Created.push_back(Elt: Cmp.getNode());
6177 Created.push_back(Elt: Add.getNode());
6178 Created.push_back(Elt: CMov.getNode());
6179
6180 // Divide by pow2.
6181 SDValue SRA =
6182 DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov, N2: DAG.getConstant(Val: Lg2, DL, VT));
6183
6184 // If we're dividing by a positive value, we're done. Otherwise, we must
6185 // negate the result.
6186 if (Divisor.isNonNegative())
6187 return SRA;
6188
6189 Created.push_back(Elt: SRA.getNode());
6190 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6191}
6192
6193/// Given an ISD::SDIV node expressing a divide by constant,
6194/// return a DAG expression to select that will generate the same value by
6195/// multiplying by a magic number.
6196/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider type to perform the multiply in when VT itself is not legal but is
  // promoted to a type with a legal MUL (set in the !isTypeLegal path below).
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must be at least twice as wide so the full 2*EltBits
    // product (and hence the high half) can be computed with a single MUL.
    MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(Op: ISD::MUL, VT: MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);

  // Per-lane expansion constants; one entry per divisor element.
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Compute the magic-number constants for one divisor element. Returns false
  // (aborting the whole fold) for a zero divisor, which is UB anyway.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
    // NumeratorFactor in {-1,0,+1}: how many copies of the numerator to add
    // back after the high-multiply. ShiftMask is all-ones in the general case
    // (the sign-bit fixup applies) and 0 for divisors of +/-1 (no fixup).
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(Elt: DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT));
    Factors.push_back(Elt: DAG.getConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
    Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
    ShiftMasks.push_back(Elt: DAG.getConstant(Val: ShiftMask, DL: dl, VT: SVT));
    return true;
  };

  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern))
    return SDValue();

  // Rebuild the per-lane constants in the same form as the divisor operand
  // (build_vector, splat of a scalable vector, or plain scalar constant).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
    Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
    Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
    ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
    Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high EltBits of the signed product X*Y, or a null SDValue if
  // no legal/custom way to compute it exists.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
      Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
      Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
      Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
                      N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
    }

    if (isOperationLegalOrCustom(Op: ISD::MULHS, VT, LegalOnly: IsAfterLegalization))
      return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
    if (isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
      // Result 1 of SMUL_LOHI is the high half of the product.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
                                EC: VT.getVectorElementCount());
    if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
      X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: X);
      Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: Y);
      Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
      Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
                      N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
    }
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Elt: Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
  Created.push_back(Elt: Factor.getNode());
  Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
  Created.push_back(Elt: Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
  Created.push_back(Elt: Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  // (The mask is zero for +/-1 divisors, where no sign fixup is needed.)
  SDValue SignShift = DAG.getConstant(Val: EltBits - 1, DL: dl, VT: ShVT);
  SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
  Created.push_back(Elt: T.getNode());
  T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
  Created.push_back(Elt: T.getNode());
  return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
}
6354
6355/// Given an ISD::UDIV node expressing a divide by constant,
6356/// return a DAG expression to select that will generate the same value by
6357/// multiplying by a magic number.
6358/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider type to perform the multiply in when VT itself is not legal but is
  // promoted to a type with a legal MUL (set in the !isTypeLegal path below).
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must be at least twice as wide so the full 2*EltBits
    // product (and hence the high half) can be computed with a single MUL.
    MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(Op: ISD::MUL, VT: MulVT))
      return SDValue();
  }

  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  // TODO: Support vectors.
  unsigned LeadingZeros = 0;
  if (!VT.isVector() && isa<ConstantSDNode>(Val: N1)) {
    assert(!isOneConstant(N1) && "Unexpected divisor");
    LeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
    // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
    // the dividend exceeds the leading zeros for the divisor.
    LeadingZeros = std::min(a: LeadingZeros, b: N1->getAsAPIntVal().countl_zero());
  }

  // Which fixup steps are needed by at least one lane. Skipping them when
  // unused keeps the expansion minimal.
  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Compute the magic-number constants for one divisor element. Returns false
  // (aborting the whole fold) for a zero divisor, which is UB anyway.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    const APInt& Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      // Bogus lane values; the final select picks N0 directly for this lane.
      PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
    } else {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(D: Divisor, LeadingZeros);

      MagicFactor = DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
      PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
      // For the vector NPQ path: multiplying by 2^(EltBits-1) via MULHU acts
      // as a logical shift-right by one; a zero factor disables NPQ per-lane.
      NPQFactor = DAG.getConstant(
          Val: magics.IsAdd ? APInt::getOneBitSet(numBits: EltBits, BitNo: EltBits - 1)
                        : APInt::getZero(numBits: EltBits),
          DL: dl, VT: SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(Elt: PreShift);
    MagicFactors.push_back(Elt: MagicFactor);
    NPQFactors.push_back(Elt: NPQFactor);
    PostShifts.push_back(Elt: PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern))
    return SDValue();

  // Rebuild the per-lane constants in the same form as the divisor operand
  // (build_vector, splat of a scalable vector, or plain scalar constant).
  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
    MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
    PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors[0]);
    PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    // NPQFactor is deliberately not needed here: the scalar NPQ path below
    // uses a plain SRL-by-1 instead of the MULHU trick.
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
    Created.push_back(Elt: Q.getNode());
  }

  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high EltBits of the unsigned product X*Y, or a null SDValue
  // if no legal/custom way to compute it exists.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
      Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
      Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
      Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
                      N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
    }

    if (isOperationLegalOrCustom(Op: ISD::MULHU, VT, LegalOnly: IsAfterLegalization))
      return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
    if (isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
      // Result 1 of UMUL_LOHI is the high half of the product.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
                                EC: VT.getVectorElementCount());
    if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
      X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: X);
      Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: Y);
      Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
      Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
                      N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Elt: Q.getNode());

  if (UseNPQ) {
    // NPQ fixup ("N minus Q"): Q += (N0 - Q) >> 1 for lanes whose magic
    // multiplier required the extra add (magics.IsAdd).
    SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
    Created.push_back(Elt: NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT));

    Created.push_back(Elt: NPQ.getNode());

    Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
    Created.push_back(Elt: Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
    Created.push_back(Elt: Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);

  // The magic algorithm is invalid for divide-by-one lanes; select the
  // original numerator for those lanes instead.
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT);
  SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
  return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
}
6551
6552/// If all values in Values that *don't* match the predicate are same 'splat'
6553/// value, then replace all values with that splat value.
6554/// Else, if AlternativeReplacement was provided, then replace all values that
6555/// do match predicate with AlternativeReplacement value.
6556static void
6557turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6558 std::function<bool(SDValue)> Predicate,
6559 SDValue AlternativeReplacement = SDValue()) {
6560 SDValue Replacement;
6561 // Is there a value for which the Predicate does *NOT* match? What is it?
6562 auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
6563 if (SplatValue != Values.end()) {
6564 // Does Values consist only of SplatValue's and values matching Predicate?
6565 if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
6566 return Value == *SplatValue || Predicate(Value);
6567 })) // Then we shall replace values matching predicate with SplatValue.
6568 Replacement = *SplatValue;
6569 }
6570 if (!Replacement) {
6571 // Oops, we did not find the "baseline" splat value.
6572 if (!AlternativeReplacement)
6573 return; // Nothing to do.
6574 // Let's replace with provided value then.
6575 Replacement = AlternativeReplacement;
6576 }
6577 std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
6578}
6579
6580/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6581/// where the divisor is constant and the comparison target is zero,
6582/// return a DAG expression that will generate the same comparison result
6583/// using only multiplications, additions and shifts/rotations.
6584/// Ref: "Hacker's Delight" 10-17.
6585SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6586 SDValue CompTargetNode,
6587 ISD::CondCode Cond,
6588 DAGCombinerInfo &DCI,
6589 const SDLoc &DL) const {
6590 SmallVector<SDNode *, 5> Built;
6591 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6592 DCI, DL, Created&: Built)) {
6593 for (SDNode *N : Built)
6594 DCI.AddToWorklist(N);
6595 return Folded;
6596 }
6597
6598 return SDValue();
6599}
6600
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout(), LegalTypes: !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
    return SDValue();

  // Per-lane analysis flags, accumulated by BuildUREMPattern below.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  // Compute P (multiplicative inverse), K (rotate amount) and Q (comparison
  // bound) for a single (divisor, comparison-target) lane pair. Returns false
  // (aborting the whole fold) for a zero divisor, which is UB anyway.
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(RHS: Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(shiftAmt: K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(RHS: R))
      Q -= 1;

    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to a bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
    KAmts.push_back(
        Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K), DL, VT: ShSVT));
    QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(i: 0);
  SDValue D = REMNode.getOperand(i: 1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Rebuild the per-lane constants in the same form as the divisor operand
  // (build_vector, splat of a scalable vector, or plain scalar constant).
  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
    KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
    QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
    KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // If comparing with a non-zero constant, rewrite `N u% D == C` as
  // `(N - C) u% D == 0` by subtracting C from the numerator first.
  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
  Created.push_back(Elt: Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
    Created.push_back(Elt: Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
                   Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(Elt: NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
  Created.push_back(Elt: TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops – legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETEQ ? false : true,
                                              DL, VT: SETCCVT, OpVT: SETCCVT);
    return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
                       N2: Replacement, N3: NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the note above VSELECT above.
  if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
    return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
                       N2: TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
6819
6820/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6821/// where the divisor is constant and the comparison target is zero,
6822/// return a DAG expression that will generate the same comparison result
6823/// using only multiplications, additions and shifts/rotations.
6824/// Ref: "Hacker's Delight" 10-17.
6825SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6826 SDValue CompTargetNode,
6827 ISD::CondCode Cond,
6828 DAGCombinerInfo &DCI,
6829 const SDLoc &DL) const {
6830 SmallVector<SDNode *, 7> Built;
6831 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6832 DCI, DL, Created&: Built)) {
6833 assert(Built.size() <= 7 && "Max size prediction failed.");
6834 for (SDNode *N : Built)
6835 DCI.AddToWorklist(N);
6836 return Folded;
6837 }
6838
6839 return SDValue();
6840}
6841
6842SDValue
6843TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6844 SDValue CompTargetNode, ISD::CondCode Cond,
6845 DAGCombinerInfo &DCI, const SDLoc &DL,
6846 SmallVectorImpl<SDNode *> &Created) const {
6847 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6848 // Fold:
6849 // (seteq/ne (srem N, D), 0)
6850 // To:
6851 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6852 //
6853 // - D must be constant, with D = D0 * 2^K where D0 is odd
6854 // - P is the multiplicative inverse of D0 modulo 2^W
6855 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6856 // - Q = floor((2 * A) / (2^K))
6857 // where W is the width of the common type of N and D.
6858 //
6859 // When D is a power of two (and thus D0 is 1), the normal
6860 // formula for A and Q don't apply, because the derivation
6861 // depends on D not dividing 2^(W-1), and thus theorem ZRS
6862 // does not apply. This specifically fails when N = INT_MIN.
6863 //
6864 // Instead, for power-of-two D, we use:
6865 // - A = 2^(W-1)
6866 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6867 // - Q = 2^(W-K) - 1
6868 // |-> Test that the top K bits are zero after rotation
6869 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6870 "Only applicable for (in)equality comparisons.");
6871
6872 SelectionDAG &DAG = DCI.DAG;
6873
6874 EVT VT = REMNode.getValueType();
6875 EVT SVT = VT.getScalarType();
6876 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout(), LegalTypes: !DCI.isBeforeLegalize());
6877 EVT ShSVT = ShVT.getScalarType();
6878
6879 // If we are after ops legalization, and MUL is unavailable, we can not
6880 // proceed.
6881 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6882 return SDValue();
6883
6884 // TODO: Could support comparing with non-zero too.
6885 ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
6886 if (!CompTarget || !CompTarget->isZero())
6887 return SDValue();
6888
6889 bool HadIntMinDivisor = false;
6890 bool HadOneDivisor = false;
6891 bool AllDivisorsAreOnes = true;
6892 bool HadEvenDivisor = false;
6893 bool NeedToApplyOffset = false;
6894 bool AllDivisorsArePowerOfTwo = true;
6895 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6896
6897 auto BuildSREMPattern = [&](ConstantSDNode *C) {
6898 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6899 if (C->isZero())
6900 return false;
6901
6902 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6903
6904 // WARNING: this fold is only valid for positive divisors!
6905 APInt D = C->getAPIntValue();
6906 if (D.isNegative())
6907 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6908
6909 HadIntMinDivisor |= D.isMinSignedValue();
6910
6911 // If all divisors are ones, we will prefer to avoid the fold.
6912 HadOneDivisor |= D.isOne();
6913 AllDivisorsAreOnes &= D.isOne();
6914
6915 // Decompose D into D0 * 2^K
6916 unsigned K = D.countr_zero();
6917 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6918 APInt D0 = D.lshr(shiftAmt: K);
6919
6920 if (!D.isMinSignedValue()) {
6921 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
6922 // we don't care about this lane in this fold, we'll special-handle it.
6923 HadEvenDivisor |= (K != 0);
6924 }
6925
6926 // D is a power-of-two if D0 is one. This includes INT_MIN.
6927 // If all divisors are power-of-two, we will prefer to avoid the fold.
6928 AllDivisorsArePowerOfTwo &= D0.isOne();
6929
6930 // P = inv(D0, 2^W)
6931 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6932 unsigned W = D.getBitWidth();
6933 APInt P = D0.multiplicativeInverse();
6934 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6935
6936 // A = floor((2^(W - 1) - 1) / D0) & -2^K
6937 APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
6938 A.clearLowBits(loBits: K);
6939
6940 if (!D.isMinSignedValue()) {
6941 // If divisor INT_MIN, then we don't care about this lane in this fold,
6942 // we'll special-handle it.
6943 NeedToApplyOffset |= A != 0;
6944 }
6945
6946 // Q = floor((2 * A) / (2^K))
6947 APInt Q = (2 * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));
6948
6949 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
6950 "We are expecting that A is always less than all-ones for SVT");
6951 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6952 "We are expecting that K is always less than all-ones for ShSVT");
6953
6954 // If D was a power of two, apply the alternate constant derivation.
6955 if (D0.isOne()) {
6956 // A = 2^(W-1)
6957 A = APInt::getSignedMinValue(numBits: W);
6958 // - Q = 2^(W-K) - 1
6959 Q = APInt::getAllOnes(numBits: W - K).zext(width: W);
6960 }
6961
6962 // If the divisor is 1 the result can be constant-folded. Likewise, we
6963 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
6964 if (D.isOne()) {
6965 // Set P, A and K to a bogus values so we can try to splat them.
6966 P = 0;
6967 A = -1;
6968 K = -1;
6969
6970 // x ?% 1 == 0 <--> true <--> x u<= -1
6971 Q = -1;
6972 }
6973
6974 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
6975 AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
6976 KAmts.push_back(
6977 Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K), DL, VT: ShSVT));
6978 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
6979 return true;
6980 };
6981
6982 SDValue N = REMNode.getOperand(i: 0);
6983 SDValue D = REMNode.getOperand(i: 1);
6984
6985 // Collect the values from each element.
6986 if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
6987 return SDValue();
6988
6989 // If this is a srem by a one, avoid the fold since it can be constant-folded.
6990 if (AllDivisorsAreOnes)
6991 return SDValue();
6992
6993 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
6994 // since it can be best implemented as a bit test.
6995 if (AllDivisorsArePowerOfTwo)
6996 return SDValue();
6997
6998 SDValue PVal, AVal, KVal, QVal;
6999 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7000 if (HadOneDivisor) {
7001 // Try to turn PAmts into a splat, since we don't care about the values
7002 // that are currently '0'. If we can't, just keep '0'`s.
7003 turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7004 // Try to turn AAmts into a splat, since we don't care about the
7005 // values that are currently '-1'. If we can't, change them to '0'`s.
7006 turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
7007 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: SVT));
7008 // Try to turn KAmts into a splat, since we don't care about the values
7009 // that are currently '-1'. If we can't, change them to '0'`s.
7010 turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7011 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
7012 }
7013
7014 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7015 AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
7016 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7017 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7018 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7019 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7020 QAmts.size() == 1 &&
7021 "Expected matchUnaryPredicate to return one element for scalable "
7022 "vectors");
7023 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
7024 AVal = DAG.getSplatVector(VT, DL, Op: AAmts[0]);
7025 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
7026 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
7027 } else {
7028 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7029 PVal = PAmts[0];
7030 AVal = AAmts[0];
7031 KVal = KAmts[0];
7032 QVal = QAmts[0];
7033 }
7034
7035 // (mul N, P)
7036 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7037 Created.push_back(Elt: Op0.getNode());
7038
7039 if (NeedToApplyOffset) {
7040 // We need ADD to do this.
7041 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
7042 return SDValue();
7043
7044 // (add (mul N, P), A)
7045 Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
7046 Created.push_back(Elt: Op0.getNode());
7047 }
7048
7049 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7050 // divisors as a performance improvement, since rotating by 0 is a no-op.
7051 if (HadEvenDivisor) {
7052 // We need ROTR to do this.
7053 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7054 return SDValue();
7055 // SREM: (rotr (add (mul N, P), A), K)
7056 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7057 Created.push_back(Elt: Op0.getNode());
7058 }
7059
7060 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7061 SDValue Fold =
7062 DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7063 Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7064
7065 // If we didn't have lanes with INT_MIN divisor, then we're done.
7066 if (!HadIntMinDivisor)
7067 return Fold;
7068
7069 // That fold is only valid for positive divisors. Which effectively means,
7070 // it is invalid for INT_MIN divisors. So if we have such a lane,
7071 // we must fix-up results for said lanes.
7072 assert(VT.isVector() && "Can/should only get here for vectors.");
7073
7074 // NOTE: we avoid letting illegal types through even if we're before legalize
7075 // ops – legalization has a hard time producing good code for the code that
7076 // follows.
7077 if (!isOperationLegalOrCustom(Op: ISD::SETCC, VT: SETCCVT) ||
7078 !isOperationLegalOrCustom(Op: ISD::AND, VT) ||
7079 !isCondCodeLegalOrCustom(CC: Cond, VT: VT.getSimpleVT()) ||
7080 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT))
7081 return SDValue();
7082
7083 Created.push_back(Elt: Fold.getNode());
7084
7085 SDValue IntMin = DAG.getConstant(
7086 Val: APInt::getSignedMinValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7087 SDValue IntMax = DAG.getConstant(
7088 Val: APInt::getSignedMaxValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7089 SDValue Zero =
7090 DAG.getConstant(Val: APInt::getZero(numBits: SVT.getScalarSizeInBits()), DL, VT);
7091
7092 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7093 SDValue DivisorIsIntMin = DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: IntMin, Cond: ISD::SETEQ);
7094 Created.push_back(Elt: DivisorIsIntMin.getNode());
7095
7096 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7097 SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N, N2: IntMax);
7098 Created.push_back(Elt: Masked.getNode());
7099 SDValue MaskedIsZero = DAG.getSetCC(DL, VT: SETCCVT, LHS: Masked, RHS: Zero, Cond);
7100 Created.push_back(Elt: MaskedIsZero.getNode());
7101
7102 // To produce final result we need to blend 2 vectors: 'SetCC' and
7103 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7104 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7105 // constant-folded, select can get lowered to a shuffle with constant mask.
7106 SDValue Blended = DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: DivisorIsIntMin,
7107 N2: MaskedIsZero, N3: Fold);
7108
7109 return Blended;
7110}
7111
7112bool TargetLowering::
7113verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7114 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) {
7115 DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_return_address' must "
7116 "be a constant integer");
7117 return true;
7118 }
7119
7120 return false;
7121}
7122
7123SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7124 const DenormalMode &Mode) const {
7125 SDLoc DL(Op);
7126 EVT VT = Op.getValueType();
7127 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7128 SDValue FPZero = DAG.getConstantFP(Val: 0.0, DL, VT);
7129
7130 // This is specifically a check for the handling of denormal inputs, not the
7131 // result.
7132 if (Mode.Input == DenormalMode::PreserveSign ||
7133 Mode.Input == DenormalMode::PositiveZero) {
7134 // Test = X == 0.0
7135 return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ);
7136 }
7137
7138 // Testing it with denormal inputs to avoid wrong estimate.
7139 //
7140 // Test = fabs(X) < SmallestNormal
7141 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7142 APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7143 SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7144 SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op);
7145 return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT);
7146}
7147
/// Return the negated form of \p Op if it is cheap enough to build, together
/// with a classification of its cost.
///
/// \param Op         Expression to negate.
/// \param DAG        DAG in which any replacement nodes are created.
/// \param LegalOps   If true, only produce operations/immediates the target
///                   considers legal (we are past operation legalization).
/// \param OptForSize Whether we are optimizing for size (affects which FP
///                   immediates count as legal).
/// \param Cost       [out] Cost of the returned expression relative to the
///                   original (Cheaper / Neutral / Expensive).
/// \param Depth      Recursion depth; gives up past
///                   SelectionDAG::MaxRecursionDepth.
/// \returns The negated expression, or an empty SDValue if negation is not
///          profitable/possible here.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(i: 0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: 0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Helper to drop a speculatively-built negation that ended up unused.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N: N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  // (std::list gives stable element addresses across emplace_back, which the
  // handles require.)
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(Op: ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
                     ForCodeSize: OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Range: Op->op_values(), P: [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
        }))
      break;

    bool IsOpLegal =
        (isOperationLegal(Op: ISD::ConstantFP, VT) &&
         isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Range: Op->op_values(), P: [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
                              ForCodeSize: OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Rebuild the vector with each (non-undef) element sign-flipped.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(Elt: C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
      V.changeSign();
      Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    // Only valid when signed zeros can be ignored (fneg(0+0) != (-0)+(-0)
    // for signed zero purposes otherwise).
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(args&: NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(N: X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    // Negating either operand of a product/quotient negates the result, so
    // pick whichever side is cheaper to negate.
    SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(args&: NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: 1)))
      if (C->isExactlyValue(V: 2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // -(X*Y + Z) requires negating Z and exactly one of X/Y.
    SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1), Z = Op.getOperand(i: 2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(args&: NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(args&: NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(a: CostX, b: CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(a: CostY, b: CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // These commute with negation: fneg(op(X)) == op(fneg(X)).
    if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, Operand: NegV);
    break;
  case ISD::FP_ROUND:
    if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: 1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(i: 1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(args&: NegLHS);

    SDValue RHS = Op.getOperand(i: 2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(a: CostLHS, b: CostRHS);
    return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: 0), LHS: NegLHS, RHS: NegRHS);
  }
  }

  return SDValue();
}
7451
7452//===----------------------------------------------------------------------===//
7453// Legalization Utilities
7454//===----------------------------------------------------------------------===//
7455
/// Expand a multiply of type VT into operations on the half-width type
/// HiLoVT, using whichever of MULHS/MULHU/SMUL_LOHI/UMUL_LOHI the target
/// supports on HiLoVT (all are assumed available when Kind == Always).
///
/// \param Opcode  ISD::MUL, ISD::UMUL_LOHI or ISD::SMUL_LOHI.
/// \param Result  [out] Receives 2 HiLoVT values (lo, hi of the VT product)
///                for ISD::MUL, or 4 values for the *MUL_LOHI opcodes.
/// \param LL,LH,RL,RH  Optional pre-split halves of LHS/RHS; either all four
///                are provided or none are.
/// \returns false if no usable half-width multiply exists or the operands
///          could not be split; Result is only valid on a true return.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
  // Emit a HiLoVT x HiLoVT -> {Lo, Hi} multiply, preferring the fused
  // *MUL_LOHI form over a separate MUL + MULH pair.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
      Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Split the operands into low halves if the caller did not supply them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
    LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
    RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
  }

  if (!LL.getNode())
    return false;

  // Fast path: both operands fit in the low half (zero-extended), so a single
  // unsigned half-width multiply gives the full product.
  APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
  if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
      DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Elt: Lo);
      Result.push_back(Elt: Hi);
      if (Opcode != ISD::MUL) {
        SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
        Result.push_back(Elt: Zero);
        Result.push_back(Elt: Zero);
      }
      return true;
    }
  }

  // Similar fast path when both operands are sign-extended from the low half.
  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Elt: Lo);
      Result.push_back(Elt: Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);

  // Split the operands into high halves if the caller did not supply them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
      isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
    LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
    LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
    RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
    RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
  }

  if (!LH.getNode())
    return false;

  // Low partial product: LL * RL.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Elt: Lo);

  if (Opcode == ISD::MUL) {
    // Plain MUL only needs the low VT-wide result: the cross terms
    // LL*RH and LH*RL contribute only to the high half.
    RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
    LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
    Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
    Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
    Result.push_back(Elt: Hi);
    return true;
  }

  // Compute the full width result.
  // Merge packs a {Lo, Hi} pair of HiLoVT values back into one VT value.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
    Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
    Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
  };

  // Accumulate the middle partial products (LL*RH and LH*RL) into Next.
  SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
  EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);

  // The second middle-term add can carry; propagate that carry into the top
  // partial product, using glued ADDC/ADDE when the target has them and
  // UADDO_CARRY otherwise.
  bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(Op: ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
                       N2: Merge(Lo, Hi), N3: DAG.getConstant(Val: 0, DL: dl, VT: BoolType));

  SDValue Carry = Next.getValue(R: 1);
  Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
  Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);

  // Top partial product: LH * RH (signed for SMUL_LOHI).
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
                     N2: Zero, N3: Carry);

  Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Correct the unsigned product for signed inputs: if a high half is
    // negative, subtract the other operand's (zero-extended) low half.
    SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
                                  N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
    Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);

    NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
                          N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
    Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
  }

  Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
  Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
  Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
  return true;
}
7630
7631bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7632 SelectionDAG &DAG, MulExpansionKind Kind,
7633 SDValue LL, SDValue LH, SDValue RL,
7634 SDValue RH) const {
7635 SmallVector<SDValue, 2> Result;
7636 bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: 0), dl: SDLoc(N),
7637 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Result, HiLoVT,
7638 DAG, Kind, LL, LH, RL, RH);
7639 if (Ok) {
7640 assert(Result.size() == 2);
7641 Lo = Result[0];
7642 Hi = Result[1];
7643 }
7644 return Ok;
7645}
7646
7647// Optimize unsigned division or remainder by constants for types twice as large
7648// as a legal VT.
7649//
// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder can be
// computed as:
7653// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7654// Remainder = Sum % Constant
7655// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7656//
7657// For division, we can compute the remainder using the algorithm described
7658// above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
7660// (1 << (BitWidth / 2)) to get the quotient.
7661
7662// If Constant is even, we can shift right the dividend and the divisor by the
7663// number of trailing zeros in Constant before applying the remainder algorithm.
7664// If we're after the quotient, we can subtract this value from the shifted
7665// dividend and multiply by the multiplicative inverse of the shifted divisor.
7666// If we want the remainder, we shift the value left by the number of trailing
7667// zeros and add the bits that were shifted out of the dividend.
7668bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7669 SmallVectorImpl<SDValue> &Result,
7670 EVT HiLoVT, SelectionDAG &DAG,
7671 SDValue LL, SDValue LH) const {
7672 unsigned Opcode = N->getOpcode();
7673 EVT VT = N->getValueType(ResNo: 0);
7674
7675 // TODO: Support signed division/remainder.
7676 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7677 return false;
7678 assert(
7679 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7680 "Unexpected opcode");
7681
7682 auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
7683 if (!CN)
7684 return false;
7685
7686 APInt Divisor = CN->getAPIntValue();
7687 unsigned BitWidth = Divisor.getBitWidth();
7688 unsigned HBitWidth = BitWidth / 2;
7689 assert(VT.getScalarSizeInBits() == BitWidth &&
7690 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7691
7692 // Divisor needs to less than (1 << HBitWidth).
7693 APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
7694 if (Divisor.uge(RHS: HalfMaxPlus1))
7695 return false;
7696
7697 // We depend on the UREM by constant optimization in DAGCombiner that requires
7698 // high multiply.
7699 if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
7700 !isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
7701 return false;
7702
7703 // Don't expand if optimizing for size.
7704 if (DAG.shouldOptForSize())
7705 return false;
7706
7707 // Early out for 0 or 1 divisors.
7708 if (Divisor.ule(RHS: 1))
7709 return false;
7710
7711 // If the divisor is even, shift it until it becomes odd.
7712 unsigned TrailingZeros = 0;
7713 if (!Divisor[0]) {
7714 TrailingZeros = Divisor.countr_zero();
7715 Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
7716 }
7717
7718 SDLoc dl(N);
7719 SDValue Sum;
7720 SDValue PartialRem;
7721
7722 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7723 // then add in the carry.
7724 // TODO: If we can't split it in half, we might be able to split into 3 or
7725 // more pieces using a smaller bit width.
7726 if (HalfMaxPlus1.urem(RHS: Divisor).isOne()) {
7727 assert(!LL == !LH && "Expected both input halves or no input halves!");
7728 if (!LL)
7729 std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: 0), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
7730
7731 // Shift the input by the number of TrailingZeros in the divisor. The
7732 // shifted out bits will be added to the remainder later.
7733 if (TrailingZeros) {
7734 // Save the shifted off bits if we need the remainder.
7735 if (Opcode != ISD::UDIV) {
7736 APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
7737 PartialRem = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
7738 N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
7739 }
7740
7741 LL = DAG.getNode(
7742 Opcode: ISD::OR, DL: dl, VT: HiLoVT,
7743 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LL,
7744 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl)),
7745 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: LH,
7746 N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros,
7747 VT: HiLoVT, DL: dl)));
7748 LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
7749 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
7750 }
7751
7752 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7753 EVT SetCCType =
7754 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
7755 if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
7756 SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
7757 Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
7758 Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
7759 N2: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: 1));
7760 } else {
7761 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
7762 SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
7763 // If the boolean for the target is 0 or 1, we can add the setcc result
7764 // directly.
7765 if (getBooleanContents(Type: HiLoVT) ==
7766 TargetLoweringBase::ZeroOrOneBooleanContent)
7767 Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
7768 else
7769 Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: 1, DL: dl, VT: HiLoVT),
7770 RHS: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
7771 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
7772 }
7773 }
7774
7775 // If we didn't find a sum, we can't do the expansion.
7776 if (!Sum)
7777 return false;
7778
7779 // Perform a HiLoVT urem on the Sum using truncated divisor.
7780 SDValue RemL =
7781 DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
7782 N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
7783 SDValue RemH = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7784
7785 if (Opcode != ISD::UREM) {
7786 // Subtract the remainder from the shifted dividend.
7787 SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
7788 SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);
7789
7790 Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);
7791
7792 // Multiply by the multiplicative inverse of the divisor modulo
7793 // (1 << BitWidth).
7794 APInt MulFactor = Divisor.multiplicativeInverse();
7795
7796 SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
7797 N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));
7798
7799 // Split the quotient into low and high parts.
7800 SDValue QuotL, QuotH;
7801 std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
7802 Result.push_back(Elt: QuotL);
7803 Result.push_back(Elt: QuotH);
7804 }
7805
7806 if (Opcode != ISD::UDIV) {
7807 // If we shifted the input, shift the remainder left and add the bits we
7808 // shifted off the input.
7809 if (TrailingZeros) {
7810 APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
7811 RemL = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
7812 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
7813 RemL = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRem);
7814 }
7815 Result.push_back(Elt: RemL);
7816 Result.push_back(Elt: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
7817 }
7818
7819 return true;
7820}
7821
7822// Check that (every element of) Z is undef or not an exact multiple of BW.
7823static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7824 return ISD::matchUnaryPredicate(
7825 Op: Z,
7826 Match: [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(RHS: BW) != 0; },
7827 AllowUndefs: true);
7828}
7829
// Expand a vector-predicated funnel shift (ISD::VP_FSHL / ISD::VP_FSHR) into
// VP shift/mask/or operations. Every intermediate node carries the original
// per-lane mask (operand 3) and explicit vector length (operand 4).
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(ResNo: 0);
  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  SDValue X = Node->getOperand(Num: 0);    // High input half.
  SDValue Y = Node->getOperand(Num: 1);    // Low input half.
  SDValue Z = Node->getOperand(Num: 2);    // Shift amount.
  SDValue Mask = Node->getOperand(Num: 3); // Per-lane predicate.
  SDValue VL = Node->getOperand(Num: 4);   // Explicit vector length.

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  // If no element of Z can be an exact multiple of BW, the complement
  // amount (BW - C) is always in (0, BW), so the direct two-shift form is
  // safe; otherwise fall through to the shift-by-one splitting below.
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
    ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
    InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
    ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
                      N4: VL);
    ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
                      N4: VL);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue BitMask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
    if (isPowerOf2_32(Value: BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
                                 N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
      InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
    } else {
      SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
      ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
      InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
    }

    // Split the complement shift into "by 1, then by InvShAmt" so the total
    // amount stays strictly below BW even when Z % BW == 0.
    SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
      SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
      ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
    } else {
      SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
      ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
      ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
    }
  }
  // OR the two shifted halves back together.
  return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
}
7886
7887SDValue TargetLowering::expandFunnelShift(SDNode *Node,
7888 SelectionDAG &DAG) const {
7889 if (Node->isVPOpcode())
7890 return expandVPFunnelShift(Node, DAG);
7891
7892 EVT VT = Node->getValueType(ResNo: 0);
7893
7894 if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
7895 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
7896 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
7897 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
7898 return SDValue();
7899
7900 SDValue X = Node->getOperand(Num: 0);
7901 SDValue Y = Node->getOperand(Num: 1);
7902 SDValue Z = Node->getOperand(Num: 2);
7903
7904 unsigned BW = VT.getScalarSizeInBits();
7905 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7906 SDLoc DL(SDValue(Node, 0));
7907
7908 EVT ShVT = Z.getValueType();
7909
7910 // If a funnel shift in the other direction is more supported, use it.
7911 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7912 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
7913 isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
7914 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7915 // fshl X, Y, Z -> fshr X, Y, -Z
7916 // fshr X, Y, Z -> fshl X, Y, -Z
7917 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);
7918 Z = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Z);
7919 } else {
7920 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7921 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7922 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
7923 if (IsFSHL) {
7924 Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
7925 X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
7926 } else {
7927 X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
7928 Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
7929 }
7930 Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
7931 }
7932 return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
7933 }
7934
7935 SDValue ShX, ShY;
7936 SDValue ShAmt, InvShAmt;
7937 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7938 // fshl: X << C | Y >> (BW - C)
7939 // fshr: X << (BW - C) | Y >> C
7940 // where C = Z % BW is not zero
7941 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7942 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
7943 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
7944 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
7945 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
7946 } else {
7947 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7948 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7949 SDValue Mask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
7950 if (isPowerOf2_32(Value: BW)) {
7951 // Z % BW -> Z & (BW - 1)
7952 ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
7953 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7954 InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
7955 } else {
7956 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7957 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
7958 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
7959 }
7960
7961 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
7962 if (IsFSHL) {
7963 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
7964 SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
7965 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
7966 } else {
7967 SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
7968 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
7969 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
7970 }
7971 }
7972 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
7973}
7974
// TODO: Merge with expandFunnelShift.
/// Expand ISD::ROTL / ISD::ROTR into shifts and an OR, or into the opposite
/// rotate direction when that is better supported by the target. When
/// \p AllowVectorOps is false, vector rotates are only expanded if all the
/// component vector operations are legal or custom. Returns SDValue() if no
/// expansion was possible.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(ResNo: 0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(Num: 0); // Value being rotated.
  SDValue Op1 = Node->getOperand(Num: 1); // Rotate amount.
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);

  // If a rotate in the other direction is more supported, use it.
  // rot(l|r) x, c is equivalent to rot(r|l) x, -c when the width is a power
  // of two (the negation wraps modulo the bit width).
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
    SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
    return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
  }

  // Without permission to use arbitrary vector ops, only expand vectors when
  // every operation the expansion emits is legal or custom for VT.
  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
       !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
       !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
    return SDValue();

  // ShOpc shifts in the rotate direction; HsOpc recovers the wrapped bits.
  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - 1, DL, VT: ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(Value: EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
    SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
    ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
    SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
    HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    // The opposite-direction shift is split into "by 1, then by (w-1-c)" so
    // its amount stays strictly below w even when c % w == 0.
    SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
    SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
    ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
    SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
    SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
    HsVal =
        DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
  }
  return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
}
8030
/// Expand a double-wide shift (ISD::SHL_PARTS / ISD::SRL_PARTS /
/// ISD::SRA_PARTS) whose value is represented as two parts of type VT.
/// The low/high result parts are returned through \p Lo and \p Hi. The
/// expansion builds a funnel shift for the part that receives bits from the
/// other half, plus selects to handle shift amounts >= one part width.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(ResNo: 0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(Num: 0);
  SDValue ShOpHi = Node->getOperand(Num: 1);
  SDValue ShAmt = Node->getOperand(Num: 2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
                                  N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT));
  // Tmp1 is the value shifted in from "outside": sign bits for SRA, zero for
  // SHL/SRL. It is selected when the shift amount is >= VTBits.
  SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
                                     N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT))
                       : DAG.getConstant(Val: 0, DL: dl, VT);

  // Tmp2: the part that combines bits from both halves (funnel shift).
  // Tmp3: the part produced from a single input half.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
    Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
                                N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
  SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
                              RHS: DAG.getConstant(Val: 0, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
    Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
  } else {
    Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
    Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
  }
}
8082
/// Expand a float-to-signed-integer conversion by bit-twiddling the IEEE-754
/// representation. Currently only handles f32 -> i64, following compiler-rt's
/// fixsfdi algorithm. Returns true and sets \p Result on success; returns
/// false (leaving \p Result untouched) when the expansion does not apply.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());

  // IEEE-754 single-precision field constants: 8-bit exponent at bit 23,
  // 23-bit mantissa, sign in the top bit, exponent bias of 127.
  SDValue ExponentMask = DAG.getConstant(Val: 0x7F800000, DL: dl, VT: IntVT);
  SDValue ExponentLoBit = DAG.getConstant(Val: 23, DL: dl, VT: IntVT);
  SDValue Bias = DAG.getConstant(Val: 127, DL: dl, VT: IntVT);
  SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
  SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - 1, DL: dl, VT: IntVT);
  SDValue MantissaMask = DAG.getConstant(Val: 0x007FFFFF, DL: dl, VT: IntVT);

  // Reinterpret the float bits as an integer.
  SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);

  // Extract the unbiased exponent.
  SDValue ExponentBits = DAG.getNode(
      Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
      N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
  SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);

  // Broadcast the sign bit across the word: all-ones if negative, zero if
  // positive (arithmetic shift of the isolated sign bit).
  SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
                             N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
                             N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
  Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);

  // Reconstruct the significand with the implicit leading 1 bit (0x00800000).
  SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
                          N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
                          N2: DAG.getConstant(Val: 0x00800000, DL: dl, VT: IntVT));

  R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);

  // Scale the significand: shift left if the exponent exceeds the mantissa
  // width, shift right otherwise.
  R = DAG.getSelectCC(
      DL: dl, LHS: Exponent, RHS: ExponentLoBit,
      True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
                  N2: DAG.getZExtOrTrunc(
                      Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
                      DL: dl, VT: IntShVT)),
      False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
                  N2: DAG.getZExtOrTrunc(
                      Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
                      DL: dl, VT: IntShVT)),
      Cond: ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
                            N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);

  // A negative unbiased exponent means |Src| < 1, so the result is 0.
  Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: 0, DL: dl, VT: IntVT),
                           True: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
  return true;
}
8153
/// Expand FP_TO_UINT (and its strict variant) in terms of FP_TO_SINT.
/// Sets \p Result (and \p Chain for strict nodes) and returns true on
/// success; returns false when the target lacks the operations needed.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);
  EVT SetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) ||
                           !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(VT: SrcVT);
  APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(R: 1);
    } else
      Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
    return false;

  // Cst holds the destination signmask (2^(DstBits-1)) as a float; values
  // below it fit in the signed range directly.
  SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Signaling compare: strict semantics must preserve FP exceptions.
    Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
                       Chain: Node->getOperand(Num: 0), /*IsSignaling*/ true);
    Chain = Sel.getValue(R: 1);
  } else {
    Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
                                   LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
    Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
    SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
                                   LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT),
                                   RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(R: 1);
    } else {
      SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
      SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
    }
    Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
                                Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
    False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
                        N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
    Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
    Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
  }
  return true;
}
8255
/// Expand UINT_TO_FP for i64 -> f64 using the split-halves technique from
/// compiler-rt's __floatundidf. Sets \p Result and returns true on success;
/// returns false when the expansion does not apply (strict FP, other types,
/// or missing vector operations).
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable under
  // strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(Num: 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) ||
                           !isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) ||
                           !isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) ||
                           !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(LHSTy: SrcVT, DL: DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt. This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will produce
  // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
  // The magic constants below are 2^52 and 2^84 encoded as f64 bit patterns:
  // OR-ing a 32-bit half into their mantissa yields an exact f64 value.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), DL: dl, VT: SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      Val: llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), DL: dl, VT: DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), DL: dl, VT: SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), DL: dl, VT: SrcVT);
  SDValue HiShift = DAG.getConstant(Val: 32, DL: dl, VT: ShiftVT);

  // Split the input into 32-bit halves, embed each in a f64 mantissa, then
  // subtract the combined bias and add the halves back together.
  SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
  SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
  SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
  SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
  SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
  SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
  SDValue HiSub =
      DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
  Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
  return true;
}
8306
8307SDValue
8308TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8309 SelectionDAG &DAG) const {
8310 unsigned Opcode = Node->getOpcode();
8311 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8312 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8313 "Wrong opcode");
8314
8315 if (Node->getFlags().hasNoNaNs()) {
8316 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8317 SDValue Op1 = Node->getOperand(Num: 0);
8318 SDValue Op2 = Node->getOperand(Num: 1);
8319 SDValue SelCC = DAG.getSelectCC(DL: SDLoc(Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred);
8320 // Copy FMF flags, but always set the no-signed-zeros flag
8321 // as this is implied by the FMINNUM/FMAXNUM semantics.
8322 SDNodeFlags Flags = Node->getFlags();
8323 Flags.setNoSignedZeros(true);
8324 SelCC->setFlags(Flags);
8325 return SelCC;
8326 }
8327
8328 return SDValue();
8329}
8330
/// Expand FMINNUM/FMAXNUM into whatever related operation the target does
/// support: FMINNUM_IEEE/FMAXNUM_IEEE (with canonicalization to quiet
/// possible sNaN inputs), FMINIMUM/FMAXIMUM (when NaNs and signed-zero
/// mismatches are provably absent), or a compare + select. Returns SDValue()
/// if none of those are available.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
    ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
  EVT VT = Node->getValueType(ResNo: 0);

  if (VT.isScalableVector())
    report_fatal_error(
        reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  if (isOperationLegalOrCustom(Op: NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(Num: 0);
    SDValue Quiet1 = Node->getOperand(Num: 1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
        Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
                             Flags: Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
        Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
                             Flags: Node->getFlags());
      }
    }

    return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
  if ((Node->getFlags().hasNoNaNs() ||
       (DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 0)) &&
        DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 1)))) &&
      (Node->getFlags().hasNoSignedZeros() ||
       DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: 0)) ||
       DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: 1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
      return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: 0),
                         N2: Node->getOperand(Num: 1), Flags: Node->getFlags());
  }

  // Last resort: a plain compare + select (only legal with no-NaNs).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
8383
8384/// Returns a true value if if this FPClassTest can be performed with an ordered
8385/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8386/// std::nullopt if it cannot be performed as a compare with 0.
8387static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8388 const fltSemantics &Semantics,
8389 const MachineFunction &MF) {
8390 FPClassTest OrderedMask = Test & ~fcNan;
8391 FPClassTest NanTest = Test & fcNan;
8392 bool IsOrdered = NanTest == fcNone;
8393 bool IsUnordered = NanTest == fcNan;
8394
8395 // Skip cases that are testing for only a qnan or snan.
8396 if (!IsOrdered && !IsUnordered)
8397 return std::nullopt;
8398
8399 if (OrderedMask == fcZero &&
8400 MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
8401 return IsOrdered;
8402 if (OrderedMask == (fcZero | fcSubnormal) &&
8403 MF.getDenormalMode(FPType: Semantics).inputsAreZero())
8404 return IsOrdered;
8405 return std::nullopt;
8406}
8407
8408SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8409 FPClassTest Test, SDNodeFlags Flags,
8410 const SDLoc &DL,
8411 SelectionDAG &DAG) const {
8412 EVT OperandVT = Op.getValueType();
8413 assert(OperandVT.isFloatingPoint());
8414
8415 // Degenerated cases.
8416 if (Test == fcNone)
8417 return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
8418 if ((Test & fcAllFlags) == fcAllFlags)
8419 return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
8420
8421 // PPC double double is a pair of doubles, of which the higher part determines
8422 // the value class.
8423 if (OperandVT == MVT::ppcf128) {
8424 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8425 DAG.getConstant(1, DL, MVT::i32));
8426 OperandVT = MVT::f64;
8427 }
8428
8429 // Some checks may be represented as inversion of simpler check, for example
8430 // "inf|normal|subnormal|zero" => !"nan".
8431 bool IsInverted = false;
8432 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8433 IsInverted = true;
8434 Test = InvertedCheck;
8435 }
8436
8437 // Floating-point type properties.
8438 EVT ScalarFloatVT = OperandVT.getScalarType();
8439 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(Context&: *DAG.getContext());
8440 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8441 bool IsF80 = (ScalarFloatVT == MVT::f80);
8442
8443 // Some checks can be implemented using float comparisons, if floating point
8444 // exceptions are ignored.
8445 if (Flags.hasNoFPExcept() &&
8446 isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
8447 ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8448 ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8449
8450 if (std::optional<bool> IsCmp0 =
8451 isFCmpEqualZero(Test, Semantics, MF: DAG.getMachineFunction());
8452 IsCmp0 && (isCondCodeLegalOrCustom(
8453 CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8454 VT: OperandVT.getScalarType().getSimpleVT()))) {
8455
8456 // If denormals could be implicitly treated as 0, this is not equivalent
8457 // to a compare with 0 since it will also be true for denormals.
8458 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
8459 RHS: DAG.getConstantFP(Val: 0.0, DL, VT: OperandVT),
8460 Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8461 }
8462
8463 if (Test == fcNan &&
8464 isCondCodeLegalOrCustom(CC: IsInverted ? ISD::SETO : ISD::SETUO,
8465 VT: OperandVT.getScalarType().getSimpleVT())) {
8466 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
8467 Cond: IsInverted ? ISD::SETO : ISD::SETUO);
8468 }
8469
8470 if (Test == fcInf &&
8471 isCondCodeLegalOrCustom(CC: IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8472 VT: OperandVT.getScalarType().getSimpleVT()) &&
8473 isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType())) {
8474 // isinf(x) --> fabs(x) == inf
8475 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
8476 SDValue Inf =
8477 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
8478 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
8479 Cond: IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8480 }
8481 }
8482
8483 // In the general case use integer operations.
8484 unsigned BitSize = OperandVT.getScalarSizeInBits();
8485 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize);
8486 if (OperandVT.isVector())
8487 IntVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: IntVT,
8488 EC: OperandVT.getVectorElementCount());
8489 SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
8490
8491 // Various masks.
8492 APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
8493 APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
8494 APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
8495 const unsigned ExplicitIntBitInF80 = 63;
8496 APInt ExpMask = Inf;
8497 if (IsF80)
8498 ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
8499 APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
8500 APInt QNaNBitMask =
8501 APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - 1);
8502 APInt InvertionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
8503
8504 SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
8505 SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
8506 SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
8507 SDValue ZeroV = DAG.getConstant(Val: 0, DL, VT: IntVT);
8508 SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
8509 SDValue ResultInvertionMask = DAG.getConstant(Val: InvertionMask, DL, VT: ResultVT);
8510
8511 SDValue Res;
8512 const auto appendResult = [&](SDValue PartialRes) {
8513 if (PartialRes) {
8514 if (Res)
8515 Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
8516 else
8517 Res = PartialRes;
8518 }
8519 };
8520
8521 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8522 const auto getIntBitIsSet = [&]() -> SDValue {
8523 if (!IntBitIsSetV) {
8524 APInt IntBitMask(BitSize, 0);
8525 IntBitMask.setBit(ExplicitIntBitInF80);
8526 SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
8527 SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
8528 IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
8529 }
8530 return IntBitIsSetV;
8531 };
8532
8533 // Split the value into sign bit and absolute value.
8534 SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
8535 SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
8536 RHS: DAG.getConstant(Val: 0.0, DL, VT: IntVT), Cond: ISD::SETLT);
8537
8538 // Tests that involve more than one class should be processed first.
8539 SDValue PartialRes;
8540
8541 if (IsF80)
8542 ; // Detect finite numbers of f80 by checking individual classes because
8543 // they have different settings of the explicit integer bit.
8544 else if ((Test & fcFinite) == fcFinite) {
8545 // finite(V) ==> abs(V) < exp_mask
8546 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
8547 Test &= ~fcFinite;
8548 } else if ((Test & fcFinite) == fcPosFinite) {
8549 // finite(V) && V > 0 ==> V < exp_mask
8550 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
8551 Test &= ~fcPosFinite;
8552 } else if ((Test & fcFinite) == fcNegFinite) {
8553 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8554 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
8555 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8556 Test &= ~fcNegFinite;
8557 }
8558 appendResult(PartialRes);
8559
8560 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8561 // fcZero | fcSubnormal => test all exponent bits are 0
8562 // TODO: Handle sign bit specific cases
8563 if (PartialCheck == (fcZero | fcSubnormal)) {
8564 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
8565 SDValue ExpIsZero =
8566 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
8567 appendResult(ExpIsZero);
8568 Test &= ~PartialCheck & fcAllFlags;
8569 }
8570 }
8571
8572 // Check for individual classes.
8573
8574 if (unsigned PartialCheck = Test & fcZero) {
8575 if (PartialCheck == fcPosZero)
8576 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
8577 else if (PartialCheck == fcZero)
8578 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
8579 else // ISD::fcNegZero
8580 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
8581 appendResult(PartialRes);
8582 }
8583
8584 if (unsigned PartialCheck = Test & fcSubnormal) {
8585 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8586 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8587 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8588 SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
8589 SDValue VMinusOneV =
8590 DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: 1, DL, VT: IntVT));
8591 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
8592 if (PartialCheck == fcNegSubnormal)
8593 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8594 appendResult(PartialRes);
8595 }
8596
8597 if (unsigned PartialCheck = Test & fcInf) {
8598 if (PartialCheck == fcPosInf)
8599 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
8600 else if (PartialCheck == fcInf)
8601 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
8602 else { // ISD::fcNegInf
8603 APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
8604 SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
8605 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
8606 }
8607 appendResult(PartialRes);
8608 }
8609
8610 if (unsigned PartialCheck = Test & fcNan) {
8611 APInt InfWithQnanBit = Inf | QNaNBitMask;
8612 SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
8613 if (PartialCheck == fcNan) {
8614 // isnan(V) ==> abs(V) > int(inf)
8615 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
8616 if (IsF80) {
8617 // Recognize unsupported values as NaNs for compatibility with glibc.
8618 // In them (exp(V)==0) == int_bit.
8619 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
8620 SDValue ExpIsZero =
8621 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
8622 SDValue IsPseudo =
8623 DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet(), RHS: ExpIsZero, Cond: ISD::SETEQ);
8624 PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
8625 }
8626 } else if (PartialCheck == fcQNan) {
8627 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
8628 PartialRes =
8629 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
8630 } else { // ISD::fcSNan
8631 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
8632 // abs(V) < (unsigned(Inf) | quiet_bit)
8633 SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
8634 SDValue IsNotQnan =
8635 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
8636 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
8637 }
8638 appendResult(PartialRes);
8639 }
8640
8641 if (unsigned PartialCheck = Test & fcNormal) {
8642 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8643 APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: 1));
8644 SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
8645 SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
8646 APInt ExpLimit = ExpMask - ExpLSB;
8647 SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
8648 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
8649 if (PartialCheck == fcNegNormal)
8650 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8651 else if (PartialCheck == fcPosNormal) {
8652 SDValue PosSignV =
8653 DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInvertionMask);
8654 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
8655 }
8656 if (IsF80)
8657 PartialRes =
8658 DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet());
8659 appendResult(PartialRes);
8660 }
8661
8662 if (!Res)
8663 return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
8664 if (IsInverted)
8665 Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInvertionMask);
8666 return Res;
8667}
8668
8669// Only expand vector types if we have the appropriate vector bit operations.
8670static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8671 assert(VT.isVector() && "Expected vector type");
8672 unsigned Len = VT.getScalarSizeInBits();
8673 return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
8674 TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
8675 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
8676 (Len == 8 || TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
8677 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
8678}
8679
/// Expand an integer CTPOP (population count) node into shifts, masks, adds
/// and (optionally) a multiply, using the parallel bit-counting algorithm.
/// Returns SDValue() when the type cannot be handled here (irregular bit
/// widths, or vector types lacking the required bit operations).
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Op = Node->getOperand(Num: 0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // The three masks are byte-wise splats of 0x55, 0x33 and 0x0F.
  SDValue Mask55 =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
  SDValue Mask33 =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
  SDValue Mask0F =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
                   N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                                   N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                                   N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT)),
                                   N2: Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
                   N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                                   N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                                   N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT)),
                                   N2: Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                   N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
                                   N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                                   N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT))),
                   N2: Mask0F);

  // At this point each byte of Op holds the popcount of that input byte; for
  // 8-bit elements we are done.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                     N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
                                     N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                                     N2: DAG.getConstant(Val: 8, DL: dl, VT: ShVT))),
                     N2: DAG.getConstant(Val: 0xFF, DL: dl, VT));
  }

  // Sum the per-byte counts into the top byte, then shift it down.
  // v = (v * 0x01010101...) >> (Len - 8)
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
    V = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01);
  } else {
    // No usable multiply: emulate v * 0x0101... with a shift/add ladder.
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
      V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: V,
                      N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: ShiftC));
    }
  }
  return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT));
}
8756
8757SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8758 SDLoc dl(Node);
8759 EVT VT = Node->getValueType(ResNo: 0);
8760 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8761 SDValue Op = Node->getOperand(Num: 0);
8762 SDValue Mask = Node->getOperand(Num: 1);
8763 SDValue VL = Node->getOperand(Num: 2);
8764 unsigned Len = VT.getScalarSizeInBits();
8765 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8766
8767 // TODO: Add support for irregular type lengths.
8768 if (!(Len <= 128 && Len % 8 == 0))
8769 return SDValue();
8770
8771 // This is same algorithm of expandCTPOP from
8772 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8773 SDValue Mask55 =
8774 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
8775 SDValue Mask33 =
8776 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
8777 SDValue Mask0F =
8778 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
8779
8780 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8781
8782 // v = v - ((v >> 1) & 0x55555555...)
8783 Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
8784 N1: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op,
8785 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT), N3: Mask, N4: VL),
8786 N2: Mask55, N3: Mask, N4: VL);
8787 Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
8788
8789 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8790 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
8791 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
8792 N1: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op,
8793 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT), N3: Mask, N4: VL),
8794 N2: Mask33, N3: Mask, N4: VL);
8795 Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
8796
8797 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8798 Tmp4 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT),
8799 N3: Mask, N4: VL),
8800 Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
8801 Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
8802
8803 if (Len <= 8)
8804 return Op;
8805
8806 // v = (v * 0x01010101...) >> (Len - 8)
8807 SDValue V;
8808 if (isOperationLegalOrCustomOrPromote(
8809 Op: ISD::VP_MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
8810 SDValue Mask01 =
8811 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
8812 V = DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL);
8813 } else {
8814 V = Op;
8815 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
8816 SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
8817 V = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: V,
8818 N2: DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: V, N2: ShiftC, N3: Mask, N4: VL),
8819 N3: Mask, N4: VL);
8820 }
8821 }
8822 return DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: V,
8823 N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT), N3: Mask, N4: VL);
8824}
8825
/// Expand CTLZ/CTLZ_ZERO_UNDEF. Prefers a legal CTLZ or CTLZ_ZERO_UNDEF
/// (patching up the zero-input case for the latter); otherwise falls back to
/// bit-smearing followed by CTPOP of the inverted value.
/// Returns SDValue() for vector types lacking the required operations.
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Op = Node->getOperand(Num: 0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
    return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  // ctlz(0) is defined as the element bit width.
  if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
    return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
                         LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(TLI: *this, VT)) ||
                        !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
    Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
                     N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
  }
  Op = DAG.getNOT(DL: dl, Val: Op, VT);
  return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
}
8875
8876SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
8877 SDLoc dl(Node);
8878 EVT VT = Node->getValueType(ResNo: 0);
8879 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8880 SDValue Op = Node->getOperand(Num: 0);
8881 SDValue Mask = Node->getOperand(Num: 1);
8882 SDValue VL = Node->getOperand(Num: 2);
8883 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8884
8885 // do this:
8886 // x = x | (x >> 1);
8887 // x = x | (x >> 2);
8888 // ...
8889 // x = x | (x >>16);
8890 // x = x | (x >>32); // for 64-bit input
8891 // return popcount(~x);
8892 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8893 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
8894 Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
8895 N2: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
8896 N4: VL);
8897 }
8898 Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: -1, DL: dl, VT), N3: Mask,
8899 N4: VL);
8900 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
8901}
8902
/// Lower CTTZ via a De Bruijn multiplication and a constant-pool table lookup:
/// isolate the lowest set bit with (Op & -Op), multiply by a De Bruijn
/// constant, and use the top log2(BitWidth) bits of the product to index a
/// table of bit positions. Only 32- and 64-bit widths are handled; returns
/// SDValue() otherwise. For plain CTTZ (not ZERO_UNDEF) the zero input is
/// patched up with a select returning BitWidth.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
  // Lookup = ((Op & -Op) * DeBruijn) >> (BitWidth - log2(BitWidth))
  SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
  SDValue Lookup = DAG.getNode(
      Opcode: ISD::SRL, DL, VT,
      N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
                      N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
      N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));

  // Build the table mapping each De Bruijn product's top bits back to the
  // trailing-zero count that produced them.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(shiftAmt: i);
    APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(Context&: *DAG.getContext(), Elts&: Table);
  SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
                                      Align: TD.getPrefTypeAlign(Ty: CA->getType()));
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  // cttz(0) is defined as BitWidth; select it explicitly for the zero input.
  EVT SetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
  return DAG.getSelect(DL, VT, Cond: SrcIsZero,
                       LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
}
8946
/// Expand CTTZ/CTTZ_ZERO_UNDEF using, in order of preference: a legal CTTZ,
/// a legal CTTZ_ZERO_UNDEF plus zero patch-up, a De Bruijn table lookup, or
/// popcount/ctlz of the trailing-ones mask ~x & (x - 1).
/// Returns SDValue() for vector types lacking the required operations.
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  SDValue Op = Node->getOperand(Num: 0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
    return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  // cttz(0) is defined as the element bit width.
  if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
    return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
                         LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(TLI: *this, VT)) ||
                        !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
  if (!VT.isVector() && isOperationExpand(Op: ISD::CTPOP, VT) &&
      !isOperationLegal(Op: ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // Tmp has a 1 in exactly each trailing-zero position of Op.
  SDValue Tmp = DAG.getNode(
      Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
      N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 1, DL: dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
                       N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
  }

  return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
}
9002
9003SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9004 SDValue Op = Node->getOperand(Num: 0);
9005 SDValue Mask = Node->getOperand(Num: 1);
9006 SDValue VL = Node->getOperand(Num: 2);
9007 SDLoc dl(Node);
9008 EVT VT = Node->getValueType(ResNo: 0);
9009
9010 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9011 SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
9012 N2: DAG.getConstant(Val: -1, DL: dl, VT), N3: Mask, N4: VL);
9013 SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
9014 N2: DAG.getConstant(Val: 1, DL: dl, VT), N3: Mask, N4: VL);
9015 SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
9016 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
9017}
9018
/// Expand ABS (or, when \p IsNegative, its negation 0 - abs(x)) using
/// whichever of smax/umin/smin the target supports, falling back to the
/// classic sra/xor/sub bit trick.
/// Returns SDValue() for vector types lacking the required operations.
SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
                                  bool IsNegative) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);

  // abs(x) -> smax(x,sub(0,x))
  if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
      isOperationLegal(Op: ISD::SMAX, VT)) {
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
                       N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
  }

  // abs(x) -> umin(x,sub(0,x))
  if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
      isOperationLegal(Op: ISD::UMIN, VT)) {
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    // Freeze so that both uses of Op below observe the same value.
    Op = DAG.getFreeze(V: Op);
    return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
                       N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
  }

  // 0 - abs(x) -> smin(x, sub(0,x))
  if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
      isOperationLegal(Op: ISD::SMIN, VT)) {
    // Freeze so that both uses of Op below observe the same value.
    Op = DAG.getFreeze(V: Op);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
                       N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
  }

  // Only expand vector types if we have the appropriate vector operations.
  if (VT.isVector() &&
      (!isOperationLegalOrCustom(Op: ISD::SRA, VT) ||
       (!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) ||
       (IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) ||
       !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
    return SDValue();

  // Shift is all-sign-bits: 0 for non-negative Op, all-ones for negative Op.
  Op = DAG.getFreeze(V: Op);
  SDValue Shift = DAG.getNode(
      Opcode: ISD::SRA, DL: dl, VT, N1: Op,
      N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));
  SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);

  // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
  if (!IsNegative)
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);

  // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
  return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
}
9072
9073SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
9074 SDLoc dl(N);
9075 EVT VT = N->getValueType(ResNo: 0);
9076 SDValue LHS = DAG.getFreeze(V: N->getOperand(Num: 0));
9077 SDValue RHS = DAG.getFreeze(V: N->getOperand(Num: 1));
9078 bool IsSigned = N->getOpcode() == ISD::ABDS;
9079
9080 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9081 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9082 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9083 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9084 if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
9085 SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
9086 SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
9087 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
9088 }
9089
9090 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9091 if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT))
9092 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
9093 N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
9094 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
9095
9096 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9097 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9098 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9099 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9100 SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);
9101 return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
9102 RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
9103}
9104
/// Expand ISD::BSWAP into a sequence of shifts, masks and ORs.
/// Handles i16/i32/i64 scalar element types (vectors of those work too, since
/// the per-element shift/mask constants are splatted by getConstant).
/// Returns an empty SDValue if the type is not simple or not one of the
/// supported widths, letting the caller fall back to another strategy.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);

  // Only simple (MVT-representable) types are handled below.
  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  // Switch on the scalar element type so fixed vectors take the same path as
  // scalars of the same element width.
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
  case MVT::i32:
    // Move each of the four bytes into position, then OR them together:
    //   Tmp4 = byte0 << 24, Tmp3 = byte1 << 8,
    //   Tmp2 = byte2 >> 8 (masked), Tmp1 = byte3 >> 24.
    Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
    Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
    Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
  case MVT::i64:
    // Same idea with eight bytes: Tmp8..Tmp5 are the low four bytes shifted
    // into the high half, Tmp4..Tmp1 are the high four bytes shifted (and
    // masked) into the low half; the ORs are built as a balanced tree.
    Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
    Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
    Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
    Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
    Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
    Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
                       N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
                       N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
    Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
    Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
    Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
    Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
    Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
  }
}
9162
/// Expand ISD::VP_BSWAP into a sequence of VP shifts, masks and ORs.
/// Mirrors expandBSWAP, but every node carries the vector-predicate Mask and
/// EVL operands. Note the i16 case uses a VP_SHL/VP_LSHR/VP_OR pair rather
/// than the rotate used in the scalar expansion.
/// Returns an empty SDValue for unsupported (non-simple or non-i16/i32/i64
/// element) types.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);

  // Only simple (MVT-representable) types are handled below.
  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Swap the two bytes: (x << 8) | (x >> 8).
    Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
  case MVT::i32:
    // Move each of the four bytes into place, then OR the pieces together.
    Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
  case MVT::i64:
    // Eight bytes: low four shifted up (Tmp8..Tmp5), high four shifted down
    // and masked (Tmp4..Tmp1), then OR-reduced as a balanced tree.
    Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
                       N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
                       N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
    Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
  }
}
9238
/// Expand ISD::BITREVERSE into shifts, masks and ORs.
/// For byte-multiple power-of-two widths this byte-swaps first (when wider
/// than one byte) and then swaps nibbles, bit-pairs and single bits with
/// splatted masks. For all other widths it falls back to moving each bit
/// individually into its mirrored position.
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);
  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
    return Tmp;
  }

  // Fallback for odd sizes: move bit I to bit J = Sz-1-I, one bit at a time,
  // shifting left while I < J and right once past the midpoint, isolating the
  // destination bit with a mask and OR-ing it into the accumulator.
  Tmp = DAG.getConstant(Val: 0, DL: dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
    else
      Tmp2 =
          DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));

    APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
  }

  return Tmp;
}
9299
/// Expand ISD::VP_BITREVERSE into VP shifts, masks and ORs.
/// Mirrors the fast path of expandBITREVERSE (VP_BSWAP, then nibble-,
/// pair- and single-bit swaps), threading the Mask and EVL operands through
/// every node. Unlike the scalar expansion, there is no bit-by-bit fallback:
/// non-power-of-two or sub-byte widths return an empty SDValue.
SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);

  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);
  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
    return Tmp;
  }
  // No per-bit fallback for the VP form.
  return SDValue();
}
9361
/// Turn a vector load into a sequence of scalar operations.
/// Returns {result value, output chain}. Two strategies are used:
///   - Non-byte-sized elements: one wide integer load of the whole vector,
///     then shift/mask/truncate out each element (since vectors are stored
///     without inter-element padding, elements must be unpacked from bits).
///   - Byte-sized elements: one scalar (ext)load per element at increasing
///     offsets, joined by a TokenFactor chain.
/// Scalable vectors cannot be scalarized and abort with a fatal error.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(ResNo: 0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error(reason: "Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    // Mask with the low SrcEltBits bits set, used to isolate one element.
    SDValue SrcEltBitMask = DAG.getConstant(
        Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
                       PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getOriginalAlign(),
                       MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 lives in the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getShiftAmountConstant(Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(),
                                     VT: LoadVT, DL: SL, /*LegalTypes=*/false);
      SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
      SDValue Elt =
          DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
      SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);

      // Re-apply the original load's extension per element.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
        Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
      }

      Vals.push_back(Elt: Scalar);
    }

    SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
    return std::make_pair(x&: Value, y: Load.getValue(R: 1));
  }

  // Byte-sized elements: emit one scalar load per element.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
                       PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride),
                       MemVT: SrcEltVT, Alignment: LD->getOriginalAlign(),
                       MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));

    Vals.push_back(Elt: ScalarLoad.getValue(R: 0));
    LoadChains.push_back(Elt: ScalarLoad.getValue(R: 1));
  }

  // Merge the per-element load chains into a single output chain.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);

  return std::make_pair(x&: Value, y&: NewChain);
}
9452
/// Turn a vector store into a sequence of scalar operations; returns the
/// output chain. Mirrors scalarizeVectorLoad:
///   - Non-byte-sized elements are packed into one wide integer with
///     zext/shift/or and stored with a single store.
///   - Byte-sized elements are extracted and stored individually, with the
///     per-element store chains merged by a TokenFactor.
/// Scalable vectors cannot be scalarized and abort with a fatal error.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error(reason: "Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);

    SDValue CurrVal = DAG.getConstant(Val: 0, DL: SL, VT: IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // Extract the element, truncate it to its in-memory width, then
      // zero-extend into the wide integer so the OR below is well-defined.
      SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SL, VT: RegSclVT, N1: Value,
                                N2: DAG.getVectorIdxConstant(Val: Idx, DL: SL));
      SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
      SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
      // On big-endian targets element 0 goes into the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
      SDValue ShiftedElt =
          DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
      CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
    }

    return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
                        Alignment: ST->getOriginalAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
                        AAInfo: ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SL, VT: RegSclVT, N1: Value,
                              N2: DAG.getVectorIdxConstant(Val: Idx, DL: SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
        SVT: MemSclVT, Alignment: ST->getOriginalAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
        AAInfo: ST->getAAInfo());

    Stores.push_back(Elt: Store);
  }

  // Merge the per-element store chains into a single output chain.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
9529
/// Expand an unaligned load the target does not support natively.
/// Returns {loaded value, output chain}. Strategies, in order:
///   - FP/vector types with a legal same-sized integer type: do a misaligned
///     integer load and bitcast (scalarizing instead if integer loads of that
///     type are unsupported and the type is a vector);
///   - otherwise FP/vector: copy through an aligned stack slot using
///     register-width integer loads/stores, then reload with the original
///     load's semantics;
///   - plain integers: split into two half-width loads combined with
///     SHL + OR, honoring the target's endianness.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(ResNo: 0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
    if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
      if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
                                    MMO: LD->getMemOperand());
      SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);

      return std::make_pair(x&: Result, y: newLoad.getValue(R: 1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks needed to cover the value (round up).
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
          Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(),
          AAInfo: LD->getAAInfo());
      // Follow the load with a store to the stack slot. Remember the store.
      Stores.push_back(Elt: DAG.getStore(
          Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
    }

    // The last copy may be partial. Do an extending load.
    EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
                                  BitWidth: 8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
                       PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT,
                       Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(),
                       AAInfo: LD->getAAInfo());
    // Follow the load with a store to the stack slot. Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(Elt: DAG.getTruncStore(
        Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
        PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
                          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0),
                          MemVT: LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(x&: Load, y&: TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one. This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts. The low half is always zero-extended so it
  // can be OR'd below; the high half keeps the original extension kind.
  // Which half sits at the lower address depends on endianness.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
    Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
                        PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
    Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());
  }

  // aggregate the two parts
  SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
  SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
  Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  return std::make_pair(x&: Result, y&: TF);
}
9679
9680SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
9681 SelectionDAG &DAG) const {
9682 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9683 "unaligned indexed stores not implemented!");
9684 SDValue Chain = ST->getChain();
9685 SDValue Ptr = ST->getBasePtr();
9686 SDValue Val = ST->getValue();
9687 EVT VT = Val.getValueType();
9688 Align Alignment = ST->getOriginalAlign();
9689 auto &MF = DAG.getMachineFunction();
9690 EVT StoreMemVT = ST->getMemoryVT();
9691
9692 SDLoc dl(ST);
9693 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
9694 EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
9695 if (isTypeLegal(VT: intVT)) {
9696 if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
9697 StoreMemVT.isVector()) {
9698 // Scalarize the store and let the individual components be handled.
9699 SDValue Result = scalarizeVectorStore(ST, DAG);
9700 return Result;
9701 }
9702 // Expand to a bitconvert of the value to the integer type of the
9703 // same size, then a (misaligned) int store.
9704 // FIXME: Does not handle truncating floating point stores!
9705 SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
9706 Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
9707 Alignment, MMOFlags: ST->getMemOperand()->getFlags());
9708 return Result;
9709 }
9710 // Do a (aligned) store to a stack slot, then copy from the stack slot
9711 // to the final destination using (unaligned) integer loads and stores.
9712 MVT RegVT = getRegisterType(
9713 Context&: *DAG.getContext(),
9714 VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
9715 EVT PtrVT = Ptr.getValueType();
9716 unsigned StoredBytes = StoreMemVT.getStoreSize();
9717 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9718 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
9719
9720 // Make sure the stack slot is also aligned for the register type.
9721 SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
9722 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
9723
9724 // Perform the original store, only redirected to the stack slot.
9725 SDValue Store = DAG.getTruncStore(
9726 Chain, dl, Val, Ptr: StackPtr,
9727 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0), SVT: StoreMemVT);
9728
9729 EVT StackPtrVT = StackPtr.getValueType();
9730
9731 SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
9732 SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
9733 SmallVector<SDValue, 8> Stores;
9734 unsigned Offset = 0;
9735
9736 // Do all but one copies using the full register width.
9737 for (unsigned i = 1; i < NumRegs; i++) {
9738 // Load one integer register's worth from the stack slot.
9739 SDValue Load = DAG.getLoad(
9740 VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
9741 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
9742 // Store it to the final location. Remember the store.
9743 Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
9744 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
9745 Alignment: ST->getOriginalAlign(),
9746 MMOFlags: ST->getMemOperand()->getFlags()));
9747 // Increment the pointers.
9748 Offset += RegBytes;
9749 StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
9750 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
9751 }
9752
9753 // The last store may be partial. Do a truncating store. On big-endian
9754 // machines this requires an extending load from the stack slot to ensure
9755 // that the bits are in the right place.
9756 EVT LoadMemVT =
9757 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: 8 * (StoredBytes - Offset));
9758
9759 // Load from the stack slot.
9760 SDValue Load = DAG.getExtLoad(
9761 ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
9762 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);
9763
9764 Stores.push_back(
9765 Elt: DAG.getTruncStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
9766 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
9767 Alignment: ST->getOriginalAlign(),
9768 MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
9769 // The order of the stores doesn't matter - say it with a TokenFactor.
9770 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9771 return Result;
9772 }
9773
9774 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
9775 "Unaligned store of unknown type.");
9776 // Get the half-size VT
9777 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
9778 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
9779 unsigned IncrementSize = NumBits / 8;
9780
9781 // Divide the stored value in two parts.
9782 SDValue ShiftAmount =
9783 DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
9784 SDValue Lo = Val;
9785 // If Val is a constant, replace the upper bits with 0. The SRL will constant
9786 // fold and not use the upper bits. A smaller constant may be easier to
9787 // materialize.
9788 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
9789 Lo = DAG.getNode(
9790 Opcode: ISD::AND, DL: dl, VT, N1: Lo,
9791 N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
9792 VT));
9793 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);
9794
9795 // Store the two parts
9796 SDValue Store1, Store2;
9797 Store1 = DAG.getTruncStore(Chain, dl,
9798 Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
9799 Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
9800 MMOFlags: ST->getMemOperand()->getFlags());
9801
9802 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9803 Store2 = DAG.getTruncStore(
9804 Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
9805 PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
9806 MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
9807
9808 SDValue Result =
9809 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
9810 return Result;
9811}
9812
/// Return the address immediately following a masked load/store of DataVT
/// starting at Addr. For compressed (expandload/compressstore-style) memory
/// the byte count is popcount(Mask) * element-size-in-bytes; for scalable
/// vectors it is the known-minimum store size scaled by vscale; otherwise it
/// is simply the fixed store size of DataVT.
SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
                                       const SDLoc &DL, EVT DataVT,
                                       SelectionDAG &DAG,
                                       bool IsCompressedMemory) const {
  SDValue Increment;
  EVT AddrVT = Addr.getValueType();
  EVT MaskVT = Mask.getValueType();
  assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
         "Incompatible types of Data and Mask");
  if (IsCompressedMemory) {
    if (DataVT.isScalableVector())
      report_fatal_error(
          reason: "Cannot currently handle compressed memory with scalable vectors");
    // Incrementing the pointer according to number of '1's in the mask.
    EVT MaskIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
    SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
    if (MaskIntVT.getSizeInBits() < 32) {
      // Widen sub-32-bit mask integers before counting bits.
      MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
      MaskIntVT = MVT::i32;
    }

    // Count '1's with POPCNT.
    Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
    Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
    // Scale is an element size in bytes.
    SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / 8, DL,
                                    VT: AddrVT);
    Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
  } else if (DataVT.isScalableVector()) {
    // Runtime-scaled size: known-minimum store size times vscale.
    Increment = DAG.getVScale(DL, VT: AddrVT,
                              MulImm: APInt(AddrVT.getFixedSizeInBits(),
                                    DataVT.getStoreSize().getKnownMinValue()));
  } else
    Increment = DAG.getConstant(Val: DataVT.getStoreSize(), DL, VT: AddrVT);

  return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
}
9851
/// Clamp a dynamic index Idx so that accessing a SubEC-element subvector of
/// VecVT starting at Idx stays within bounds. Returns Idx unchanged when it
/// is provably safe, otherwise a UMIN/AND-clamped value.
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                       EVT VecVT, const SDLoc &dl,
                                       ElementCount SubEC) {
  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
         "Cannot index a scalable vector within a fixed-width vector");

  unsigned NElts = VecVT.getVectorMinNumElements();
  unsigned NumSubElts = SubEC.getKnownMinValue();
  EVT IdxVT = Idx.getValueType();

  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know the value plus the number of the
    // elements in the subvector minus one is less than the minimum number of
    // elements then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    // Clamp to (vscale * NElts) - NumSubElts; use USUBSAT when the
    // subtraction could underflow (NumSubElts > NElts).
    SDValue VS =
        DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getFixedSizeInBits(), NElts));
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
                              N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
    return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
  }
  // Single-element access into a power-of-two-sized vector: a cheap mask
  // suffices instead of a UMIN.
  if (isPowerOf2_32(Value: NElts) && NumSubElts == 1) {
    APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
                       N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
  }
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
                     N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
}
9885
9886SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
9887 SDValue VecPtr, EVT VecVT,
9888 SDValue Index) const {
9889 return getVectorSubVecPointer(
9890 DAG, VecPtr, VecVT,
9891 SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: 1),
9892 Index);
9893}
9894
/// Compute the address of the SubVecVT-typed subvector at index \p Index
/// within the in-memory vector addressed by \p VecPtr, clamping the index so
/// the resulting access stays in bounds.
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
                                  SubEC: SubVecVT.getVectorElementCount());

  // For a scalable subvector the element count is scaled by vscale at
  // runtime, so scale the index accordingly before converting to bytes.
  EVT IdxVT = Index.getValueType();
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
                    N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getSizeInBits(), 1)));

  // Byte offset = element index * element size.
  Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
                      N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
  return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl);
}
9924
9925//===----------------------------------------------------------------------===//
9926// Implementation of Emulated TLS Model
9927//===----------------------------------------------------------------------===//
9928
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(C&: *DAG.getContext(), AddressSpace: 0);
  SDLoc dl(GA);

  // Build the single argument: the address of the "__emutls_v.<name>"
  // control variable, which must already exist in the module.
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(Name: EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(x: Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);

  // Lower the call to __emutls_get_address with C calling convention.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
9965
/// Lower (setcc eq X, 0) to (srl (ctlz X), log2(bitwidth)) when CTLZ is fast:
/// the leading-zero count equals the bit width exactly when X == 0, so the
/// final shift extracts that "is zero" bit. Returns SDValue() if the
/// transform does not apply.
SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
  if (!isCtlzFast())
    return SDValue();
  ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
  SDLoc dl(Op);
  if (isNullConstant(V: Op.getOperand(i: 1)) && CC == ISD::SETEQ) {
    EVT VT = Op.getOperand(i: 0).getValueType();
    SDValue Zext = Op.getOperand(i: 0);
    // Widen sub-i32 operands before counting leading zeros.
    if (VT.bitsLT(MVT::i32)) {
      VT = MVT::i32;
      Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: 0));
    }
    unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
    SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
    SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                              DAG.getConstant(Log2b, dl, MVT::i32));
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
  }
  return SDValue();
}
9988
/// Expand [US](MIN|MAX) nodes into setcc+select, preferring forms that avoid
/// the compare entirely (usubsat-based identities) or that reuse an existing
/// SETCC node already present in the DAG.
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
      getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
    // Freeze Op0 since it is used twice below.
    Op0 = DAG.getFreeze(V: Op0);
    SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
    return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
                       N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
      isOperationLegal(Op: ISD::USUBSAT, VT)) {
    return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
                       N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
      isOperationLegal(Op: ISD::USUBSAT, VT)) {
    return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
                       N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N: Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  // Builds select(setcc(Op0,Op1,CC), Op0, Op1), trying each candidate
  // condition code in preference order and picking one whose SETCC already
  // exists in the DAG; the "commute" codes swap the select arms instead.
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
                            Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
        SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
        return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
      }
    }
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
                            Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
        SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
        return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
      }
    }
    // No reusable SETCC found; fall back to the preferred condition.
    SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
    return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
10069
/// Expand saturating add/sub ([US](ADD|SUB)SAT) nodes. Uses min/max-based
/// identities when those ops are legal, otherwise the corresponding overflow
/// opcode plus a select (or a cheaper bit-trick when the target's boolean
/// contents allow it).
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
    SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
    return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
  }

  // Map the saturating opcode to its overflow-reporting counterpart.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N: Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
  SDValue SumDiff = Result.getValue(R: 0);
  SDValue Overflow = Result.getValue(R: 1);
  SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
      // Overflow flag is already an all-ones mask, so OR saturates directly:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
      return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
      SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
      return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
    APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);

    KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
      return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
      return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
    }
  }

  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  // The arithmetic shift replicates the sign bit of the (wrapped) result, and
  // XOR with MinVal turns that into the correct saturation constant.
  APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
  SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
  SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
                              N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT));
  Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
  return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
}
10185
/// Expand [US]SHLSAT (saturating shift-left): perform the shift, detect
/// overflow by shifting back and comparing against the original operand, and
/// select the appropriate saturation constant on mismatch.
SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  bool IsSigned = Opcode == ISD::SSHLSAT;
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert((Node->getOpcode() == ISD::SSHLSAT ||
          Node->getOpcode() == ISD::USHLSAT) &&
         "Expected a SHLSAT opcode");
  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N: Node);

  // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.

  unsigned BW = VT.getScalarSizeInBits();
  EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
  SDValue Orig =
      DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);

  // Signed saturation direction depends on the sign of LHS; unsigned always
  // saturates to the maximum value.
  SDValue SatVal;
  if (IsSigned) {
    SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
    SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
    SDValue Cond =
        DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETLT);
    SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
  } else {
    SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
  }
  SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
  return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
}
10224
/// Compute the full WideVT-wide product of the two values whose low/high
/// halves are (LL, LH) and (RL, RH), returning the low and high halves of the
/// product in \p Lo and \p Hi. Prefers a MUL libcall for WideVT when one is
/// available; otherwise expands to a schoolbook multiword multiply.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, EVT WideVT,
                                        const SDValue LL, const SDValue LH,
                                        const SDValue RL, const SDValue RH,
                                        SDValue &Lo, SDValue &Hi) const {
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  // Also, we can fall back to a division in some cases, but that's a big
  // performance hit in the general case.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(Call: LC)) {
    // We'll expand the multiplication by brute force because we have no other
    // options. This is a trivially-generalized version of the code from
    // Hacker's Delight (itself derived from Knuth's Algorithm M from section
    // 4.3.1).
    EVT VT = LL.getValueType();
    unsigned Bits = VT.getSizeInBits();
    unsigned HalfBits = Bits >> 1;
    // Split each low half into half-width digits (LLL/LLH, RLL/RLH) and
    // accumulate the partial products column by column.
    SDValue Mask =
        DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
    SDValue LLL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LL, N2: Mask);
    SDValue RLL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RL, N2: Mask);

    SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLL, N2: RLL);
    SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);

    SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
    SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
    SDValue LLH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LL, N2: Shift);
    SDValue RLH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RL, N2: Shift);

    SDValue U = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
                            N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLH, N2: RLL), N2: TH);
    SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
    SDValue UH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: U, N2: Shift);

    SDValue V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
                            N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLL, N2: RLH), N2: UL);
    SDValue VH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: Shift);

    SDValue W =
        DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLH, N2: RLH),
                    N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
    Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
                     N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));

    // Cross products with the high halves only affect the high result word.
    Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: W,
                     N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
                                 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RH, N2: LL),
                                 N2: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RL, N2: LH)));
  } else {
    // Attempt a libcall.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(Signed);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = {LL, LH, RL, RH};
      Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = {LH, LL, RH, RL};
      Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      Lo = Ret.getOperand(i: 0);
      Hi = Ret.getOperand(i: 1);
    } else {
      Lo = Ret.getOperand(i: 1);
      Hi = Ret.getOperand(i: 0);
    }
  }
}
10313
/// Expand a full-width multiply of LHS*RHS into Lo/Hi result halves by first
/// materializing the sign- or zero-extended high input halves and delegating
/// to the WideVT overload above.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, const SDValue LHS,
                                        const SDValue RHS, SDValue &Lo,
                                        SDValue &Hi) const {
  EVT VT = LHS.getValueType();
  assert(RHS.getValueType() == VT && "Mismatching operand types");

  SDValue HiLHS;
  SDValue HiRHS;
  if (Signed) {
    // The high part is obtained by SRA'ing all but one of the bits of low
    // part.
    unsigned LoSize = VT.getFixedSizeInBits();
    HiLHS = DAG.getNode(
        Opcode: ISD::SRA, DL: dl, VT, N1: LHS,
        N2: DAG.getConstant(Val: LoSize - 1, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
    HiRHS = DAG.getNode(
        Opcode: ISD::SRA, DL: dl, VT, N1: RHS,
        N2: DAG.getConstant(Val: LoSize - 1, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
  } else {
    // Unsigned inputs zero-extend: the high halves are simply zero.
    HiLHS = DAG.getConstant(Val: 0, DL: dl, VT);
    HiRHS = DAG.getConstant(Val: 0, DL: dl, VT);
  }
  EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits() * 2);
  forceExpandWideMUL(DAG, dl, Signed, WideVT, LL: LHS, LH: HiLHS, RL: RHS, RH: HiRHS, Lo, Hi);
}
10340
10341SDValue
10342TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
10343 assert((Node->getOpcode() == ISD::SMULFIX ||
10344 Node->getOpcode() == ISD::UMULFIX ||
10345 Node->getOpcode() == ISD::SMULFIXSAT ||
10346 Node->getOpcode() == ISD::UMULFIXSAT) &&
10347 "Expected a fixed point multiplication opcode");
10348
10349 SDLoc dl(Node);
10350 SDValue LHS = Node->getOperand(Num: 0);
10351 SDValue RHS = Node->getOperand(Num: 1);
10352 EVT VT = LHS.getValueType();
10353 unsigned Scale = Node->getConstantOperandVal(Num: 2);
10354 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10355 Node->getOpcode() == ISD::UMULFIXSAT);
10356 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10357 Node->getOpcode() == ISD::SMULFIXSAT);
10358 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10359 unsigned VTSize = VT.getScalarSizeInBits();
10360
10361 if (!Scale) {
10362 // [us]mul.fix(a, b, 0) -> mul(a, b)
10363 if (!Saturating) {
10364 if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
10365 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10366 } else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
10367 SDValue Result =
10368 DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10369 SDValue Product = Result.getValue(R: 0);
10370 SDValue Overflow = Result.getValue(R: 1);
10371 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10372
10373 APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
10374 APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
10375 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10376 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10377 // Xor the inputs, if resulting sign bit is 0 the product will be
10378 // positive, else negative.
10379 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
10380 SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
10381 Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
10382 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
10383 } else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
10384 SDValue Result =
10385 DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10386 SDValue Product = Result.getValue(R: 0);
10387 SDValue Overflow = Result.getValue(R: 1);
10388
10389 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
10390 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10391 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
10392 }
10393 }
10394
10395 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10396 "Expected scale to be less than the number of bits if signed or at "
10397 "most the number of bits if unsigned.");
10398 assert(LHS.getValueType() == RHS.getValueType() &&
10399 "Expected both operands to be the same type");
10400
10401 // Get the upper and lower bits of the result.
10402 SDValue Lo, Hi;
10403 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10404 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10405 if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
10406 SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
10407 Lo = Result.getValue(R: 0);
10408 Hi = Result.getValue(R: 1);
10409 } else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
10410 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10411 Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
10412 } else if (VT.isVector()) {
10413 return SDValue();
10414 } else {
10415 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
10416 }
10417
10418 if (Scale == VTSize)
10419 // Result is just the top half since we'd be shifting by the width of the
10420 // operand. Overflow impossible so this works for both UMULFIX and
10421 // UMULFIXSAT.
10422 return Hi;
10423
10424 // The result will need to be shifted right by the scale since both operands
10425 // are scaled. The result is given to us in 2 halves, so we only want part of
10426 // both in the result.
10427 SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
10428 N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
10429 if (!Saturating)
10430 return Result;
10431
10432 if (!Signed) {
10433 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10434 // widened multiplication) aren't all zeroes.
10435
10436 // Saturate to max if ((Hi >> Scale) != 0),
10437 // which is the same as if (Hi > ((1 << Scale) - 1))
10438 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
10439 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
10440 DL: dl, VT);
10441 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
10442 True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
10443 Cond: ISD::SETUGT);
10444
10445 return Result;
10446 }
10447
10448 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10449 // widened multiplication) aren't all ones or all zeroes.
10450
10451 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
10452 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
10453
10454 if (Scale == 0) {
10455 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
10456 N2: DAG.getShiftAmountConstant(Val: VTSize - 1, VT, DL: dl));
10457 SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
10458 // Saturated to SatMin if wide product is negative, and SatMax if wide
10459 // product is positive ...
10460 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10461 SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
10462 Cond: ISD::SETLT);
10463 // ... but only if we overflowed.
10464 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
10465 }
10466
10467 // We handled Scale==0 above so all the bits to examine is in Hi.
10468
10469 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
10470 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10471 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - 1),
10472 DL: dl, VT);
10473 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
10474 // Saturate to min if (Hi >> (Scale - 1)) < -1),
10475 // which is the same as if (HI < (-1 << (Scale - 1))
10476 SDValue HighMask =
10477 DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + 1),
10478 DL: dl, VT);
10479 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
10480 return Result;
10481}
10482
/// Expand a fixed point division ([SU]DIVFIX[SAT]) into a regular integer
/// division by pre-scaling the operands so the quotient carries the requested
/// number of fractional bits.
///
/// \param Opcode One of ISD::SDIVFIX, SDIVFIXSAT, UDIVFIX, UDIVFIXSAT.
/// \param Scale  Number of fractional bits in the fixed point operands.
/// \returns The lowered quotient, or SDValue() if the operands lack the
///          headroom to perform the scaled division within the same type.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Split the total scaling between an LHS up-shift and an RHS down-shift,
  // consuming as much LHS headroom as possible first.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getShiftAmountConstant(LHSShift, VT, dl));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getShiftAmountConstant(RHSShift, VT, dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    // Quotient is negative exactly when the operand signs differ (XOR of the
    // two sign tests); in that case a nonzero remainder means the truncating
    // division rounded towards zero (i.e. up), so subtract one.
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
10568
10569void TargetLowering::expandUADDSUBO(
10570 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10571 SDLoc dl(Node);
10572 SDValue LHS = Node->getOperand(Num: 0);
10573 SDValue RHS = Node->getOperand(Num: 1);
10574 bool IsAdd = Node->getOpcode() == ISD::UADDO;
10575
10576 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10577 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10578 if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: 0))) {
10579 SDValue CarryIn = DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 1));
10580 SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
10581 Ops: { LHS, RHS, CarryIn });
10582 Result = SDValue(NodeCarry.getNode(), 0);
10583 Overflow = SDValue(NodeCarry.getNode(), 1);
10584 return;
10585 }
10586
10587 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
10588 VT: LHS.getValueType(), N1: LHS, N2: RHS);
10589
10590 EVT ResultType = Node->getValueType(ResNo: 1);
10591 EVT SetCCType = getSetCCResultType(
10592 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
10593 SDValue SetCC;
10594 if (IsAdd && isOneConstant(V: RHS)) {
10595 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
10596 // the live range of X. We assume comparing with 0 is cheap.
10597 // The general case (X + C) < C is not necessarily beneficial. Although we
10598 // reduce the live range of X, we may introduce the materialization of
10599 // constant C.
10600 SetCC =
10601 DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
10602 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETEQ);
10603 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
10604 // Special case: uaddo X, -1 overflows if X != 0.
10605 SetCC =
10606 DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
10607 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETNE);
10608 } else {
10609 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
10610 SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
10611 }
10612 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
10613}
10614
10615void TargetLowering::expandSADDSUBO(
10616 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10617 SDLoc dl(Node);
10618 SDValue LHS = Node->getOperand(Num: 0);
10619 SDValue RHS = Node->getOperand(Num: 1);
10620 bool IsAdd = Node->getOpcode() == ISD::SADDO;
10621
10622 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
10623 VT: LHS.getValueType(), N1: LHS, N2: RHS);
10624
10625 EVT ResultType = Node->getValueType(ResNo: 1);
10626 EVT OType = getSetCCResultType(
10627 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
10628
10629 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10630 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10631 if (isOperationLegal(Op: OpcSat, VT: LHS.getValueType())) {
10632 SDValue Sat = DAG.getNode(Opcode: OpcSat, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS);
10633 SDValue SetCC = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Sat, Cond: ISD::SETNE);
10634 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
10635 return;
10636 }
10637
10638 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: LHS.getValueType());
10639
10640 // For an addition, the result should be less than one of the operands (LHS)
10641 // if and only if the other operand (RHS) is negative, otherwise there will
10642 // be overflow.
10643 // For a subtraction, the result should be less than one of the operands
10644 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10645 // otherwise there will be overflow.
10646 SDValue ResultLowerThanLHS = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: LHS, Cond: ISD::SETLT);
10647 SDValue ConditionRHS =
10648 DAG.getSetCC(DL: dl, VT: OType, LHS: RHS, RHS: Zero, Cond: IsAdd ? ISD::SETLT : ISD::SETGT);
10649
10650 Overflow = DAG.getBoolExtOrTrunc(
10651 Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS), SL: dl,
10652 VT: ResultType, OpVT: ResultType);
10653}
10654
/// Expand [SU]MULO into a multiply plus an overflow check, writing the
/// product to \p Result and the overflow flag to \p Overflow.
/// Tries, in order: a shift-based form for power-of-two constants, a
/// high-half multiply, a combined lo/hi multiply, a double-width multiply,
/// and finally a forced wide-multiply expansion (scalar only).
/// \returns false if no expansion was possible.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                               SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting the product back down does not round-trip to
      // the original LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                          dl, VT, Result, ShiftAmt),
                              LHS, ISD::SETNE);
      return true;
    }
  }

  // Double-width type used by the fallback full-width multiply.
  EVT WideVT =
      EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Candidate opcodes indexed by signedness:
  // { high-half multiply, combined lo/hi multiply, widening extension }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Multiply in the double-width type, then split the product into halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    // forceExpandWideMUL only handles scalars.
    if (VT.isVector())
      return false;

    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half is not the sign-extension of the
    // bottom half.
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff any bit of the top half is set.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
10734
10735SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
10736 SDLoc dl(Node);
10737 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
10738 SDValue Op = Node->getOperand(Num: 0);
10739 EVT VT = Op.getValueType();
10740
10741 if (VT.isScalableVector())
10742 report_fatal_error(
10743 reason: "Expanding reductions for scalable vectors is undefined.");
10744
10745 // Try to use a shuffle reduction for power of two vectors.
10746 if (VT.isPow2VectorType()) {
10747 while (VT.getVectorNumElements() > 1) {
10748 EVT HalfVT = VT.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
10749 if (!isOperationLegalOrCustom(Op: BaseOpcode, VT: HalfVT))
10750 break;
10751
10752 SDValue Lo, Hi;
10753 std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Op, DL: dl);
10754 Op = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: HalfVT, N1: Lo, N2: Hi, Flags: Node->getFlags());
10755 VT = HalfVT;
10756 }
10757 }
10758
10759 EVT EltVT = VT.getVectorElementType();
10760 unsigned NumElts = VT.getVectorNumElements();
10761
10762 SmallVector<SDValue, 8> Ops;
10763 DAG.ExtractVectorElements(Op, Args&: Ops, Start: 0, Count: NumElts);
10764
10765 SDValue Res = Ops[0];
10766 for (unsigned i = 1; i < NumElts; i++)
10767 Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops[i], Flags: Node->getFlags());
10768
10769 // Result type may be wider than element type.
10770 if (EltVT != Node->getValueType(ResNo: 0))
10771 Res = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: Node->getValueType(ResNo: 0), Operand: Res);
10772 return Res;
10773}
10774
10775SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
10776 SDLoc dl(Node);
10777 SDValue AccOp = Node->getOperand(Num: 0);
10778 SDValue VecOp = Node->getOperand(Num: 1);
10779 SDNodeFlags Flags = Node->getFlags();
10780
10781 EVT VT = VecOp.getValueType();
10782 EVT EltVT = VT.getVectorElementType();
10783
10784 if (VT.isScalableVector())
10785 report_fatal_error(
10786 reason: "Expanding reductions for scalable vectors is undefined.");
10787
10788 unsigned NumElts = VT.getVectorNumElements();
10789
10790 SmallVector<SDValue, 8> Ops;
10791 DAG.ExtractVectorElements(Op: VecOp, Args&: Ops, Start: 0, Count: NumElts);
10792
10793 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
10794
10795 SDValue Res = AccOp;
10796 for (unsigned i = 0; i < NumElts; i++)
10797 Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops[i], Flags);
10798
10799 return Res;
10800}
10801
10802bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
10803 SelectionDAG &DAG) const {
10804 EVT VT = Node->getValueType(ResNo: 0);
10805 SDLoc dl(Node);
10806 bool isSigned = Node->getOpcode() == ISD::SREM;
10807 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
10808 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
10809 SDValue Dividend = Node->getOperand(Num: 0);
10810 SDValue Divisor = Node->getOperand(Num: 1);
10811 if (isOperationLegalOrCustom(Op: DivRemOpc, VT)) {
10812 SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT);
10813 Result = DAG.getNode(Opcode: DivRemOpc, DL: dl, VTList: VTs, N1: Dividend, N2: Divisor).getValue(R: 1);
10814 return true;
10815 }
10816 if (isOperationLegalOrCustom(Op: DivOpc, VT)) {
10817 // X % Y -> X-X/Y*Y
10818 SDValue Divide = DAG.getNode(Opcode: DivOpc, DL: dl, VT, N1: Dividend, N2: Divisor);
10819 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Divide, N2: Divisor);
10820 Result = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Mul);
10821 return true;
10822 }
10823 return false;
10824}
10825
/// Expand FP_TO_[SU]INT_SAT: convert a float to integer, saturating
/// out-of-range inputs to the min/max of the (possibly narrower) saturation
/// width held in operand 1, and mapping NaN to zero.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  // Convert the integer bounds into the source FP type, rounding towards
  // zero; record whether both conversions were exact.
  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    // SETUO (unordered) is true exactly when Src is NaN.
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
10934
/// Narrow \p Op to \p ResultVT using round-to-odd: if the narrowing is
/// inexact and landed on an even mantissa, bump it to the adjacent odd value.
/// This makes a subsequent second rounding step (e.g. f32 -> bf16) produce
/// the correctly-rounded result, avoiding double-rounding errors. See Boldo
/// and Melquiond, "When double rounding is odd", 17th IMACS World Congress,
/// 2005.
SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  // Same scalar type: nothing to round.
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT WideIntVT = OperandVT.changeTypeToInteger();
  SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
  // Save the sign bit; the rounding below operates on the absolute value and
  // the sign is reapplied at the end.
  SDValue SignBit =
      DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
                  DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
  SDValue AbsWide;
  if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
    AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
  } else {
    // No legal FABS: clear the sign bit via integer masking instead.
    SDValue ClearedSign = DAG.getNode(
        ISD::AND, dl, WideIntVT, OpAsInt,
        DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
    AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
  }
  SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
  // Re-extend the narrowed value so it can be compared against the original
  // wide value to detect rounding error and its direction.
  SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
  SDValue One = DAG.getConstant(1, dl, ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
  SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
  SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                       AbsWide.getValueType());
  // We keep results which are exact, odd or NaN.
  // SETUEQ is true both for equal operands (exact narrowing) and for
  // unordered operands (NaN input).
  SDValue KeepNarrow =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
  KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue NarrowIsRd =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
  SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
  SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
  Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
  // Shift the saved sign bit down into the narrow type's sign position and
  // OR it back into the magnitude computed above.
  int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
  SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
  SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
  SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
  Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
}
11002
/// Expand ISD::FP_ROUND. Currently only handles a bf16 result: the operand
/// is first narrowed to f32 with round-to-odd, then the bf16 result is
/// produced by round-to-nearest-even on the f32 bit pattern, with NaNs
/// quieted explicitly. Returns SDValue() for other result types.
SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
  SDValue Op = Node->getOperand(0);
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  if (VT.getScalarType() == MVT::bf16) {
    // FP_ROUND's second operand == 1 asserts the value round-trips exactly,
    // so a direct conversion suffices.
    if (Node->getConstantOperandVal(1) == 1) {
      return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
    }
    EVT OperandVT = Op.getValueType();
    // SETUO (unordered) is true exactly when the input is NaN.
    SDValue IsNaN = DAG.getSetCC(
        dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
        Op, Op, ISD::SETUO);

    // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
    // can induce double-rounding which may alter the results. We can
    // correct for this using a trick explained in: Boldo, Sylvie, and
    // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
    // World Congress. 2005.
    EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
    EVT I32 = F32.changeTypeToInteger();
    Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);

    // Conversions should set NaN's quiet bit. This also prevents NaNs from
    // turning into infinities.
    SDValue NaN =
        DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));

    // Factor in the contribution of the low 16 bits.
    // Adding 0x7fff plus the lsb of the kept half implements
    // round-to-nearest with ties going to even.
    SDValue One = DAG.getConstant(1, dl, I32);
    SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
                              DAG.getShiftAmountConstant(16, I32, dl));
    Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
    SDValue RoundingBias =
        DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
    SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);

    // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
    // 0x80000000.
    Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);

    // Now that we have rounded, shift the bits into position.
    Op = DAG.getNode(ISD::SRL, dl, I32, Op,
                     DAG.getShiftAmountConstant(16, I32, dl));
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
    EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
    Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
    return DAG.getNode(ISD::BITCAST, dl, VT, Op);
  }
  return SDValue();
}
11056
11057SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11058 SelectionDAG &DAG) const {
11059 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11060 assert(Node->getValueType(0).isScalableVector() &&
11061 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11062
11063 EVT VT = Node->getValueType(ResNo: 0);
11064 SDValue V1 = Node->getOperand(Num: 0);
11065 SDValue V2 = Node->getOperand(Num: 1);
11066 int64_t Imm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2))->getSExtValue();
11067 SDLoc DL(Node);
11068
11069 // Expand through memory thusly:
11070 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11071 // Store V1, Ptr
11072 // Store V2, Ptr + sizeof(V1)
11073 // If (Imm < 0)
11074 // TrailingElts = -Imm
11075 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11076 // else
11077 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11078 // Res = Load Ptr
11079
11080 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11081
11082 EVT MemVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getVectorElementType(),
11083 EC: VT.getVectorElementCount() * 2);
11084 SDValue StackPtr = DAG.CreateStackTemporary(Bytes: MemVT.getStoreSize(), Alignment);
11085 EVT PtrVT = StackPtr.getValueType();
11086 auto &MF = DAG.getMachineFunction();
11087 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
11088 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);
11089
11090 // Store the lo part of CONCAT_VECTORS(V1, V2)
11091 SDValue StoreV1 = DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: V1, Ptr: StackPtr, PtrInfo);
11092 // Store the hi part of CONCAT_VECTORS(V1, V2)
11093 SDValue OffsetToV2 = DAG.getVScale(
11094 DL, VT: PtrVT,
11095 MulImm: APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11096 SDValue StackPtr2 = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: OffsetToV2);
11097 SDValue StoreV2 = DAG.getStore(Chain: StoreV1, dl: DL, Val: V2, Ptr: StackPtr2, PtrInfo);
11098
11099 if (Imm >= 0) {
11100 // Load back the required element. getVectorElementPointer takes care of
11101 // clamping the index if it's out-of-bounds.
11102 StackPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT: VT, Index: Node->getOperand(Num: 2));
11103 // Load the spliced result
11104 return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr,
11105 PtrInfo: MachinePointerInfo::getUnknownStack(MF));
11106 }
11107
11108 uint64_t TrailingElts = -Imm;
11109
11110 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11111 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11112 SDValue TrailingBytes =
11113 DAG.getConstant(Val: TrailingElts * EltByteSize, DL, VT: PtrVT);
11114
11115 if (TrailingElts > VT.getVectorMinNumElements()) {
11116 SDValue VLBytes =
11117 DAG.getVScale(DL, VT: PtrVT,
11118 MulImm: APInt(PtrVT.getFixedSizeInBits(),
11119 VT.getStoreSize().getKnownMinValue()));
11120 TrailingBytes = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PtrVT, N1: TrailingBytes, N2: VLBytes);
11121 }
11122
11123 // Calculate the start address of the spliced result.
11124 StackPtr2 = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: StackPtr2, N2: TrailingBytes);
11125
11126 // Load the spliced result
11127 return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr2,
11128 PtrInfo: MachinePointerInfo::getUnknownStack(MF));
11129}
11130
/// Legalize the condition code of a SETCC (or, when Mask/EVL are supplied, a
/// VP_SETCC) whose condition \p CC is not natively supported for the operand
/// type.
///
/// Returns false when the condition code is already Legal and nothing was
/// changed. Returns true after rewriting \p LHS / \p RHS / \p CC in place:
/// either the operands were commuted and/or the condition inverted (with
/// \p NeedInvert telling the caller to invert the final result), or the
/// comparison was expanded into two legal comparisons joined by AND/OR, in
/// which case \p LHS holds the combined value and \p RHS and \p CC are
/// cleared. For strict-FP comparisons \p Chain is updated with the merged
/// output chain; \p IsSignaling selects signaling comparison semantics.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();
  NeedInvert = false;
  // Mask and EVL are only present for vector-predicated comparisons; they
  // must be provided (or omitted) together.
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (TLI.getCondCodeAction(CC: CCCode, VT: OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First try simply commuting the operands: the swapped condition code
    // may be legal even though the original is not.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(Operation: CCCode);
    if (TLI.isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
      std::swap(a&: LHS, b&: RHS);
      CC = DAG.getCondCode(Cond: InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(Operation: CCCode, Type: OpVT);
    if (!TLI.isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(Operation: InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
      CC = DAG.getCondCode(Cond: InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(a&: LHS, b&: RHS);
      return true;
    }

    // Neither commuting nor inverting yields a legal code, so expand into
    // two comparisons (CC1, CC2) combined with the binary opcode Opc.
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (TLI.isCondCodeLegal(CC: ISD::SETUNE, VT: OpVT)) {
        // unordered(L, R) == (L != L) | (R != R): a NaN operand compares
        // unordered-not-equal to itself.
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      // Otherwise compute the ordered check below and invert the result.
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      // ordered(L, R) == (L == L) & (R == R).
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC: CC2, VT: OpVT) &&
          (TLI.isCondCodeLegal(CC: ISD::SETOGT, VT: OpVT) ||
           TLI.isCondCodeLegal(CC: ISD::SETOLT, VT: OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        // CC1 keeps the low three bits (the comparison relation) and sets
        // bit 0x10, producing the corresponding ordering-agnostic code;
        // CC2 then re-establishes the ordered/unordered requirement.
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Mask, EVL);
      }
    }
    // For strict-FP setcc nodes, merge the output chains of the two halves.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2, N3: Mask, N4: EVL);
    }
    // The combined result lives in LHS; signal to the caller that RHS and CC
    // have been consumed.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
11284

// source code of llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp