//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//
12
13#include "SelectionDAGBuilder.h"
14#include "SDNodeDbgValue.h"
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/BitVector.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/ADT/SmallSet.h"
21#include "llvm/ADT/StringRef.h"
22#include "llvm/ADT/Twine.h"
23#include "llvm/Analysis/AliasAnalysis.h"
24#include "llvm/Analysis/BranchProbabilityInfo.h"
25#include "llvm/Analysis/ConstantFolding.h"
26#include "llvm/Analysis/Loads.h"
27#include "llvm/Analysis/MemoryLocation.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Analysis/TargetTransformInfo.h"
30#include "llvm/Analysis/ValueTracking.h"
31#include "llvm/Analysis/VectorUtils.h"
32#include "llvm/CodeGen/Analysis.h"
33#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
34#include "llvm/CodeGen/CodeGenCommonISel.h"
35#include "llvm/CodeGen/FunctionLoweringInfo.h"
36#include "llvm/CodeGen/GCMetadata.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/MachineBasicBlock.h"
39#include "llvm/CodeGen/MachineFrameInfo.h"
40#include "llvm/CodeGen/MachineFunction.h"
41#include "llvm/CodeGen/MachineInstrBuilder.h"
42#include "llvm/CodeGen/MachineInstrBundleIterator.h"
43#include "llvm/CodeGen/MachineMemOperand.h"
44#include "llvm/CodeGen/MachineModuleInfo.h"
45#include "llvm/CodeGen/MachineOperand.h"
46#include "llvm/CodeGen/MachineRegisterInfo.h"
47#include "llvm/CodeGen/RuntimeLibcalls.h"
48#include "llvm/CodeGen/SelectionDAG.h"
49#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
50#include "llvm/CodeGen/StackMaps.h"
51#include "llvm/CodeGen/SwiftErrorValueTracking.h"
52#include "llvm/CodeGen/TargetFrameLowering.h"
53#include "llvm/CodeGen/TargetInstrInfo.h"
54#include "llvm/CodeGen/TargetOpcodes.h"
55#include "llvm/CodeGen/TargetRegisterInfo.h"
56#include "llvm/CodeGen/TargetSubtargetInfo.h"
57#include "llvm/CodeGen/WinEHFuncInfo.h"
58#include "llvm/IR/Argument.h"
59#include "llvm/IR/Attributes.h"
60#include "llvm/IR/BasicBlock.h"
61#include "llvm/IR/CFG.h"
62#include "llvm/IR/CallingConv.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/ConstantRange.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
68#include "llvm/IR/DebugInfoMetadata.h"
69#include "llvm/IR/DerivedTypes.h"
70#include "llvm/IR/DiagnosticInfo.h"
71#include "llvm/IR/EHPersonalities.h"
72#include "llvm/IR/Function.h"
73#include "llvm/IR/GetElementPtrTypeIterator.h"
74#include "llvm/IR/InlineAsm.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instructions.h"
77#include "llvm/IR/IntrinsicInst.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/IntrinsicsAArch64.h"
80#include "llvm/IR/IntrinsicsAMDGPU.h"
81#include "llvm/IR/IntrinsicsWebAssembly.h"
82#include "llvm/IR/LLVMContext.h"
83#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
84#include "llvm/IR/Metadata.h"
85#include "llvm/IR/Module.h"
86#include "llvm/IR/Operator.h"
87#include "llvm/IR/PatternMatch.h"
88#include "llvm/IR/Statepoint.h"
89#include "llvm/IR/Type.h"
90#include "llvm/IR/User.h"
91#include "llvm/IR/Value.h"
92#include "llvm/MC/MCContext.h"
93#include "llvm/Support/AtomicOrdering.h"
94#include "llvm/Support/Casting.h"
95#include "llvm/Support/CommandLine.h"
96#include "llvm/Support/Compiler.h"
97#include "llvm/Support/Debug.h"
98#include "llvm/Support/InstructionCost.h"
99#include "llvm/Support/MathExtras.h"
100#include "llvm/Support/raw_ostream.h"
101#include "llvm/Target/TargetIntrinsicInfo.h"
102#include "llvm/Target/TargetMachine.h"
103#include "llvm/Target/TargetOptions.h"
104#include "llvm/TargetParser/Triple.h"
105#include "llvm/Transforms/Utils/Local.h"
106#include <cstddef>
107#include <iterator>
108#include <limits>
109#include <optional>
110#include <tuple>
111
112using namespace llvm;
113using namespace PatternMatch;
114using namespace SwitchCG;
115
116#define DEBUG_TYPE "isel"
117
118/// LimitFloatPrecision - Generate low-precision inline sequences for
119/// some float libcalls (6, 8 or 12 bits).
120static unsigned LimitFloatPrecision;
121
122static cl::opt<bool>
123 InsertAssertAlign("insert-assert-align", cl::init(Val: true),
124 cl::desc("Insert the experimental `assertalign` node."),
125 cl::ReallyHidden);
126
127static cl::opt<unsigned, true>
128 LimitFPPrecision("limit-float-precision",
129 cl::desc("Generate low-precision inline sequences "
130 "for some float libcalls"),
131 cl::location(L&: LimitFloatPrecision), cl::Hidden,
132 cl::init(Val: 0));
133
134static cl::opt<unsigned> SwitchPeelThreshold(
135 "switch-peel-threshold", cl::Hidden, cl::init(Val: 66),
136 cl::desc("Set the case probability threshold for peeling the case from a "
137 "switch statement. A value greater than 100 will void this "
138 "optimization"));
139
140// Limit the width of DAG chains. This is important in general to prevent
141// DAG-based analysis from blowing up. For example, alias analysis and
142// load clustering may not complete in reasonable time. It is difficult to
143// recognize and avoid this situation within each individual analysis, and
144// future analyses are likely to have the same behavior. Limiting DAG width is
145// the safe approach and will be especially important with global DAGs.
146//
147// MaxParallelChains default is arbitrarily high to avoid affecting
148// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
149// sequence over this should have been converted to llvm.memcpy by the
150// frontend. It is easy to induce this behavior with .ll code such as:
151// %buffer = alloca [4096 x i8]
152// %data = load [4096 x i8]* %argPtr
153// store [4096 x i8] %data, [4096 x i8]* %buffer
154static const unsigned MaxParallelChains = 64;
155
156static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
157 const SDValue *Parts, unsigned NumParts,
158 MVT PartVT, EVT ValueVT, const Value *V,
159 SDValue InChain,
160 std::optional<CallingConv::ID> CC);
161
162/// getCopyFromParts - Create a value that contains the specified legal parts
163/// combined into the value they represent. If the parts combine to a type
164/// larger than ValueVT then AssertOp can be used to specify whether the extra
165/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
166/// (ISD::AssertSext).
167static SDValue
168getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
169 unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V,
170 SDValue InChain,
171 std::optional<CallingConv::ID> CC = std::nullopt,
172 std::optional<ISD::NodeType> AssertOp = std::nullopt) {
173 // Let the target assemble the parts if it wants to
174 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
175 if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
176 PartVT, ValueVT, CC))
177 return Val;
178
179 if (ValueVT.isVector())
180 return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
181 InChain, CC);
182
183 assert(NumParts > 0 && "No parts to assemble!");
184 SDValue Val = Parts[0];
185
186 if (NumParts > 1) {
187 // Assemble the value from multiple parts.
188 if (ValueVT.isInteger()) {
189 unsigned PartBits = PartVT.getSizeInBits();
190 unsigned ValueBits = ValueVT.getSizeInBits();
191
192 // Assemble the power of 2 part.
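      // Illustrative example: with three i16 parts forming an i48 value,
      // bit_floor(3) == 2, so the first two parts are combined into an i32
      // "round" value and the remaining part is handled as the odd tail below.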
193 unsigned RoundParts = llvm::bit_floor(Value: NumParts);
194 unsigned RoundBits = PartBits * RoundParts;
195 EVT RoundVT = RoundBits == ValueBits ?
196 ValueVT : EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: RoundBits);
197 SDValue Lo, Hi;
198
199 EVT HalfVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: RoundBits/2);
200
201 if (RoundParts > 2) {
202 Lo = getCopyFromParts(DAG, DL, Parts, NumParts: RoundParts / 2, PartVT, ValueVT: HalfVT, V,
203 InChain);
204 Hi = getCopyFromParts(DAG, DL, Parts: Parts + RoundParts / 2, NumParts: RoundParts / 2,
205 PartVT, ValueVT: HalfVT, V, InChain);
206 } else {
207 Lo = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: HalfVT, Operand: Parts[0]);
208 Hi = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: HalfVT, Operand: Parts[1]);
209 }
210
211 if (DAG.getDataLayout().isBigEndian())
212 std::swap(a&: Lo, b&: Hi);
213
214 Val = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: RoundVT, N1: Lo, N2: Hi);
215
216 if (RoundParts < NumParts) {
217 // Assemble the trailing non-power-of-2 part.
218 unsigned OddParts = NumParts - RoundParts;
219 EVT OddVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: OddParts * PartBits);
220 Hi = getCopyFromParts(DAG, DL, Parts: Parts + RoundParts, NumParts: OddParts, PartVT,
221 ValueVT: OddVT, V, InChain, CC);
222
223 // Combine the round and odd parts.
224 Lo = Val;
225 if (DAG.getDataLayout().isBigEndian())
226 std::swap(a&: Lo, b&: Hi);
227 EVT TotalVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumParts * PartBits);
228 Hi = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: TotalVT, Operand: Hi);
229 Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT: TotalVT, N1: Hi,
230 N2: DAG.getConstant(Val: Lo.getValueSizeInBits(), DL,
231 VT: TLI.getShiftAmountTy(
232 LHSTy: TotalVT, DL: DAG.getDataLayout())));
233 Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: TotalVT, Operand: Lo);
234 Val = DAG.getNode(Opcode: ISD::OR, DL, VT: TotalVT, N1: Lo, N2: Hi);
235 }
236 } else if (PartVT.isFloatingPoint()) {
237 // FP split into multiple FP parts (for ppcf128)
238 assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
239 "Unexpected split");
240 SDValue Lo, Hi;
241 Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
242 Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
243 if (TLI.hasBigEndianPartOrdering(VT: ValueVT, DL: DAG.getDataLayout()))
244 std::swap(a&: Lo, b&: Hi);
245 Val = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: ValueVT, N1: Lo, N2: Hi);
246 } else {
247 // FP split into integer parts (soft fp)
248 assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
249 !PartVT.isVector() && "Unexpected split");
250 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueVT.getSizeInBits());
251 Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, ValueVT: IntVT, V,
252 InChain, CC);
253 }
254 }
255
256 // There is now one part, held in Val. Correct it to match ValueVT.
257 // PartEVT is the type of the register class that holds the value.
258 // ValueVT is the type of the inline asm operation.
259 EVT PartEVT = Val.getValueType();
260
261 if (PartEVT == ValueVT)
262 return Val;
263
264 if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
265 ValueVT.bitsLT(VT: PartEVT)) {
266 // For an FP value in an integer part, we need to truncate to the right
267 // width first.
268 PartEVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueVT.getSizeInBits());
269 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: PartEVT, Operand: Val);
270 }
271
272 // Handle types that have the same size.
273 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
274 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
275
276 // Handle types with different sizes.
277 if (PartEVT.isInteger() && ValueVT.isInteger()) {
278 if (ValueVT.bitsLT(VT: PartEVT)) {
279 // For a truncate, see if we have any information to
280 // indicate whether the truncated bits will always be
      // zero or sign-extended.
282 if (AssertOp)
283 Val = DAG.getNode(Opcode: *AssertOp, DL, VT: PartEVT, N1: Val,
284 N2: DAG.getValueType(ValueVT));
285 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ValueVT, Operand: Val);
286 }
287 return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ValueVT, Operand: Val);
288 }
289
290 if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
291 // FP_ROUND's are always exact here.
292 if (ValueVT.bitsLT(VT: Val.getValueType())) {
293
294 SDValue NoChange =
295 DAG.getTargetConstant(Val: 1, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
296
297 if (DAG.getMachineFunction().getFunction().getAttributes().hasFnAttr(
298 llvm::Attribute::StrictFP)) {
299 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
300 DAG.getVTList(ValueVT, MVT::Other), InChain, Val,
301 NoChange);
302 }
303
304 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: ValueVT, N1: Val, N2: NoChange);
305 }
306
307 return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: ValueVT, Operand: Val);
308 }
309
310 // Handle MMX to a narrower integer type by bitcasting MMX to integer and
311 // then truncating.
312 if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
313 ValueVT.bitsLT(VT: PartEVT)) {
314 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
315 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ValueVT, Operand: Val);
316 }
317
318 report_fatal_error(reason: "Unknown mismatch in getCopyFromParts!");
319}
320
321static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
322 const Twine &ErrMsg) {
323 const Instruction *I = dyn_cast_or_null<Instruction>(Val: V);
324 if (!V)
325 return Ctx.emitError(ErrorStr: ErrMsg);
326
327 const char *AsmError = ", possible invalid constraint for vector type";
328 if (const CallInst *CI = dyn_cast<CallInst>(Val: I))
329 if (CI->isInlineAsm())
330 return Ctx.emitError(I, ErrorStr: ErrMsg + AsmError);
331
332 return Ctx.emitError(I, ErrorStr: ErrMsg);
333}
334
335/// getCopyFromPartsVector - Create a value that contains the specified legal
336/// parts combined into the value they represent. If the parts combine to a
337/// type larger than ValueVT then AssertOp can be used to specify whether the
338/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
339/// ValueVT (ISD::AssertSext).
340static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
341 const SDValue *Parts, unsigned NumParts,
342 MVT PartVT, EVT ValueVT, const Value *V,
343 SDValue InChain,
344 std::optional<CallingConv::ID> CallConv) {
345 assert(ValueVT.isVector() && "Not a vector value");
346 assert(NumParts > 0 && "No parts to assemble!");
347 const bool IsABIRegCopy = CallConv.has_value();
348
349 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
350 SDValue Val = Parts[0];
351
352 // Handle a multi-element vector.
353 if (NumParts > 1) {
354 EVT IntermediateVT;
355 MVT RegisterVT;
356 unsigned NumIntermediates;
357 unsigned NumRegs;
358
359 if (IsABIRegCopy) {
360 NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
361 Context&: *DAG.getContext(), CC: *CallConv, VT: ValueVT, IntermediateVT,
362 NumIntermediates, RegisterVT);
363 } else {
364 NumRegs =
365 TLI.getVectorTypeBreakdown(Context&: *DAG.getContext(), VT: ValueVT, IntermediateVT,
366 NumIntermediates, RegisterVT);
367 }
368
369 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
370 NumParts = NumRegs; // Silence a compiler warning.
371 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
372 assert(RegisterVT.getSizeInBits() ==
373 Parts[0].getSimpleValueType().getSizeInBits() &&
374 "Part type sizes don't match!");
375
376 // Assemble the parts into intermediate operands.
377 SmallVector<SDValue, 8> Ops(NumIntermediates);
378 if (NumIntermediates == NumParts) {
379 // If the register was not expanded, truncate or copy the value,
380 // as appropriate.
381 for (unsigned i = 0; i != NumParts; ++i)
382 Ops[i] = getCopyFromParts(DAG, DL, Parts: &Parts[i], NumParts: 1, PartVT, ValueVT: IntermediateVT,
383 V, InChain, CC: CallConv);
384 } else if (NumParts > 0) {
385 // If the intermediate type was expanded, build the intermediate
386 // operands from the parts.
387 assert(NumParts % NumIntermediates == 0 &&
388 "Must expand into a divisible number of parts!");
389 unsigned Factor = NumParts / NumIntermediates;
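      // Illustrative example: eight parts and four intermediates give
      // Factor == 2, so each intermediate value below is assembled from two
      // consecutive parts.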
390 for (unsigned i = 0; i != NumIntermediates; ++i)
391 Ops[i] = getCopyFromParts(DAG, DL, Parts: &Parts[i * Factor], NumParts: Factor, PartVT,
392 ValueVT: IntermediateVT, V, InChain, CC: CallConv);
393 }
394
395 // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
396 // intermediate operands.
397 EVT BuiltVectorTy =
398 IntermediateVT.isVector()
399 ? EVT::getVectorVT(
400 Context&: *DAG.getContext(), VT: IntermediateVT.getScalarType(),
401 EC: IntermediateVT.getVectorElementCount() * NumParts)
402 : EVT::getVectorVT(Context&: *DAG.getContext(),
403 VT: IntermediateVT.getScalarType(),
404 NumElements: NumIntermediates);
405 Val = DAG.getNode(Opcode: IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
406 : ISD::BUILD_VECTOR,
407 DL, VT: BuiltVectorTy, Ops);
408 }
409
410 // There is now one part, held in Val. Correct it to match ValueVT.
411 EVT PartEVT = Val.getValueType();
412
413 if (PartEVT == ValueVT)
414 return Val;
415
416 if (PartEVT.isVector()) {
417 // Vector/Vector bitcast.
418 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
419 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
420
421 // If the parts vector has more elements than the value vector, then we
422 // have a vector widening case (e.g. <2 x float> -> <4 x float>).
423 // Extract the elements we want.
424 if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
425 assert((PartEVT.getVectorElementCount().getKnownMinValue() >
426 ValueVT.getVectorElementCount().getKnownMinValue()) &&
427 (PartEVT.getVectorElementCount().isScalable() ==
428 ValueVT.getVectorElementCount().isScalable()) &&
429 "Cannot narrow, it would be a lossy transformation");
430 PartEVT =
431 EVT::getVectorVT(Context&: *DAG.getContext(), VT: PartEVT.getVectorElementType(),
432 EC: ValueVT.getVectorElementCount());
433 Val = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: PartEVT, N1: Val,
434 N2: DAG.getVectorIdxConstant(Val: 0, DL));
435 if (PartEVT == ValueVT)
436 return Val;
437 if (PartEVT.isInteger() && ValueVT.isFloatingPoint())
438 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
439
440 // Vector/Vector bitcast (e.g. <2 x bfloat> -> <2 x half>).
441 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
442 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
443 }
444
445 // Promoted vector extract
446 return DAG.getAnyExtOrTrunc(Op: Val, DL, VT: ValueVT);
447 }
448
449 // Trivial bitcast if the types are the same size and the destination
450 // vector type is legal.
451 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
452 TLI.isTypeLegal(VT: ValueVT))
453 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
454
455 if (ValueVT.getVectorNumElements() != 1) {
    // Certain ABIs require that vectors are passed as integers. For vectors
    // of the same size, this is an obvious bitcast.
458 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
459 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
460 } else if (ValueVT.bitsLT(VT: PartEVT)) {
461 const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
462 EVT IntermediateType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueSize);
463 // Drop the extra bits.
464 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IntermediateType, Operand: Val);
465 return DAG.getBitcast(VT: ValueVT, V: Val);
466 }
467
468 diagnosePossiblyInvalidConstraint(
469 Ctx&: *DAG.getContext(), V, ErrMsg: "non-trivial scalar-to-vector conversion");
470 return DAG.getUNDEF(VT: ValueVT);
471 }
472
473 // Handle cases such as i8 -> <1 x i1>
474 EVT ValueSVT = ValueVT.getVectorElementType();
475 if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
476 unsigned ValueSize = ValueSVT.getSizeInBits();
477 if (ValueSize == PartEVT.getSizeInBits()) {
478 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueSVT, Operand: Val);
479 } else if (ValueSVT.isFloatingPoint() && PartEVT.isInteger()) {
480 // It's possible a scalar floating point type gets softened to integer and
481 // then promoted to a larger integer. If PartEVT is the larger integer
482 // we need to truncate it and then bitcast to the FP type.
483 assert(ValueSVT.bitsLT(PartEVT) && "Unexpected types");
484 EVT IntermediateType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueSize);
485 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IntermediateType, Operand: Val);
486 Val = DAG.getBitcast(VT: ValueSVT, V: Val);
487 } else {
488 Val = ValueVT.isFloatingPoint()
489 ? DAG.getFPExtendOrRound(Op: Val, DL, VT: ValueSVT)
490 : DAG.getAnyExtOrTrunc(Op: Val, DL, VT: ValueSVT);
491 }
492 }
493
494 return DAG.getBuildVector(VT: ValueVT, DL, Ops: Val);
495}
496
497static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
498 SDValue Val, SDValue *Parts, unsigned NumParts,
499 MVT PartVT, const Value *V,
500 std::optional<CallingConv::ID> CallConv);
501
502/// getCopyToParts - Create a series of nodes that contain the specified value
503/// split into legal parts. If the parts contain more bits than Val, then, for
504/// integers, ExtendKind can be used to specify how to generate the extra bits.
505static void
506getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
507 unsigned NumParts, MVT PartVT, const Value *V,
508 std::optional<CallingConv::ID> CallConv = std::nullopt,
509 ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
510 // Let the target split the parts if it wants to
511 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
512 if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
513 CC: CallConv))
514 return;
515 EVT ValueVT = Val.getValueType();
516
517 // Handle the vector case separately.
518 if (ValueVT.isVector())
519 return getCopyToPartsVector(DAG, dl: DL, Val, Parts, NumParts, PartVT, V,
520 CallConv);
521
522 unsigned OrigNumParts = NumParts;
523 assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
524 "Copying to an illegal type!");
525
526 if (NumParts == 0)
527 return;
528
529 assert(!ValueVT.isVector() && "Vector case handled elsewhere");
530 EVT PartEVT = PartVT;
531 if (PartEVT == ValueVT) {
532 assert(NumParts == 1 && "No-op copy with multiple parts!");
533 Parts[0] = Val;
534 return;
535 }
536
537 unsigned PartBits = PartVT.getSizeInBits();
538 if (NumParts * PartBits > ValueVT.getSizeInBits()) {
539 // If the parts cover more bits than the value has, promote the value.
540 if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
541 assert(NumParts == 1 && "Do not know what to promote to!");
542 Val = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: PartVT, Operand: Val);
543 } else {
544 if (ValueVT.isFloatingPoint()) {
545 // FP values need to be bitcast, then extended if they are being put
546 // into a larger container.
547 ValueVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueVT.getSizeInBits());
548 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
549 }
550 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
551 ValueVT.isInteger() &&
552 "Unknown mismatch!");
553 ValueVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumParts * PartBits);
554 Val = DAG.getNode(Opcode: ExtendKind, DL, VT: ValueVT, Operand: Val);
555 if (PartVT == MVT::x86mmx)
556 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
557 }
558 } else if (PartBits == ValueVT.getSizeInBits()) {
559 // Different types of the same size.
560 assert(NumParts == 1 && PartEVT != ValueVT);
561 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
562 } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover fewer bits than the value has, truncate the value.
564 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
565 ValueVT.isInteger() &&
566 "Unknown mismatch!");
567 ValueVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumParts * PartBits);
568 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ValueVT, Operand: Val);
569 if (PartVT == MVT::x86mmx)
570 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
571 }
572
573 // The value may have changed - recompute ValueVT.
574 ValueVT = Val.getValueType();
575 assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
576 "Failed to tile the value with PartVT!");
577
578 if (NumParts == 1) {
579 if (PartEVT != ValueVT) {
580 diagnosePossiblyInvalidConstraint(Ctx&: *DAG.getContext(), V,
581 ErrMsg: "scalar-to-vector conversion failed");
582 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
583 }
584
585 Parts[0] = Val;
586 return;
587 }
588
589 // Expand the value into multiple parts.
590 if (NumParts & (NumParts - 1)) {
591 // The number of parts is not a power of 2. Split off and copy the tail.
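    // Illustrative example: for NumParts == 3, RoundParts == 2, so the value's
    // high PartBits bits are shifted down and emitted as the single odd part,
    // and the remaining low bits are handled by the power-of-2 path below.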
592 assert(PartVT.isInteger() && ValueVT.isInteger() &&
593 "Do not know what to expand to!");
594 unsigned RoundParts = llvm::bit_floor(Value: NumParts);
595 unsigned RoundBits = RoundParts * PartBits;
596 unsigned OddParts = NumParts - RoundParts;
597 SDValue OddVal = DAG.getNode(Opcode: ISD::SRL, DL, VT: ValueVT, N1: Val,
598 N2: DAG.getShiftAmountConstant(Val: RoundBits, VT: ValueVT, DL));
599
600 getCopyToParts(DAG, DL, Val: OddVal, Parts: Parts + RoundParts, NumParts: OddParts, PartVT, V,
601 CallConv);
602
603 if (DAG.getDataLayout().isBigEndian())
604 // The odd parts were reversed by getCopyToParts - unreverse them.
605 std::reverse(first: Parts + RoundParts, last: Parts + NumParts);
606
607 NumParts = RoundParts;
608 ValueVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumParts * PartBits);
609 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ValueVT, Operand: Val);
610 }
611
612 // The number of parts is a power of 2. Repeatedly bisect the value using
613 // EXTRACT_ELEMENT.
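  // Illustrative example: with four parts, the first pass extracts the low and
  // high halves into Parts[0] and Parts[2]; the second pass splits each half
  // again, filling Parts[0] through Parts[3].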
614 Parts[0] = DAG.getNode(Opcode: ISD::BITCAST, DL,
615 VT: EVT::getIntegerVT(Context&: *DAG.getContext(),
616 BitWidth: ValueVT.getSizeInBits()),
617 Operand: Val);
618
619 for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
620 for (unsigned i = 0; i < NumParts; i += StepSize) {
621 unsigned ThisBits = StepSize * PartBits / 2;
622 EVT ThisVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ThisBits);
623 SDValue &Part0 = Parts[i];
624 SDValue &Part1 = Parts[i+StepSize/2];
625
626 Part1 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL,
627 VT: ThisVT, N1: Part0, N2: DAG.getIntPtrConstant(Val: 1, DL));
628 Part0 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL,
629 VT: ThisVT, N1: Part0, N2: DAG.getIntPtrConstant(Val: 0, DL));
630
631 if (ThisBits == PartBits && ThisVT != PartVT) {
632 Part0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Part0);
633 Part1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Part1);
634 }
635 }
636 }
637
638 if (DAG.getDataLayout().isBigEndian())
639 std::reverse(first: Parts, last: Parts + OrigNumParts);
640}
641
642static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
643 const SDLoc &DL, EVT PartVT) {
644 if (!PartVT.isVector())
645 return SDValue();
646
647 EVT ValueVT = Val.getValueType();
648 EVT PartEVT = PartVT.getVectorElementType();
649 EVT ValueEVT = ValueVT.getVectorElementType();
650 ElementCount PartNumElts = PartVT.getVectorElementCount();
651 ElementCount ValueNumElts = ValueVT.getVectorElementCount();
652
653 // We only support widening vectors with equivalent element types and
654 // fixed/scalable properties. If a target needs to widen a fixed-length type
655 // to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
656 if (ElementCount::isKnownLE(LHS: PartNumElts, RHS: ValueNumElts) ||
657 PartNumElts.isScalable() != ValueNumElts.isScalable())
658 return SDValue();
659
  // Special-case bf16 because some targets share its ABI with fp16.
661 if (ValueEVT == MVT::bf16 && PartEVT == MVT::f16) {
662 assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
663 "Cannot widen to illegal type");
    Val = DAG.getNode(ISD::BITCAST, DL,
                      ValueVT.changeVectorElementType(MVT::f16), Val);
666 } else if (PartEVT != ValueEVT) {
667 return SDValue();
668 }
669
670 // Widening a scalable vector to another scalable vector is done by inserting
671 // the vector into a larger undef one.
672 if (PartNumElts.isScalable())
673 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: PartVT, N1: DAG.getUNDEF(VT: PartVT),
674 N2: Val, N3: DAG.getVectorIdxConstant(Val: 0, DL));
675
676 // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
677 // undef elements.
678 SmallVector<SDValue, 16> Ops;
679 DAG.ExtractVectorElements(Op: Val, Args&: Ops);
680 SDValue EltUndef = DAG.getUNDEF(VT: PartEVT);
681 Ops.append(NumInputs: (PartNumElts - ValueNumElts).getFixedValue(), Elt: EltUndef);
682
683 // FIXME: Use CONCAT for 2x -> 4x.
684 return DAG.getBuildVector(VT: PartVT, DL, Ops);
685}
686
687/// getCopyToPartsVector - Create a series of nodes that contain the specified
688/// value split into legal parts.
689static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
690 SDValue Val, SDValue *Parts, unsigned NumParts,
691 MVT PartVT, const Value *V,
692 std::optional<CallingConv::ID> CallConv) {
693 EVT ValueVT = Val.getValueType();
694 assert(ValueVT.isVector() && "Not a vector");
695 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
696 const bool IsABIRegCopy = CallConv.has_value();
697
698 if (NumParts == 1) {
699 EVT PartEVT = PartVT;
700 if (PartEVT == ValueVT) {
701 // Nothing to do.
702 } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
703 // Bitconvert vector->vector case.
704 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
705 } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
706 Val = Widened;
707 } else if (PartVT.isVector() &&
708 PartEVT.getVectorElementType().bitsGE(
709 VT: ValueVT.getVectorElementType()) &&
710 PartEVT.getVectorElementCount() ==
711 ValueVT.getVectorElementCount()) {
712
713 // Promoted vector extract
714 Val = DAG.getAnyExtOrTrunc(Op: Val, DL, VT: PartVT);
715 } else if (PartEVT.isVector() &&
716 PartEVT.getVectorElementType() !=
717 ValueVT.getVectorElementType() &&
718 TLI.getTypeAction(Context&: *DAG.getContext(), VT: ValueVT) ==
719 TargetLowering::TypeWidenVector) {
720 // Combination of widening and promotion.
721 EVT WidenVT =
722 EVT::getVectorVT(Context&: *DAG.getContext(), VT: ValueVT.getVectorElementType(),
723 EC: PartVT.getVectorElementCount());
724 SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT: WidenVT);
725 Val = DAG.getAnyExtOrTrunc(Op: Widened, DL, VT: PartVT);
726 } else {
727 // Don't extract an integer from a float vector. This can happen if the
728 // FP type gets softened to integer and then promoted. The promotion
729 // prevents it from being picked up by the earlier bitcast case.
730 if (ValueVT.getVectorElementCount().isScalar() &&
731 (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) {
732 Val = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: PartVT, N1: Val,
733 N2: DAG.getVectorIdxConstant(Val: 0, DL));
734 } else {
735 uint64_t ValueSize = ValueVT.getFixedSizeInBits();
736 assert(PartVT.getFixedSizeInBits() > ValueSize &&
737 "lossy conversion of vector to scalar type");
738 EVT IntermediateType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueSize);
739 Val = DAG.getBitcast(VT: IntermediateType, V: Val);
740 Val = DAG.getAnyExtOrTrunc(Op: Val, DL, VT: PartVT);
741 }
742 }
743
744 assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
745 Parts[0] = Val;
746 return;
747 }
748
749 // Handle a multi-element vector.
750 EVT IntermediateVT;
751 MVT RegisterVT;
752 unsigned NumIntermediates;
753 unsigned NumRegs;
754 if (IsABIRegCopy) {
755 NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
756 Context&: *DAG.getContext(), CC: *CallConv, VT: ValueVT, IntermediateVT, NumIntermediates,
757 RegisterVT);
758 } else {
759 NumRegs =
760 TLI.getVectorTypeBreakdown(Context&: *DAG.getContext(), VT: ValueVT, IntermediateVT,
761 NumIntermediates, RegisterVT);
762 }
763
764 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
765 NumParts = NumRegs; // Silence a compiler warning.
766 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
767
768 assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
769 "Mixing scalable and fixed vectors when copying in parts");
770
771 std::optional<ElementCount> DestEltCnt;
772
773 if (IntermediateVT.isVector())
774 DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
775 else
776 DestEltCnt = ElementCount::getFixed(MinVal: NumIntermediates);
777
778 EVT BuiltVectorTy = EVT::getVectorVT(
779 Context&: *DAG.getContext(), VT: IntermediateVT.getScalarType(), EC: *DestEltCnt);
780
781 if (ValueVT == BuiltVectorTy) {
782 // Nothing to do.
783 } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
784 // Bitconvert vector->vector case.
785 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: BuiltVectorTy, Operand: Val);
786 } else {
787 if (BuiltVectorTy.getVectorElementType().bitsGT(
788 VT: ValueVT.getVectorElementType())) {
789 // Integer promotion.
790 ValueVT = EVT::getVectorVT(Context&: *DAG.getContext(),
791 VT: BuiltVectorTy.getVectorElementType(),
792 EC: ValueVT.getVectorElementCount());
793 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ValueVT, Operand: Val);
794 }
795
796 if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT: BuiltVectorTy)) {
797 Val = Widened;
798 }
799 }
800
801 assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
802
803 // Split the vector into intermediate operands.
804 SmallVector<SDValue, 8> Ops(NumIntermediates);
805 for (unsigned i = 0; i != NumIntermediates; ++i) {
806 if (IntermediateVT.isVector()) {
807 // This does something sensible for scalable vectors - see the
808 // definition of EXTRACT_SUBVECTOR for further details.
809 unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
810 Ops[i] =
811 DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: IntermediateVT, N1: Val,
812 N2: DAG.getVectorIdxConstant(Val: i * IntermediateNumElts, DL));
813 } else {
814 Ops[i] = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: IntermediateVT, N1: Val,
815 N2: DAG.getVectorIdxConstant(Val: i, DL));
816 }
817 }
818
819 // Split the intermediate operands into legal parts.
820 if (NumParts == NumIntermediates) {
821 // If the register was not expanded, promote or copy the value,
822 // as appropriate.
823 for (unsigned i = 0; i != NumParts; ++i)
824 getCopyToParts(DAG, DL, Val: Ops[i], Parts: &Parts[i], NumParts: 1, PartVT, V, CallConv);
825 } else if (NumParts > 0) {
    // If the intermediate type was expanded, split each value into
    // legal parts.
828 assert(NumIntermediates != 0 && "division by zero");
829 assert(NumParts % NumIntermediates == 0 &&
830 "Must expand into a divisible number of parts!");
831 unsigned Factor = NumParts / NumIntermediates;
832 for (unsigned i = 0; i != NumIntermediates; ++i)
833 getCopyToParts(DAG, DL, Val: Ops[i], Parts: &Parts[i * Factor], NumParts: Factor, PartVT, V,
834 CallConv);
835 }
836}
837
838RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
839 EVT valuevt, std::optional<CallingConv::ID> CC)
840 : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
841 RegCount(1, regs.size()), CallConv(CC) {}
842
843RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
844 const DataLayout &DL, unsigned Reg, Type *Ty,
845 std::optional<CallingConv::ID> CC) {
846 ComputeValueVTs(TLI, DL, Ty, ValueVTs);
847
848 CallConv = CC;
849
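  // Illustrative example: an i64 value whose register type on the target is
  // i32 produces NumRegs == 2, so two consecutive register numbers are
  // recorded for that value.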
850 for (EVT ValueVT : ValueVTs) {
851 unsigned NumRegs =
852 isABIMangled()
853 ? TLI.getNumRegistersForCallingConv(Context, CC: *CC, VT: ValueVT)
854 : TLI.getNumRegisters(Context, VT: ValueVT);
855 MVT RegisterVT =
856 isABIMangled()
857 ? TLI.getRegisterTypeForCallingConv(Context, CC: *CC, VT: ValueVT)
858 : TLI.getRegisterType(Context, VT: ValueVT);
859 for (unsigned i = 0; i != NumRegs; ++i)
860 Regs.push_back(Elt: Reg + i);
861 RegVTs.push_back(Elt: RegisterVT);
862 RegCount.push_back(Elt: NumRegs);
863 Reg += NumRegs;
864 }
865}
866
867SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
868 FunctionLoweringInfo &FuncInfo,
869 const SDLoc &dl, SDValue &Chain,
870 SDValue *Glue, const Value *V) const {
871 // A Value with type {} or [0 x %t] needs no registers.
872 if (ValueVTs.empty())
873 return SDValue();
874
875 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
876
877 // Assemble the legal parts into the final values.
878 SmallVector<SDValue, 4> Values(ValueVTs.size());
879 SmallVector<SDValue, 8> Parts;
880 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
881 // Copy the legal parts from the registers.
882 EVT ValueVT = ValueVTs[Value];
883 unsigned NumRegs = RegCount[Value];
884 MVT RegisterVT = isABIMangled()
885 ? TLI.getRegisterTypeForCallingConv(
886 Context&: *DAG.getContext(), CC: *CallConv, VT: RegVTs[Value])
887 : RegVTs[Value];
888
889 Parts.resize(N: NumRegs);
890 for (unsigned i = 0; i != NumRegs; ++i) {
891 SDValue P;
892 if (!Glue) {
893 P = DAG.getCopyFromReg(Chain, dl, Reg: Regs[Part+i], VT: RegisterVT);
894 } else {
895 P = DAG.getCopyFromReg(Chain, dl, Reg: Regs[Part+i], VT: RegisterVT, Glue: *Glue);
896 *Glue = P.getValue(R: 2);
897 }
898
899 Chain = P.getValue(R: 1);
900 Parts[i] = P;
901
902 // If the source register was virtual and if we know something about it,
903 // add an assert node.
904 if (!Register::isVirtualRegister(Reg: Regs[Part + i]) ||
905 !RegisterVT.isInteger())
906 continue;
907
908 const FunctionLoweringInfo::LiveOutInfo *LOI =
909 FuncInfo.GetLiveOutRegInfo(Reg: Regs[Part+i]);
910 if (!LOI)
911 continue;
912
913 unsigned RegSize = RegisterVT.getScalarSizeInBits();
914 unsigned NumSignBits = LOI->NumSignBits;
915 unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
916
917 if (NumZeroBits == RegSize) {
918 // The current value is a zero.
919 // Explicitly express that as it would be easier for
920 // optimizations to kick in.
921 Parts[i] = DAG.getConstant(Val: 0, DL: dl, VT: RegisterVT);
922 continue;
923 }
924
925 // FIXME: We capture more information than the dag can represent. For
926 // now, just use the tightest assertzext/assertsext possible.
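      // Illustrative example: with 24 known leading zero bits in a 32-bit
      // register, FromVT becomes i8 and an AssertZext node is attached below.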
927 bool isSExt;
928 EVT FromVT(MVT::Other);
929 if (NumZeroBits) {
930 FromVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: RegSize - NumZeroBits);
931 isSExt = false;
932 } else if (NumSignBits > 1) {
933 FromVT =
934 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: RegSize - NumSignBits + 1);
935 isSExt = true;
936 } else {
937 continue;
938 }
939 // Add an assertion node.
940 assert(FromVT != MVT::Other);
941 Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
942 RegisterVT, P, DAG.getValueType(FromVT));
943 }
944
945 Values[Value] = getCopyFromParts(DAG, DL: dl, Parts: Parts.begin(), NumParts: NumRegs,
946 PartVT: RegisterVT, ValueVT, V, InChain: Chain, CC: CallConv);
947 Part += NumRegs;
948 Parts.clear();
949 }
950
951 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, VTList: DAG.getVTList(VTs: ValueVTs), Ops: Values);
952}
953
954void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
955 const SDLoc &dl, SDValue &Chain, SDValue *Glue,
956 const Value *V,
957 ISD::NodeType PreferredExtendType) const {
958 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
959 ISD::NodeType ExtendKind = PreferredExtendType;
960
  // Get the list of the value's legal parts.
962 unsigned NumRegs = Regs.size();
963 SmallVector<SDValue, 8> Parts(NumRegs);
964 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
965 unsigned NumParts = RegCount[Value];
966
967 MVT RegisterVT = isABIMangled()
968 ? TLI.getRegisterTypeForCallingConv(
969 Context&: *DAG.getContext(), CC: *CallConv, VT: RegVTs[Value])
970 : RegVTs[Value];
971
972 if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, VT2: RegisterVT))
973 ExtendKind = ISD::ZERO_EXTEND;
974
975 getCopyToParts(DAG, DL: dl, Val: Val.getValue(R: Val.getResNo() + Value), Parts: &Parts[Part],
976 NumParts, PartVT: RegisterVT, V, CallConv, ExtendKind);
977 Part += NumParts;
978 }
979
980 // Copy the parts into the registers.
981 SmallVector<SDValue, 8> Chains(NumRegs);
982 for (unsigned i = 0; i != NumRegs; ++i) {
983 SDValue Part;
984 if (!Glue) {
985 Part = DAG.getCopyToReg(Chain, dl, Reg: Regs[i], N: Parts[i]);
986 } else {
987 Part = DAG.getCopyToReg(Chain, dl, Reg: Regs[i], N: Parts[i], Glue: *Glue);
988 *Glue = Part.getValue(R: 1);
989 }
990
991 Chains[i] = Part.getValue(R: 0);
992 }
993
994 if (NumRegs == 1 || Glue)
995 // If NumRegs > 1 && Glue is used then the use of the last CopyToReg is
996 // flagged to it. That is the CopyToReg nodes and the user are considered
997 // a single scheduling unit. If we create a TokenFactor and return it as
998 // chain, then the TokenFactor is both a predecessor (operand) of the
999 // user as well as a successor (the TF operands are flagged to the user).
1000 // c1, f1 = CopyToReg
1001 // c2, f2 = CopyToReg
1002 // c3 = TokenFactor c1, c2
1003 // ...
1004 // = op c3, ..., f2
1005 Chain = Chains[NumRegs-1];
1006 else
1007 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
1008}
1009
1010void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
1011 unsigned MatchingIdx, const SDLoc &dl,
1012 SelectionDAG &DAG,
1013 std::vector<SDValue> &Ops) const {
1014 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1015
1016 InlineAsm::Flag Flag(Code, Regs.size());
1017 if (HasMatching)
1018 Flag.setMatchingOp(MatchingIdx);
1019 else if (!Regs.empty() && Register::isVirtualRegister(Reg: Regs.front())) {
1020 // Put the register class of the virtual registers in the flag word. That
1021 // way, later passes can recompute register class constraints for inline
1022 // assembly as well as normal instructions.
1023 // Don't do this for tied operands that can use the regclass information
1024 // from the def.
1025 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
1026 const TargetRegisterClass *RC = MRI.getRegClass(Reg: Regs.front());
1027 Flag.setRegClass(RC->getID());
1028 }
1029
1030 SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
1031 Ops.push_back(x: Res);
1032
1033 if (Code == InlineAsm::Kind::Clobber) {
1034 // Clobbers should always have a 1:1 mapping with registers, and may
1035 // reference registers that have illegal (e.g. vector) types. Hence, we
1036 // shouldn't try to apply any sort of splitting logic to them.
1037 assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
1038 "No 1:1 mapping from clobbers to regs?");
1039 Register SP = TLI.getStackPointerRegisterToSaveRestore();
1040 (void)SP;
1041 for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
1042 Ops.push_back(x: DAG.getRegister(Reg: Regs[I], VT: RegVTs[I]));
1043 assert(
1044 (Regs[I] != SP ||
1045 DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
1046 "If we clobbered the stack pointer, MFI should know about it.");
1047 }
1048 return;
1049 }
1050
1051 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
1052 MVT RegisterVT = RegVTs[Value];
1053 unsigned NumRegs = TLI.getNumRegisters(Context&: *DAG.getContext(), VT: ValueVTs[Value],
1054 RegisterVT);
1055 for (unsigned i = 0; i != NumRegs; ++i) {
1056 assert(Reg < Regs.size() && "Mismatch in # registers expected");
1057 unsigned TheReg = Regs[Reg++];
1058 Ops.push_back(x: DAG.getRegister(Reg: TheReg, VT: RegisterVT));
1059 }
1060 }
1061}
1062
1063SmallVector<std::pair<unsigned, TypeSize>, 4>
1064RegsForValue::getRegsAndSizes() const {
1065 SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
1066 unsigned I = 0;
1067 for (auto CountAndVT : zip_first(t: RegCount, u: RegVTs)) {
1068 unsigned RegCount = std::get<0>(t&: CountAndVT);
1069 MVT RegisterVT = std::get<1>(t&: CountAndVT);
1070 TypeSize RegisterSize = RegisterVT.getSizeInBits();
1071 for (unsigned E = I + RegCount; I != E; ++I)
1072 OutVec.push_back(Elt: std::make_pair(x: Regs[I], y&: RegisterSize));
1073 }
1074 return OutVec;
1075}
1076
1077void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
1078 AssumptionCache *ac,
1079 const TargetLibraryInfo *li) {
1080 AA = aa;
1081 AC = ac;
1082 GFI = gfi;
1083 LibInfo = li;
1084 Context = DAG.getContext();
1085 LPadToCallSiteMap.clear();
1086 SL->init(tli: DAG.getTargetLoweringInfo(), tm: TM, dl: DAG.getDataLayout());
1087 AssignmentTrackingEnabled = isAssignmentTrackingEnabled(
1088 M: *DAG.getMachineFunction().getFunction().getParent());
1089}
1090
1091void SelectionDAGBuilder::clear() {
1092 NodeMap.clear();
1093 UnusedArgNodeMap.clear();
1094 PendingLoads.clear();
1095 PendingExports.clear();
1096 PendingConstrainedFP.clear();
1097 PendingConstrainedFPStrict.clear();
1098 CurInst = nullptr;
1099 HasTailCall = false;
1100 SDNodeOrder = LowestSDNodeOrder;
1101 StatepointLowering.clear();
1102}
1103
1104void SelectionDAGBuilder::clearDanglingDebugInfo() {
1105 DanglingDebugInfoMap.clear();
1106}
1107
1108// Update DAG root to include dependencies on Pending chains.
1109SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
1110 SDValue Root = DAG.getRoot();
1111
1112 if (Pending.empty())
1113 return Root;
1114
1115 // Add current root to PendingChains, unless we already indirectly
1116 // depend on it.
1117 if (Root.getOpcode() != ISD::EntryToken) {
1118 unsigned i = 0, e = Pending.size();
1119 for (; i != e; ++i) {
1120 assert(Pending[i].getNode()->getNumOperands() > 1);
1121 if (Pending[i].getNode()->getOperand(Num: 0) == Root)
1122 break; // Don't add the root if we already indirectly depend on it.
1123 }
1124
1125 if (i == e)
1126 Pending.push_back(Elt: Root);
1127 }
1128
1129 if (Pending.size() == 1)
1130 Root = Pending[0];
1131 else
1132 Root = DAG.getTokenFactor(DL: getCurSDLoc(), Vals&: Pending);
1133
1134 DAG.setRoot(Root);
1135 Pending.clear();
1136 return Root;
1137}
1138
1139SDValue SelectionDAGBuilder::getMemoryRoot() {
1140 return updateRoot(Pending&: PendingLoads);
1141}
1142
1143SDValue SelectionDAGBuilder::getRoot() {
1144 // Chain up all pending constrained intrinsics together with all
1145 // pending loads, by simply appending them to PendingLoads and
1146 // then calling getMemoryRoot().
1147 PendingLoads.reserve(N: PendingLoads.size() +
1148 PendingConstrainedFP.size() +
1149 PendingConstrainedFPStrict.size());
1150 PendingLoads.append(in_start: PendingConstrainedFP.begin(),
1151 in_end: PendingConstrainedFP.end());
1152 PendingLoads.append(in_start: PendingConstrainedFPStrict.begin(),
1153 in_end: PendingConstrainedFPStrict.end());
1154 PendingConstrainedFP.clear();
1155 PendingConstrainedFPStrict.clear();
1156 return getMemoryRoot();
1157}
1158
1159SDValue SelectionDAGBuilder::getControlRoot() {
1160 // We need to emit pending fpexcept.strict constrained intrinsics,
1161 // so append them to the PendingExports list.
1162 PendingExports.append(in_start: PendingConstrainedFPStrict.begin(),
1163 in_end: PendingConstrainedFPStrict.end());
1164 PendingConstrainedFPStrict.clear();
1165 return updateRoot(Pending&: PendingExports);
1166}
1167
1168void SelectionDAGBuilder::handleDebugDeclare(Value *Address,
1169 DILocalVariable *Variable,
1170 DIExpression *Expression,
1171 DebugLoc DL) {
1172 assert(Variable && "Missing variable");
1173
1174 // Check if address has undef value.
1175 if (!Address || isa<UndefValue>(Val: Address) ||
1176 (Address->use_empty() && !isa<Argument>(Val: Address))) {
1177 LLVM_DEBUG(
1178 dbgs()
1179 << "dbg_declare: Dropping debug info (bad/undef/unused-arg address)\n");
1180 return;
1181 }
1182
1183 bool IsParameter = Variable->isParameter() || isa<Argument>(Val: Address);
1184
1185 SDValue &N = NodeMap[Address];
1186 if (!N.getNode() && isa<Argument>(Val: Address))
1187 // Check unused arguments map.
1188 N = UnusedArgNodeMap[Address];
1189 SDDbgValue *SDV;
1190 if (N.getNode()) {
1191 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Val: Address))
1192 Address = BCI->getOperand(i_nocapture: 0);
1193 // Parameters are handled specially.
1194 auto *FINode = dyn_cast<FrameIndexSDNode>(Val: N.getNode());
1195 if (IsParameter && FINode) {
1196 // Byval parameter. We have a frame index at this point.
1197 SDV = DAG.getFrameIndexDbgValue(Var: Variable, Expr: Expression, FI: FINode->getIndex(),
1198 /*IsIndirect*/ true, DL, O: SDNodeOrder);
1199 } else if (isa<Argument>(Val: Address)) {
1200 // Address is an argument, so try to emit its dbg value using
1201 // virtual register info from the FuncInfo.ValueMap.
1202 EmitFuncArgumentDbgValue(V: Address, Variable, Expr: Expression, DL,
1203 Kind: FuncArgumentDbgValueKind::Declare, N);
1204 return;
1205 } else {
1206 SDV = DAG.getDbgValue(Var: Variable, Expr: Expression, N: N.getNode(), R: N.getResNo(),
1207 IsIndirect: true, DL, O: SDNodeOrder);
1208 }
1209 DAG.AddDbgValue(DB: SDV, isParameter: IsParameter);
1210 } else {
1211 // If Address is an argument then try to emit its dbg value using
1212 // virtual register info from the FuncInfo.ValueMap.
1213 if (!EmitFuncArgumentDbgValue(V: Address, Variable, Expr: Expression, DL,
1214 Kind: FuncArgumentDbgValueKind::Declare, N)) {
1215 LLVM_DEBUG(dbgs() << "dbg_declare: Dropping debug info"
1216 << " (could not emit func-arg dbg_value)\n");
1217 }
1218 }
1219 return;
1220}
1221
1222void SelectionDAGBuilder::visitDbgInfo(const Instruction &I) {
1223 // Add SDDbgValue nodes for any var locs here. Do so before updating
1224 // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}.
1225 if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) {
1228 for (auto It = FnVarLocs->locs_begin(Before: &I), End = FnVarLocs->locs_end(Before: &I);
1229 It != End; ++It) {
1230 auto *Var = FnVarLocs->getDILocalVariable(ID: It->VariableID);
1231 dropDanglingDebugInfo(Variable: Var, Expr: It->Expr);
1232 if (It->Values.isKillLocation(Expression: It->Expr)) {
1233 handleKillDebugValue(Var, Expr: It->Expr, DbgLoc: It->DL, Order: SDNodeOrder);
1234 continue;
1235 }
1236 SmallVector<Value *> Values(It->Values.location_ops());
1237 if (!handleDebugValue(Values, Var, Expr: It->Expr, DbgLoc: It->DL, Order: SDNodeOrder,
1238 IsVariadic: It->Values.hasArgList())) {
1239 SmallVector<Value *, 4> Vals;
1240 for (Value *V : It->Values.location_ops())
1241 Vals.push_back(Elt: V);
1242 addDanglingDebugInfo(Values&: Vals,
1243 Var: FnVarLocs->getDILocalVariable(ID: It->VariableID),
1244 Expr: It->Expr, IsVariadic: Vals.size() > 1, DL: It->DL, Order: SDNodeOrder);
1245 }
1246 }
1247 }
1248
  // We must skip DbgVariableRecords if they've already been processed above as
  // we have just emitted the debug values resulting from assignment tracking
  // analysis, making any existing DbgVariableRecords redundant (and probably
  // less correct). We still need to process DbgLabelRecords. This does sink
  // DbgLabelRecords to the bottom of the group of debug records. That shouldn't
  // be important as it does so deterministically and the ordering between
  // DbgLabelRecords and DbgVariableRecords is immaterial (other than for MIR/IR
  // printing).
1257 bool SkipDbgVariableRecords = DAG.getFunctionVarLocs();
  // Process any debug-info attached to this instruction in the form of
  // DbgRecord non-instruction debug-info records.
1260 for (DbgRecord &DR : I.getDbgRecordRange()) {
1261 if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(Val: &DR)) {
1262 assert(DLR->getLabel() && "Missing label");
1263 SDDbgLabel *SDV =
1264 DAG.getDbgLabel(Label: DLR->getLabel(), DL: DLR->getDebugLoc(), O: SDNodeOrder);
1265 DAG.AddDbgLabel(DB: SDV);
1266 continue;
1267 }
1268
1269 if (SkipDbgVariableRecords)
1270 continue;
1271 DbgVariableRecord &DVR = cast<DbgVariableRecord>(Val&: DR);
1272 DILocalVariable *Variable = DVR.getVariable();
1273 DIExpression *Expression = DVR.getExpression();
1274 dropDanglingDebugInfo(Variable, Expr: Expression);
1275
1276 if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
1277 if (FuncInfo.PreprocessedDVRDeclares.contains(Ptr: &DVR))
1278 continue;
1279 LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DVR
1280 << "\n");
1281 handleDebugDeclare(Address: DVR.getVariableLocationOp(OpIdx: 0), Variable, Expression,
1282 DL: DVR.getDebugLoc());
1283 continue;
1284 }
1285
1286 // A DbgVariableRecord with no locations is a kill location.
1287 SmallVector<Value *, 4> Values(DVR.location_ops());
1288 if (Values.empty()) {
1289 handleKillDebugValue(Var: Variable, Expr: Expression, DbgLoc: DVR.getDebugLoc(),
1290 Order: SDNodeOrder);
1291 continue;
1292 }
1293
1294 // A DbgVariableRecord with an undef or absent location is also a kill
1295 // location.
1296 if (llvm::any_of(Range&: Values,
1297 P: [](Value *V) { return !V || isa<UndefValue>(Val: V); })) {
1298 handleKillDebugValue(Var: Variable, Expr: Expression, DbgLoc: DVR.getDebugLoc(),
1299 Order: SDNodeOrder);
1300 continue;
1301 }
1302
1303 bool IsVariadic = DVR.hasArgList();
1304 if (!handleDebugValue(Values, Var: Variable, Expr: Expression, DbgLoc: DVR.getDebugLoc(),
1305 Order: SDNodeOrder, IsVariadic)) {
1306 addDanglingDebugInfo(Values, Var: Variable, Expr: Expression, IsVariadic,
1307 DL: DVR.getDebugLoc(), Order: SDNodeOrder);
1308 }
1309 }
1310}
1311
1312void SelectionDAGBuilder::visit(const Instruction &I) {
1313 visitDbgInfo(I);
1314
1315 // Set up outgoing PHI node register values before emitting the terminator.
1316 if (I.isTerminator()) {
1317 HandlePHINodesInSuccessorBlocks(LLVMBB: I.getParent());
1318 }
1319
1320 // Increase the SDNodeOrder if dealing with a non-debug instruction.
1321 if (!isa<DbgInfoIntrinsic>(Val: I))
1322 ++SDNodeOrder;
1323
1324 CurInst = &I;
1325
1326 // Set inserted listener only if required.
1327 bool NodeInserted = false;
1328 std::unique_ptr<SelectionDAG::DAGNodeInsertedListener> InsertedListener;
1329 MDNode *PCSectionsMD = I.getMetadata(KindID: LLVMContext::MD_pcsections);
1330 MDNode *MMRA = I.getMetadata(KindID: LLVMContext::MD_mmra);
1331 if (PCSectionsMD || MMRA) {
1332 InsertedListener = std::make_unique<SelectionDAG::DAGNodeInsertedListener>(
1333 args&: DAG, args: [&](SDNode *) { NodeInserted = true; });
1334 }
1335
1336 visit(Opcode: I.getOpcode(), I);
1337
1338 if (!I.isTerminator() && !HasTailCall &&
1339 !isa<GCStatepointInst>(Val: I)) // statepoints handle their exports internally
1340 CopyToExportRegsIfNeeded(V: &I);
1341
1342 // Handle metadata.
1343 if (PCSectionsMD || MMRA) {
1344 auto It = NodeMap.find(Val: &I);
1345 if (It != NodeMap.end()) {
1346 if (PCSectionsMD)
1347 DAG.addPCSections(Node: It->second.getNode(), MD: PCSectionsMD);
1348 if (MMRA)
1349 DAG.addMMRAMetadata(Node: It->second.getNode(), MMRA);
1350 } else if (NodeInserted) {
1351 // This should not happen; if it does, don't let it go unnoticed so we can
1352 // fix it. Relevant visit*() function is probably missing a setValue().
      errs() << "warning: losing !pcsections and/or !mmra metadata ["
1354 << I.getModule()->getName() << "]\n";
1355 LLVM_DEBUG(I.dump());
1356 assert(false);
1357 }
1358 }
1359
1360 CurInst = nullptr;
1361}
1362
1363void SelectionDAGBuilder::visitPHI(const PHINode &) {
1364 llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
1365}
1366
1367void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
1368 // Note: this doesn't use InstVisitor, because it has to work with
1369 // ConstantExpr's in addition to instructions.
1370 switch (Opcode) {
1371 default: llvm_unreachable("Unknown instruction type encountered!");
1372 // Build the switch statement using the Instruction.def file.
1373#define HANDLE_INST(NUM, OPCODE, CLASS) \
1374 case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
1375#include "llvm/IR/Instruction.def"
1376 }
1377}
1378
1379static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG,
1380 DILocalVariable *Variable,
1381 DebugLoc DL, unsigned Order,
1382 SmallVectorImpl<Value *> &Values,
1383 DIExpression *Expression) {
1384 // For variadic dbg_values we will now insert an undef.
1385 // FIXME: We can potentially recover these!
1386 SmallVector<SDDbgOperand, 2> Locs;
1387 for (const Value *V : Values) {
1388 auto *Undef = UndefValue::get(T: V->getType());
1389 Locs.push_back(Elt: SDDbgOperand::fromConst(Const: Undef));
1390 }
1391 SDDbgValue *SDV = DAG.getDbgValueList(Var: Variable, Expr: Expression, Locs, Dependencies: {},
1392 /*IsIndirect=*/false, DL, O: Order,
1393 /*IsVariadic=*/true);
1394 DAG.AddDbgValue(DB: SDV, /*isParameter=*/false);
1395 return true;
1396}
1397
1398void SelectionDAGBuilder::addDanglingDebugInfo(SmallVectorImpl<Value *> &Values,
1399 DILocalVariable *Var,
1400 DIExpression *Expr,
1401 bool IsVariadic, DebugLoc DL,
1402 unsigned Order) {
1403 if (IsVariadic) {
1404 handleDanglingVariadicDebugInfo(DAG, Variable: Var, DL, Order, Values, Expression: Expr);
1405 return;
1406 }
1407 // TODO: Dangling debug info will eventually either be resolved or produce
1408 // an Undef DBG_VALUE. However in the resolution case, a gap may appear
1409 // between the original dbg.value location and its resolved DBG_VALUE,
1410 // which we should ideally fill with an extra Undef DBG_VALUE.
1411 assert(Values.size() == 1);
1412 DanglingDebugInfoMap[Values[0]].emplace_back(args&: Var, args&: Expr, args&: DL, args&: Order);
1413}
1414
1415void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
1416 const DIExpression *Expr) {
1417 auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
1418 DIVariable *DanglingVariable = DDI.getVariable();
1419 DIExpression *DanglingExpr = DDI.getExpression();
1420 if (DanglingVariable == Variable && Expr->fragmentsOverlap(Other: DanglingExpr)) {
1421 LLVM_DEBUG(dbgs() << "Dropping dangling debug info for "
1422 << printDDI(nullptr, DDI) << "\n");
1423 return true;
1424 }
1425 return false;
1426 };
1427
1428 for (auto &DDIMI : DanglingDebugInfoMap) {
1429 DanglingDebugInfoVector &DDIV = DDIMI.second;
1430
1431 // If debug info is to be dropped, run it through final checks to see
1432 // whether it can be salvaged.
1433 for (auto &DDI : DDIV)
1434 if (isMatchingDbgValue(DDI))
1435 salvageUnresolvedDbgValue(V: DDIMI.first, DDI);
1436
1437 erase_if(C&: DDIV, P: isMatchingDbgValue);
1438 }
1439}
1440
1441// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
1442// generate the debug data structures now that we've seen its definition.
1443void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
1444 SDValue Val) {
1445 auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(Key: V);
1446 if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
1447 return;
1448
1449 DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
1450 for (auto &DDI : DDIV) {
1451 DebugLoc DL = DDI.getDebugLoc();
1452 unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
1453 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
1454 DILocalVariable *Variable = DDI.getVariable();
1455 DIExpression *Expr = DDI.getExpression();
1456 assert(Variable->isValidLocationForIntrinsic(DL) &&
1457 "Expected inlined-at fields to agree");
1458 SDDbgValue *SDV;
1459 if (Val.getNode()) {
1460 // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
1461 // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
1462 // we couldn't resolve it directly when examining the DbgValue intrinsic
1463 // in the first place we should not be more successful here). Unless we
1464      // have some test case that proves this to be correct, we should avoid
1465 // calling EmitFuncArgumentDbgValue here.
1466 if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL,
1467 Kind: FuncArgumentDbgValueKind::Value, N: Val)) {
1468 LLVM_DEBUG(dbgs() << "Resolve dangling debug info for "
1469 << printDDI(V, DDI) << "\n");
1470 LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
1471 // Increase the SDNodeOrder for the DbgValue here to make sure it is
1472 // inserted after the definition of Val when emitting the instructions
1473 // after ISel. An alternative could be to teach
1474 // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
1475 LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
1476 << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
1477 << ValSDNodeOrder << "\n");
1478 SDV = getDbgValue(N: Val, Variable, Expr, dl: DL,
1479 DbgSDNodeOrder: std::max(a: DbgSDNodeOrder, b: ValSDNodeOrder));
1480 DAG.AddDbgValue(DB: SDV, isParameter: false);
1481 } else
1482 LLVM_DEBUG(dbgs() << "Resolved dangling debug info for "
1483 << printDDI(V, DDI)
1484 << " in EmitFuncArgumentDbgValue\n");
1485 } else {
1486 LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(V, DDI)
1487 << "\n");
1488 auto Undef = UndefValue::get(T: V->getType());
1489 auto SDV =
1490 DAG.getConstantDbgValue(Var: Variable, Expr, C: Undef, DL, O: DbgSDNodeOrder);
1491 DAG.AddDbgValue(DB: SDV, isParameter: false);
1492 }
1493 }
1494 DDIV.clear();
1495}
1496
1497void SelectionDAGBuilder::salvageUnresolvedDbgValue(const Value *V,
1498 DanglingDebugInfo &DDI) {
1499 // TODO: For the variadic implementation, instead of only checking the fail
1500  // state of `handleDebugValue`, we need to know specifically which values were
1501 // invalid, so that we attempt to salvage only those values when processing
1502 // a DIArgList.
1503 const Value *OrigV = V;
1504 DILocalVariable *Var = DDI.getVariable();
1505 DIExpression *Expr = DDI.getExpression();
1506 DebugLoc DL = DDI.getDebugLoc();
1507 unsigned SDOrder = DDI.getSDNodeOrder();
1508
1509 // Currently we consider only dbg.value intrinsics -- we tell the salvager
1510 // that DW_OP_stack_value is desired.
1511 bool StackValue = true;
1512
1513  // Can this Value be encoded without any further work?
1514 if (handleDebugValue(Values: V, Var, Expr, DbgLoc: DL, Order: SDOrder, /*IsVariadic=*/false))
1515 return;
1516
1517 // Attempt to salvage back through as many instructions as possible. Bail if
1518 // a non-instruction is seen, such as a constant expression or global
1519 // variable. FIXME: Further work could recover those too.
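  // For example (illustrative only): if the dangling value is an optimized-out
  // `%add = add i64 %x, 5`, the salvager can typically rewrite the location to
  // refer to %x with an adjusted expression along the lines of
  // DIExpression(DW_OP_plus_uconst, 5, DW_OP_stack_value), so the variable's
  // value remains recoverable without the original instruction.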
1520 while (isa<Instruction>(Val: V)) {
1521 const Instruction &VAsInst = *cast<const Instruction>(Val: V);
1522 // Temporary "0", awaiting real implementation.
1523 SmallVector<uint64_t, 16> Ops;
1524 SmallVector<Value *, 4> AdditionalValues;
1525 V = salvageDebugInfoImpl(I&: const_cast<Instruction &>(VAsInst),
1526 CurrentLocOps: Expr->getNumLocationOperands(), Ops,
1527 AdditionalValues);
1528 // If we cannot salvage any further, and haven't yet found a suitable debug
1529 // expression, bail out.
1530 if (!V)
1531 break;
1532
1533 // TODO: If AdditionalValues isn't empty, then the salvage can only be
1534 // represented with a DBG_VALUE_LIST, so we give up. When we have support
1535 // here for variadic dbg_values, remove that condition.
1536 if (!AdditionalValues.empty())
1537 break;
1538
1539 // New value and expr now represent this debuginfo.
1540 Expr = DIExpression::appendOpsToArg(Expr, Ops, ArgNo: 0, StackValue);
1541
1542 // Some kind of simplification occurred: check whether the operand of the
1543 // salvaged debug expression can be encoded in this DAG.
1544 if (handleDebugValue(Values: V, Var, Expr, DbgLoc: DL, Order: SDOrder, /*IsVariadic=*/false)) {
1545 LLVM_DEBUG(
1546 dbgs() << "Salvaged debug location info for:\n " << *Var << "\n"
1547 << *OrigV << "\nBy stripping back to:\n " << *V << "\n");
1548 return;
1549 }
1550 }
1551
1552 // This was the final opportunity to salvage this debug information, and it
1553 // couldn't be done. Place an undef DBG_VALUE at this location to terminate
1554 // any earlier variable location.
1555 assert(OrigV && "V shouldn't be null");
1556 auto *Undef = UndefValue::get(T: OrigV->getType());
1557 auto *SDV = DAG.getConstantDbgValue(Var, Expr, C: Undef, DL, O: SDNodeOrder);
1558 DAG.AddDbgValue(DB: SDV, isParameter: false);
1559 LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n "
1560 << printDDI(OrigV, DDI) << "\n");
1561}
1562
1563void SelectionDAGBuilder::handleKillDebugValue(DILocalVariable *Var,
1564 DIExpression *Expr,
1565 DebugLoc DbgLoc,
1566 unsigned Order) {
1567 Value *Poison = PoisonValue::get(T: Type::getInt1Ty(C&: *Context));
1568 DIExpression *NewExpr =
1569 const_cast<DIExpression *>(DIExpression::convertToUndefExpression(Expr));
1570 handleDebugValue(Values: Poison, Var, Expr: NewExpr, DbgLoc, Order,
1571 /*IsVariadic*/ false);
1572}
1573
1574bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
1575 DILocalVariable *Var,
1576 DIExpression *Expr, DebugLoc DbgLoc,
1577 unsigned Order, bool IsVariadic) {
1578 if (Values.empty())
1579 return true;
1580
1581 // Filter EntryValue locations out early.
1582 if (visitEntryValueDbgValue(Values, Variable: Var, Expr, DbgLoc))
1583 return true;
1584
1585 SmallVector<SDDbgOperand> LocationOps;
1586 SmallVector<SDNode *> Dependencies;
1587 for (const Value *V : Values) {
1588 // Constant value.
1589 if (isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) || isa<UndefValue>(Val: V) ||
1590 isa<ConstantPointerNull>(Val: V)) {
1591 LocationOps.emplace_back(Args: SDDbgOperand::fromConst(Const: V));
1592 continue;
1593 }
1594
1595 // Look through IntToPtr constants.
1596 if (auto *CE = dyn_cast<ConstantExpr>(Val: V))
1597 if (CE->getOpcode() == Instruction::IntToPtr) {
1598 LocationOps.emplace_back(Args: SDDbgOperand::fromConst(Const: CE->getOperand(i_nocapture: 0)));
1599 continue;
1600 }
1601
1602 // If the Value is a frame index, we can create a FrameIndex debug value
1603 // without relying on the DAG at all.
1604 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: V)) {
1605 auto SI = FuncInfo.StaticAllocaMap.find(Val: AI);
1606 if (SI != FuncInfo.StaticAllocaMap.end()) {
1607 LocationOps.emplace_back(Args: SDDbgOperand::fromFrameIdx(FrameIdx: SI->second));
1608 continue;
1609 }
1610 }
1611
1612 // Do not use getValue() in here; we don't want to generate code at
1613 // this point if it hasn't been done yet.
1614 SDValue N = NodeMap[V];
1615 if (!N.getNode() && isa<Argument>(Val: V)) // Check unused arguments map.
1616 N = UnusedArgNodeMap[V];
1617 if (N.getNode()) {
1618 // Only emit func arg dbg value for non-variadic dbg.values for now.
1619 if (!IsVariadic &&
1620 EmitFuncArgumentDbgValue(V, Variable: Var, Expr, DL: DbgLoc,
1621 Kind: FuncArgumentDbgValueKind::Value, N))
1622 return true;
1623 if (auto *FISDN = dyn_cast<FrameIndexSDNode>(Val: N.getNode())) {
1624 // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
1625 // describe stack slot locations.
1626 //
1627 // Consider "int x = 0; int *px = &x;". There are two kinds of
1628 // interesting debug values here after optimization:
1629 //
1630 // dbg.value(i32* %px, !"int *px", !DIExpression()), and
1631 // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
1632 //
1633 // Both describe the direct values of their associated variables.
1634 Dependencies.push_back(Elt: N.getNode());
1635 LocationOps.emplace_back(Args: SDDbgOperand::fromFrameIdx(FrameIdx: FISDN->getIndex()));
1636 continue;
1637 }
1638 LocationOps.emplace_back(
1639 Args: SDDbgOperand::fromNode(Node: N.getNode(), ResNo: N.getResNo()));
1640 continue;
1641 }
1642
1643 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1644 // Special rules apply for the first dbg.values of parameter variables in a
1645 // function. Identify them by the fact they reference Argument Values, that
1646 // they're parameters, and they are parameters of the current function. We
1647 // need to let them dangle until they get an SDNode.
1648 bool IsParamOfFunc =
1649 isa<Argument>(Val: V) && Var->isParameter() && !DbgLoc.getInlinedAt();
1650 if (IsParamOfFunc)
1651 return false;
1652
1653 // The value is not used in this block yet (or it would have an SDNode).
1654 // We still want the value to appear for the user if possible -- if it has
1655 // an associated VReg, we can refer to that instead.
1656 auto VMI = FuncInfo.ValueMap.find(Val: V);
1657 if (VMI != FuncInfo.ValueMap.end()) {
1658 unsigned Reg = VMI->second;
1659 // If this is a PHI node, it may be split up into several MI PHI nodes
1660 // (in FunctionLoweringInfo::set).
1661 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
1662 V->getType(), std::nullopt);
1663 if (RFV.occupiesMultipleRegs()) {
1664 // FIXME: We could potentially support variadic dbg_values here.
1665 if (IsVariadic)
1666 return false;
1667 unsigned Offset = 0;
1668 unsigned BitsToDescribe = 0;
1669 if (auto VarSize = Var->getSizeInBits())
1670 BitsToDescribe = *VarSize;
1671 if (auto Fragment = Expr->getFragmentInfo())
1672 BitsToDescribe = Fragment->SizeInBits;
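        // For example (illustrative only): a 64-bit variable whose value lives
        // in two 32-bit virtual registers is described by two DBG_VALUEs with
        // fragment expressions covering bits [0, 32) and [32, 64).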
1673 for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
1674 // Bail out if all bits are described already.
1675 if (Offset >= BitsToDescribe)
1676 break;
1677 // TODO: handle scalable vectors.
1678 unsigned RegisterSize = RegAndSize.second;
1679 unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
1680 ? BitsToDescribe - Offset
1681 : RegisterSize;
1682 auto FragmentExpr = DIExpression::createFragmentExpression(
1683 Expr, OffsetInBits: Offset, SizeInBits: FragmentSize);
1684 if (!FragmentExpr)
1685 continue;
1686 SDDbgValue *SDV = DAG.getVRegDbgValue(
1687 Var, Expr: *FragmentExpr, VReg: RegAndSize.first, IsIndirect: false, DL: DbgLoc, O: SDNodeOrder);
1688 DAG.AddDbgValue(DB: SDV, isParameter: false);
1689 Offset += RegisterSize;
1690 }
1691 return true;
1692 }
1693 // We can use simple vreg locations for variadic dbg_values as well.
1694 LocationOps.emplace_back(Args: SDDbgOperand::fromVReg(VReg: Reg));
1695 continue;
1696 }
1697 // We failed to create a SDDbgOperand for V.
1698 return false;
1699 }
1700
1701 // We have created a SDDbgOperand for each Value in Values.
1702 // Should use Order instead of SDNodeOrder?
1703 assert(!LocationOps.empty());
1704 SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, Locs: LocationOps, Dependencies,
1705 /*IsIndirect=*/false, DL: DbgLoc,
1706 O: SDNodeOrder, IsVariadic);
1707 DAG.AddDbgValue(DB: SDV, /*isParameter=*/false);
1708 return true;
1709}
1710
1711void SelectionDAGBuilder::resolveOrClearDbgInfo() {
1712 // Try to fixup any remaining dangling debug info -- and drop it if we can't.
1713 for (auto &Pair : DanglingDebugInfoMap)
1714 for (auto &DDI : Pair.second)
1715 salvageUnresolvedDbgValue(V: const_cast<Value *>(Pair.first), DDI);
1716 clearDanglingDebugInfo();
1717}
1718
1719/// getCopyFromRegs - If a virtual register was allocated for the value V, emit
1720/// CopyFromReg of the specified type Ty. Return an empty SDValue() otherwise.
1721SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
1722 DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(Val: V);
1723 SDValue Result;
1724
1725 if (It != FuncInfo.ValueMap.end()) {
1726 Register InReg = It->second;
1727
1728 RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
1729 DAG.getDataLayout(), InReg, Ty,
1730 std::nullopt); // This is not an ABI copy.
1731 SDValue Chain = DAG.getEntryNode();
1732 Result = RFV.getCopyFromRegs(DAG, FuncInfo, dl: getCurSDLoc(), Chain, Glue: nullptr,
1733 V);
1734 resolveDanglingDebugInfo(V, Val: Result);
1735 }
1736
1737 return Result;
1738}
1739
1740/// getValue - Return an SDValue for the given Value.
1741SDValue SelectionDAGBuilder::getValue(const Value *V) {
1742 // If we already have an SDValue for this value, use it. It's important
1743 // to do this first, so that we don't create a CopyFromReg if we already
1744 // have a regular SDValue.
1745 SDValue &N = NodeMap[V];
1746 if (N.getNode()) return N;
1747
1748 // If there's a virtual register allocated and initialized for this
1749 // value, use it.
1750 if (SDValue copyFromReg = getCopyFromRegs(V, Ty: V->getType()))
1751 return copyFromReg;
1752
1753 // Otherwise create a new SDValue and remember it.
1754 SDValue Val = getValueImpl(V);
1755 NodeMap[V] = Val;
1756 resolveDanglingDebugInfo(V, Val);
1757 return Val;
1758}
1759
1760/// getNonRegisterValue - Return an SDValue for the given Value, but
1761/// don't look in FuncInfo.ValueMap for a virtual register.
1762SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
1763 // If we already have an SDValue for this value, use it.
1764 SDValue &N = NodeMap[V];
1765 if (N.getNode()) {
1766 if (isIntOrFPConstant(V: N)) {
1767 // Remove the debug location from the node as the node is about to be used
1768 // in a location which may differ from the original debug location. This
1769 // is relevant to Constant and ConstantFP nodes because they can appear
1770 // as constant expressions inside PHI nodes.
1771 N->setDebugLoc(DebugLoc());
1772 }
1773 return N;
1774 }
1775
1776 // Otherwise create a new SDValue and remember it.
1777 SDValue Val = getValueImpl(V);
1778 NodeMap[V] = Val;
1779 resolveDanglingDebugInfo(V, Val);
1780 return Val;
1781}
1782
1783/// getValueImpl - Helper function for getValue and getNonRegisterValue.
1784/// Create an SDValue for the given value.
1785SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
1786 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1787
1788 if (const Constant *C = dyn_cast<Constant>(Val: V)) {
1789 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: V->getType(), AllowUnknown: true);
1790
1791 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: C))
1792 return DAG.getConstant(Val: *CI, DL: getCurSDLoc(), VT);
1793
1794 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
1795 return DAG.getGlobalAddress(GV, DL: getCurSDLoc(), VT);
1796
1797 if (isa<ConstantPointerNull>(Val: C)) {
1798 unsigned AS = V->getType()->getPointerAddressSpace();
1799 return DAG.getConstant(Val: 0, DL: getCurSDLoc(),
1800 VT: TLI.getPointerTy(DL: DAG.getDataLayout(), AS));
1801 }
1802
1803 if (match(V: C, P: m_VScale()))
1804 return DAG.getVScale(DL: getCurSDLoc(), VT, MulImm: APInt(VT.getSizeInBits(), 1));
1805
1806 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
1807 return DAG.getConstantFP(V: *CFP, DL: getCurSDLoc(), VT);
1808
1809 if (isa<UndefValue>(Val: C) && !V->getType()->isAggregateType())
1810 return DAG.getUNDEF(VT);
1811
1812 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: C)) {
1813 visit(Opcode: CE->getOpcode(), I: *CE);
1814 SDValue N1 = NodeMap[V];
1815 assert(N1.getNode() && "visit didn't populate the NodeMap!");
1816 return N1;
1817 }
1818
1819 if (isa<ConstantStruct>(Val: C) || isa<ConstantArray>(Val: C)) {
1820 SmallVector<SDValue, 4> Constants;
1821 for (const Use &U : C->operands()) {
1822 SDNode *Val = getValue(V: U).getNode();
1823 // If the operand is an empty aggregate, there are no values.
1824 if (!Val) continue;
1825 // Add each leaf value from the operand to the Constants list
1826 // to form a flattened list of all the values.
1827 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1828 Constants.push_back(Elt: SDValue(Val, i));
1829 }
1830
1831 return DAG.getMergeValues(Ops: Constants, dl: getCurSDLoc());
1832 }
1833
1834 if (const ConstantDataSequential *CDS =
1835 dyn_cast<ConstantDataSequential>(Val: C)) {
1836 SmallVector<SDValue, 4> Ops;
1837 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1838 SDNode *Val = getValue(V: CDS->getElementAsConstant(i)).getNode();
1839      // Add each leaf value from the operand to the Ops list to form a
1840      // flattened list of all the values.
1841 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1842 Ops.push_back(Elt: SDValue(Val, i));
1843 }
1844
1845 if (isa<ArrayType>(Val: CDS->getType()))
1846 return DAG.getMergeValues(Ops, dl: getCurSDLoc());
1847 return NodeMap[V] = DAG.getBuildVector(VT, DL: getCurSDLoc(), Ops);
1848 }
1849
1850 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
1851 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
1852 "Unknown struct or array constant!");
1853
1854 SmallVector<EVT, 4> ValueVTs;
1855 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: C->getType(), ValueVTs);
1856 unsigned NumElts = ValueVTs.size();
1857 if (NumElts == 0)
1858 return SDValue(); // empty struct
1859 SmallVector<SDValue, 4> Constants(NumElts);
1860 for (unsigned i = 0; i != NumElts; ++i) {
1861 EVT EltVT = ValueVTs[i];
1862 if (isa<UndefValue>(Val: C))
1863 Constants[i] = DAG.getUNDEF(VT: EltVT);
1864 else if (EltVT.isFloatingPoint())
1865 Constants[i] = DAG.getConstantFP(Val: 0, DL: getCurSDLoc(), VT: EltVT);
1866 else
1867 Constants[i] = DAG.getConstant(Val: 0, DL: getCurSDLoc(), VT: EltVT);
1868 }
1869
1870 return DAG.getMergeValues(Ops: Constants, dl: getCurSDLoc());
1871 }
1872
1873 if (const BlockAddress *BA = dyn_cast<BlockAddress>(Val: C))
1874 return DAG.getBlockAddress(BA, VT);
1875
1876 if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(Val: C))
1877 return getValue(V: Equiv->getGlobalValue());
1878
1879 if (const auto *NC = dyn_cast<NoCFIValue>(Val: C))
1880 return getValue(V: NC->getGlobalValue());
1881
1882 if (VT == MVT::aarch64svcount) {
1883 assert(C->isNullValue() && "Can only zero this target type!");
1884 return DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT,
1885 DAG.getConstant(0, getCurSDLoc(), MVT::nxv16i1));
1886 }
1887
1888 VectorType *VecTy = cast<VectorType>(Val: V->getType());
1889
1890 // Now that we know the number and type of the elements, get that number of
1891 // elements into the Ops array based on what kind of constant it is.
1892 if (const ConstantVector *CV = dyn_cast<ConstantVector>(Val: C)) {
1893 SmallVector<SDValue, 16> Ops;
1894 unsigned NumElements = cast<FixedVectorType>(Val: VecTy)->getNumElements();
1895 for (unsigned i = 0; i != NumElements; ++i)
1896 Ops.push_back(Elt: getValue(V: CV->getOperand(i_nocapture: i)));
1897
1898 return NodeMap[V] = DAG.getBuildVector(VT, DL: getCurSDLoc(), Ops);
1899 }
1900
1901 if (isa<ConstantAggregateZero>(Val: C)) {
1902 EVT EltVT =
1903 TLI.getValueType(DL: DAG.getDataLayout(), Ty: VecTy->getElementType());
1904
1905 SDValue Op;
1906 if (EltVT.isFloatingPoint())
1907 Op = DAG.getConstantFP(Val: 0, DL: getCurSDLoc(), VT: EltVT);
1908 else
1909 Op = DAG.getConstant(Val: 0, DL: getCurSDLoc(), VT: EltVT);
1910
1911 return NodeMap[V] = DAG.getSplat(VT, DL: getCurSDLoc(), Op);
1912 }
1913
1914 llvm_unreachable("Unknown vector constant");
1915 }
1916
1917 // If this is a static alloca, generate it as the frameindex instead of
1918 // computation.
1919 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: V)) {
1920 DenseMap<const AllocaInst*, int>::iterator SI =
1921 FuncInfo.StaticAllocaMap.find(Val: AI);
1922 if (SI != FuncInfo.StaticAllocaMap.end())
1923 return DAG.getFrameIndex(
1924 FI: SI->second, VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: AI->getType()));
1925 }
1926
1927 // If this is an instruction which fast-isel has deferred, select it now.
1928 if (const Instruction *Inst = dyn_cast<Instruction>(Val: V)) {
1929 Register InReg = FuncInfo.InitializeRegForValue(V: Inst);
1930
1931 RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
1932 Inst->getType(), std::nullopt);
1933 SDValue Chain = DAG.getEntryNode();
1934 return RFV.getCopyFromRegs(DAG, FuncInfo, dl: getCurSDLoc(), Chain, Glue: nullptr, V);
1935 }
1936
1937 if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(Val: V))
1938 return DAG.getMDNode(MD: cast<MDNode>(Val: MD->getMetadata()));
1939
1940 if (const auto *BB = dyn_cast<BasicBlock>(Val: V))
1941 return DAG.getBasicBlock(MBB: FuncInfo.MBBMap[BB]);
1942
1943 llvm_unreachable("Can't get register for value!");
1944}
1945
1946void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
1947 auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn());
1948 bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
1949 bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
1950 bool IsSEH = isAsynchronousEHPersonality(Pers);
1951 MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
1952 if (!IsSEH)
1953 CatchPadMBB->setIsEHScopeEntry();
1954 // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
1955 if (IsMSVCCXX || IsCoreCLR)
1956 CatchPadMBB->setIsEHFuncletEntry();
1957}
1958
1959void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
1960 // Update machine-CFG edge.
1961 MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
1962 FuncInfo.MBB->addSuccessor(Succ: TargetMBB);
1963 TargetMBB->setIsEHCatchretTarget(true);
1964 DAG.getMachineFunction().setHasEHCatchret(true);
1965
1966 auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn());
1967 bool IsSEH = isAsynchronousEHPersonality(Pers);
1968 if (IsSEH) {
1969 // If this is not a fall-through branch or optimizations are switched off,
1970 // emit the branch.
1971 if (TargetMBB != NextBlock(MBB: FuncInfo.MBB) ||
1972 TM.getOptLevel() == CodeGenOptLevel::None)
1973 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
1974 getControlRoot(), DAG.getBasicBlock(MBB: TargetMBB)));
1975 return;
1976 }
1977
1978 // Figure out the funclet membership for the catchret's successor.
1979 // This will be used by the FuncletLayout pass to determine how to order the
1980 // BB's.
1981 // A 'catchret' returns to the outer scope's color.
1982 Value *ParentPad = I.getCatchSwitchParentPad();
1983 const BasicBlock *SuccessorColor;
1984 if (isa<ConstantTokenNone>(Val: ParentPad))
1985 SuccessorColor = &FuncInfo.Fn->getEntryBlock();
1986 else
1987 SuccessorColor = cast<Instruction>(Val: ParentPad)->getParent();
1988 assert(SuccessorColor && "No parent funclet for catchret!");
1989 MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
1990 assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
1991
1992 // Create the terminator node.
1993 SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
1994 getControlRoot(), DAG.getBasicBlock(MBB: TargetMBB),
1995 DAG.getBasicBlock(MBB: SuccessorColorMBB));
1996 DAG.setRoot(Ret);
1997}
1998
1999void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
2000 // Don't emit any special code for the cleanuppad instruction. It just marks
2001 // the start of an EH scope/funclet.
2002 FuncInfo.MBB->setIsEHScopeEntry();
2003 auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn());
2004 if (Pers != EHPersonality::Wasm_CXX) {
2005 FuncInfo.MBB->setIsEHFuncletEntry();
2006 FuncInfo.MBB->setIsCleanupFuncletEntry();
2007 }
2008}
2009
2010// In wasm EH, even though a catchpad may not catch an exception if a tag does
2011// not match, it is OK to add only the first unwind destination catchpad to the
2012// successors, because there will be at least one invoke instruction within the
2013// catch scope that points to the next unwind destination, if one exists, so
2014// CFGSort cannot mess up the BB sorting order.
2015// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
2016// call within them, and catchpads only consisting of 'catch (...)' have a
2017// '__cxa_end_catch' call within them, both of which generate invokes in case
2018// the next unwind destination exists, i.e., the next unwind destination is not
2019// the caller.)
2020//
2021// Having at most one EH pad successor is also simpler and helps later
2022// transformations.
2023//
2024// For example,
2025// current:
2026// invoke void @foo to ... unwind label %catch.dispatch
2027// catch.dispatch:
2028// %0 = catchswitch within ... [label %catch.start] unwind label %next
2029// catch.start:
2030// ...
2031// ... in this BB or some other child BB dominated by this BB there will be an
2032// invoke that points to 'next' BB as an unwind destination
2033//
2034// next: ; We don't need to add this to 'current' BB's successor
2035// ...
2036static void findWasmUnwindDestinations(
2037 FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
2038 BranchProbability Prob,
2039 SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
2040 &UnwindDests) {
2041 while (EHPadBB) {
2042 const Instruction *Pad = EHPadBB->getFirstNonPHI();
2043 if (isa<CleanupPadInst>(Val: Pad)) {
2044 // Stop on cleanup pads.
2045 UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[EHPadBB], Args&: Prob);
2046 UnwindDests.back().first->setIsEHScopeEntry();
2047 break;
2048 } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: Pad)) {
2049 // Add the catchpad handlers to the possible destinations. We don't
2050 // continue to the unwind destination of the catchswitch for wasm.
2051 for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
2052 UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[CatchPadBB], Args&: Prob);
2053 UnwindDests.back().first->setIsEHScopeEntry();
2054 }
2055 break;
2056 } else {
2057 continue;
2058 }
2059 }
2060}
2061
2062/// When an invoke or a cleanupret unwinds to the next EH pad, there are
2063/// many places it could ultimately go. In the IR, we have a single unwind
2064/// destination, but in the machine CFG, we enumerate all the possible blocks.
2065/// This function skips over imaginary basic blocks that hold catchswitch
2066/// instructions, and finds all the "real" machine
2067/// basic block destinations. As those destinations may not be successors of
2068/// EHPadBB, here we also calculate the edge probability to those destinations.
2069/// The passed-in Prob is the edge probability to EHPadBB.
2070static void findUnwindDestinations(
2071 FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
2072 BranchProbability Prob,
2073 SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
2074 &UnwindDests) {
2075 EHPersonality Personality =
2076 classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn());
2077 bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
2078 bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
2079 bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
2080 bool IsSEH = isAsynchronousEHPersonality(Pers: Personality);
2081
2082 if (IsWasmCXX) {
2083 findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
2084 assert(UnwindDests.size() <= 1 &&
2085 "There should be at most one unwind destination for wasm");
2086 return;
2087 }
2088
2089 while (EHPadBB) {
2090 const Instruction *Pad = EHPadBB->getFirstNonPHI();
2091 BasicBlock *NewEHPadBB = nullptr;
2092 if (isa<LandingPadInst>(Val: Pad)) {
2093 // Stop on landingpads. They are not funclets.
2094 UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[EHPadBB], Args&: Prob);
2095 break;
2096 } else if (isa<CleanupPadInst>(Val: Pad)) {
2097 // Stop on cleanup pads. Cleanups are always funclet entries for all known
2098 // personalities.
2099 UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[EHPadBB], Args&: Prob);
2100 UnwindDests.back().first->setIsEHScopeEntry();
2101 UnwindDests.back().first->setIsEHFuncletEntry();
2102 break;
2103 } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: Pad)) {
2104 // Add the catchpad handlers to the possible destinations.
2105 for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
2106 UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[CatchPadBB], Args&: Prob);
2107 // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
2108 if (IsMSVCCXX || IsCoreCLR)
2109 UnwindDests.back().first->setIsEHFuncletEntry();
2110 if (!IsSEH)
2111 UnwindDests.back().first->setIsEHScopeEntry();
2112 }
2113 NewEHPadBB = CatchSwitch->getUnwindDest();
2114 } else {
2115 continue;
2116 }
2117
2118 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2119 if (BPI && NewEHPadBB)
2120 Prob *= BPI->getEdgeProbability(Src: EHPadBB, Dst: NewEHPadBB);
2121 EHPadBB = NewEHPadBB;
2122 }
2123}
2124
2125void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
2126 // Update successor info.
2127 SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2128 auto UnwindDest = I.getUnwindDest();
2129 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2130 BranchProbability UnwindDestProb =
2131 (BPI && UnwindDest)
2132 ? BPI->getEdgeProbability(Src: FuncInfo.MBB->getBasicBlock(), Dst: UnwindDest)
2133 : BranchProbability::getZero();
2134 findUnwindDestinations(FuncInfo, EHPadBB: UnwindDest, Prob: UnwindDestProb, UnwindDests);
2135 for (auto &UnwindDest : UnwindDests) {
2136 UnwindDest.first->setIsEHPad();
2137 addSuccessorWithProb(Src: FuncInfo.MBB, Dst: UnwindDest.first, Prob: UnwindDest.second);
2138 }
2139 FuncInfo.MBB->normalizeSuccProbs();
2140
2141 // Create the terminator node.
2142 SDValue Ret =
2143 DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
2144 DAG.setRoot(Ret);
2145}
2146
2147void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
2148 report_fatal_error(reason: "visitCatchSwitch not yet implemented!");
2149}
2150
2151void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
2152 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2153 auto &DL = DAG.getDataLayout();
2154 SDValue Chain = getControlRoot();
2155 SmallVector<ISD::OutputArg, 8> Outs;
2156 SmallVector<SDValue, 8> OutVals;
2157
2158 // Calls to @llvm.experimental.deoptimize don't generate a return value, so
2159 // lower
2160 //
2161 // %val = call <ty> @llvm.experimental.deoptimize()
2162 // ret <ty> %val
2163 //
2164 // differently.
2165 if (I.getParent()->getTerminatingDeoptimizeCall()) {
2166 LowerDeoptimizingReturn();
2167 return;
2168 }
2169
2170 if (!FuncInfo.CanLowerReturn) {
2171 unsigned DemoteReg = FuncInfo.DemoteRegister;
2172 const Function *F = I.getParent()->getParent();
2173
2174 // Emit a store of the return value through the virtual register.
2175 // Leave Outs empty so that LowerReturn won't try to load return
2176 // registers the usual way.
2177 SmallVector<EVT, 1> PtrValueVTs;
2178 ComputeValueVTs(TLI, DL,
2179 Ty: PointerType::get(C&: F->getContext(),
2180 AddressSpace: DAG.getDataLayout().getAllocaAddrSpace()),
2181 ValueVTs&: PtrValueVTs);
2182
2183 SDValue RetPtr =
2184 DAG.getCopyFromReg(Chain, dl: getCurSDLoc(), Reg: DemoteReg, VT: PtrValueVTs[0]);
2185 SDValue RetOp = getValue(V: I.getOperand(i_nocapture: 0));
2186
2187 SmallVector<EVT, 4> ValueVTs, MemVTs;
2188 SmallVector<uint64_t, 4> Offsets;
2189 ComputeValueVTs(TLI, DL, Ty: I.getOperand(i_nocapture: 0)->getType(), ValueVTs, MemVTs: &MemVTs,
2190 FixedOffsets: &Offsets, StartingOffset: 0);
2191 unsigned NumValues = ValueVTs.size();
2192
2193 SmallVector<SDValue, 4> Chains(NumValues);
2194 Align BaseAlign = DL.getPrefTypeAlign(Ty: I.getOperand(i_nocapture: 0)->getType());
2195 for (unsigned i = 0; i != NumValues; ++i) {
2196 // An aggregate return value cannot wrap around the address space, so
2197 // offsets to its parts don't wrap either.
2198 SDValue Ptr = DAG.getObjectPtrOffset(SL: getCurSDLoc(), Ptr: RetPtr,
2199 Offset: TypeSize::getFixed(ExactSize: Offsets[i]));
2200
2201 SDValue Val = RetOp.getValue(R: RetOp.getResNo() + i);
2202 if (MemVTs[i] != ValueVTs[i])
2203 Val = DAG.getPtrExtOrTrunc(Op: Val, DL: getCurSDLoc(), VT: MemVTs[i]);
2204 Chains[i] = DAG.getStore(
2205 Chain, dl: getCurSDLoc(), Val,
2206 // FIXME: better loc info would be nice.
2207 Ptr, PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()),
2208 Alignment: commonAlignment(A: BaseAlign, Offset: Offsets[i]));
2209 }
2210
2211 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
2212 MVT::Other, Chains);
2213 } else if (I.getNumOperands() != 0) {
2214 SmallVector<EVT, 4> ValueVTs;
2215 ComputeValueVTs(TLI, DL, Ty: I.getOperand(i_nocapture: 0)->getType(), ValueVTs);
2216 unsigned NumValues = ValueVTs.size();
2217 if (NumValues) {
2218 SDValue RetOp = getValue(V: I.getOperand(i_nocapture: 0));
2219
2220 const Function *F = I.getParent()->getParent();
2221
2222 bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
2223 Ty: I.getOperand(i_nocapture: 0)->getType(), CallConv: F->getCallingConv(),
2224 /*IsVarArg*/ isVarArg: false, DL);
2225
2226 ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
2227 if (F->getAttributes().hasRetAttr(Attribute::SExt))
2228 ExtendKind = ISD::SIGN_EXTEND;
2229 else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
2230 ExtendKind = ISD::ZERO_EXTEND;
2231
2232 LLVMContext &Context = F->getContext();
2233 bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
2234
2235 for (unsigned j = 0; j != NumValues; ++j) {
2236 EVT VT = ValueVTs[j];
2237
2238 if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
2239 VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
2240
2241 CallingConv::ID CC = F->getCallingConv();
2242
2243 unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
2244 MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
2245 SmallVector<SDValue, 4> Parts(NumParts);
2246 getCopyToParts(DAG, DL: getCurSDLoc(),
2247 Val: SDValue(RetOp.getNode(), RetOp.getResNo() + j),
2248 Parts: &Parts[0], NumParts, PartVT, V: &I, CallConv: CC, ExtendKind);
2249
2250 // 'inreg' on function refers to return value
2251 ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
2252 if (RetInReg)
2253 Flags.setInReg();
2254
2255 if (I.getOperand(i_nocapture: 0)->getType()->isPointerTy()) {
2256 Flags.setPointer();
2257 Flags.setPointerAddrSpace(
2258 cast<PointerType>(Val: I.getOperand(i_nocapture: 0)->getType())->getAddressSpace());
2259 }
2260
2261 if (NeedsRegBlock) {
2262 Flags.setInConsecutiveRegs();
2263 if (j == NumValues - 1)
2264 Flags.setInConsecutiveRegsLast();
2265 }
2266
2267 // Propagate extension type if any
2268 if (ExtendKind == ISD::SIGN_EXTEND)
2269 Flags.setSExt();
2270 else if (ExtendKind == ISD::ZERO_EXTEND)
2271 Flags.setZExt();
2272
2273 for (unsigned i = 0; i < NumParts; ++i) {
2274 Outs.push_back(Elt: ISD::OutputArg(Flags,
2275 Parts[i].getValueType().getSimpleVT(),
2276 VT, /*isfixed=*/true, 0, 0));
2277 OutVals.push_back(Elt: Parts[i]);
2278 }
2279 }
2280 }
2281 }
2282
2283  // Push the swifterror virtual register as the last element of Outs. This
2284  // makes sure that the swifterror virtual register will be returned in the
2285  // swifterror physical register.
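  // E.g. for a function with a `ptr swifterror %err` argument, the virtual
  // register currently holding %err is appended below so that the target's
  // LowerReturn can place it in its dedicated swifterror physical register.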
2286 const Function *F = I.getParent()->getParent();
2287 if (TLI.supportSwiftError() &&
2288 F->getAttributes().hasAttrSomewhere(Attribute::Kind: SwiftError)) {
2289 assert(SwiftError.getFunctionArg() && "Need a swift error argument");
2290 ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
2291 Flags.setSwiftError();
2292 Outs.push_back(Elt: ISD::OutputArg(
2293 Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
2294 /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
2295 // Create SDNode for the swifterror virtual register.
2296 OutVals.push_back(
2297 Elt: DAG.getRegister(Reg: SwiftError.getOrCreateVRegUseAt(
2298 &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
2299 VT: EVT(TLI.getPointerTy(DL))));
2300 }
2301
2302 bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
2303 CallingConv::ID CallConv =
2304 DAG.getMachineFunction().getFunction().getCallingConv();
2305 Chain = DAG.getTargetLoweringInfo().LowerReturn(
2306 Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
2307
2308 // Verify that the target's LowerReturn behaved as expected.
2309 assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
2310 "LowerReturn didn't return a valid chain!");
2311
2312 // Update the DAG with the new chain value resulting from return lowering.
2313 DAG.setRoot(Chain);
2314}
2315
2316/// CopyToExportRegsIfNeeded - If the given value has virtual registers
2317/// created for it, emit nodes to copy the value into the virtual
2318/// registers.
2319void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
2320 // Skip empty types
2321 if (V->getType()->isEmptyTy())
2322 return;
2323
2324 DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(Val: V);
2325 if (VMI != FuncInfo.ValueMap.end()) {
2326 assert((!V->use_empty() || isa<CallBrInst>(V)) &&
2327 "Unused value assigned virtual registers!");
2328 CopyValueToVirtualRegister(V, Reg: VMI->second);
2329 }
2330}
2331
2332/// ExportFromCurrentBlock - If this condition isn't known to be exported from
2333/// the current basic block, add it to ValueMap now so that we'll get a
2334/// CopyTo/FromReg.
2335void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
2336 // No need to export constants.
2337 if (!isa<Instruction>(Val: V) && !isa<Argument>(Val: V)) return;
2338
2339 // Already exported?
2340 if (FuncInfo.isExportedInst(V)) return;
2341
2342 Register Reg = FuncInfo.InitializeRegForValue(V);
2343 CopyValueToVirtualRegister(V, Reg);
2344}
2345
2346bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
2347 const BasicBlock *FromBB) {
2348 // The operands of the setcc have to be in this block. We don't know
2349 // how to export them from some other block.
2350 if (const Instruction *VI = dyn_cast<Instruction>(Val: V)) {
2351 // Can export from current BB.
2352 if (VI->getParent() == FromBB)
2353 return true;
2354
2355 // Is already exported, noop.
2356 return FuncInfo.isExportedInst(V);
2357 }
2358
2359 // If this is an argument, we can export it if the BB is the entry block or
2360 // if it is already exported.
2361 if (isa<Argument>(Val: V)) {
2362 if (FromBB->isEntryBlock())
2363 return true;
2364
2365 // Otherwise, can only export this if it is already exported.
2366 return FuncInfo.isExportedInst(V);
2367 }
2368
2369 // Otherwise, constants can always be exported.
2370 return true;
2371}
2372
2373/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
2374BranchProbability
2375SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
2376 const MachineBasicBlock *Dst) const {
2377 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2378 const BasicBlock *SrcBB = Src->getBasicBlock();
2379 const BasicBlock *DstBB = Dst->getBasicBlock();
2380 if (!BPI) {
2381 // If BPI is not available, set the default probability as 1 / N, where N is
2382 // the number of successors.
2383 auto SuccSize = std::max<uint32_t>(a: succ_size(BB: SrcBB), b: 1);
2384 return BranchProbability(1, SuccSize);
2385 }
2386 return BPI->getEdgeProbability(Src: SrcBB, Dst: DstBB);
2387}
2388
2389void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
2390 MachineBasicBlock *Dst,
2391 BranchProbability Prob) {
2392 if (!FuncInfo.BPI)
2393 Src->addSuccessorWithoutProb(Succ: Dst);
2394 else {
2395 if (Prob.isUnknown())
2396 Prob = getEdgeProbability(Src, Dst);
2397 Src->addSuccessor(Succ: Dst, Prob);
2398 }
2399}
2400
2401static bool InBlock(const Value *V, const BasicBlock *BB) {
2402 if (const Instruction *I = dyn_cast<Instruction>(Val: V))
2403 return I->getParent() == BB;
2404 return true;
2405}
2406
2407/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
2408/// This function emits a branch and is used at the leaves of an OR or an
2409/// AND operator tree.
2410void
2411SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
2412 MachineBasicBlock *TBB,
2413 MachineBasicBlock *FBB,
2414 MachineBasicBlock *CurBB,
2415 MachineBasicBlock *SwitchBB,
2416 BranchProbability TProb,
2417 BranchProbability FProb,
2418 bool InvertCond) {
2419 const BasicBlock *BB = CurBB->getBasicBlock();
2420
2421 // If the leaf of the tree is a comparison, merge the condition into
2422 // the caseblock.
2423 if (const CmpInst *BOp = dyn_cast<CmpInst>(Val: Cond)) {
2424 // The operands of the cmp have to be in this block. We don't know
2425 // how to export them from some other block. If this is the first block
2426 // of the sequence, no exporting is needed.
2427 if (CurBB == SwitchBB ||
2428 (isExportableFromCurrentBlock(V: BOp->getOperand(i_nocapture: 0), FromBB: BB) &&
2429 isExportableFromCurrentBlock(V: BOp->getOperand(i_nocapture: 1), FromBB: BB))) {
2430 ISD::CondCode Condition;
2431 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Val: Cond)) {
2432 ICmpInst::Predicate Pred =
2433 InvertCond ? IC->getInversePredicate() : IC->getPredicate();
2434 Condition = getICmpCondCode(Pred);
2435 } else {
2436 const FCmpInst *FC = cast<FCmpInst>(Val: Cond);
2437 FCmpInst::Predicate Pred =
2438 InvertCond ? FC->getInversePredicate() : FC->getPredicate();
2439 Condition = getFCmpCondCode(Pred);
2440 if (TM.Options.NoNaNsFPMath)
2441 Condition = getFCmpCodeWithoutNaN(CC: Condition);
2442 }
2443
2444 CaseBlock CB(Condition, BOp->getOperand(i_nocapture: 0), BOp->getOperand(i_nocapture: 1), nullptr,
2445 TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
2446 SL->SwitchCases.push_back(x: CB);
2447 return;
2448 }
2449 }
2450
2451 // Create a CaseBlock record representing this branch.
2452 ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
2453 CaseBlock CB(Opc, Cond, ConstantInt::getTrue(Context&: *DAG.getContext()),
2454 nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
2455 SL->SwitchCases.push_back(x: CB);
2456}
2457
2458// Collect dependencies on V recursively. This is used for the cost analysis in
2459// `shouldKeepJumpConditionsTogether`.
2460static bool collectInstructionDeps(
2461 SmallMapVector<const Instruction *, bool, 8> *Deps, const Value *V,
2462 SmallMapVector<const Instruction *, bool, 8> *Necessary = nullptr,
2463 unsigned Depth = 0) {
2464 // Return false if we have an incomplete count.
2465 if (Depth >= SelectionDAG::MaxRecursionDepth)
2466 return false;
2467
2468 auto *I = dyn_cast<Instruction>(Val: V);
2469 if (I == nullptr)
2470 return true;
2471
2472 if (Necessary != nullptr) {
2473 // This instruction is necessary for the other side of the condition so
2474 // don't count it.
2475 if (Necessary->contains(Key: I))
2476 return true;
2477 }
2478
2479 // Already added this dep.
2480 if (!Deps->try_emplace(Key: I, Args: false).second)
2481 return true;
2482
2483 for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
2484 if (!collectInstructionDeps(Deps, V: I->getOperand(i: OpIdx), Necessary,
2485 Depth: Depth + 1))
2486 return false;
2487 return true;
2488}
2489
2490bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
2491 const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
2492 Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
2493 TargetLoweringBase::CondMergingParams Params) const {
2494 if (I.getNumSuccessors() != 2)
2495 return false;
2496
2497 if (!I.isConditional())
2498 return false;
2499
2500 if (Params.BaseCost < 0)
2501 return false;
2502
2503 // Baseline cost.
2504 InstructionCost CostThresh = Params.BaseCost;
2505
2506 BranchProbabilityInfo *BPI = nullptr;
2507 if (Params.LikelyBias || Params.UnlikelyBias)
2508 BPI = FuncInfo.BPI;
2509 if (BPI != nullptr) {
2510 // See if we are either likely to get an early out or compute both lhs/rhs
2511 // of the condition.
2512 BasicBlock *IfFalse = I.getSuccessor(i: 0);
2513 BasicBlock *IfTrue = I.getSuccessor(i: 1);
2514
2515 std::optional<bool> Likely;
2516 if (BPI->isEdgeHot(Src: I.getParent(), Dst: IfTrue))
2517 Likely = true;
2518 else if (BPI->isEdgeHot(Src: I.getParent(), Dst: IfFalse))
2519 Likely = false;
2520
2521 if (Likely) {
2522 if (Opc == (*Likely ? Instruction::And : Instruction::Or))
2523        // It's likely we will have to compute both lhs and rhs of the condition
2524 CostThresh += Params.LikelyBias;
2525 else {
2526 if (Params.UnlikelyBias < 0)
2527 return false;
2528        // It's likely we will get an early out.
2529 CostThresh -= Params.UnlikelyBias;
2530 }
2531 }
2532 }
2533
2534 if (CostThresh <= 0)
2535 return false;
2536
2537 // Collect "all" instructions that lhs condition is dependent on.
2538  // Use map for stable iteration (to avoid non-determinism of iteration of
2539 // SmallPtrSet). The `bool` value is just a dummy.
2540 SmallMapVector<const Instruction *, bool, 8> LhsDeps, RhsDeps;
2541 collectInstructionDeps(Deps: &LhsDeps, V: Lhs);
2542  // Collect "all" instructions that the rhs condition is dependent on AND that
2543  // are not already dependencies of the lhs. This gives us an estimate of which
2544  // instructions we stand to save by splitting the condition.
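  // For example (hypothetical IR), given
  //   %lhs  = icmp slt i32 %a, 0
  //   %b    = mul i32 %x, %y
  //   %rhs  = icmp sgt i32 %b, 10
  //   %cond = and i1 %lhs, %rhs
  // RhsDeps ends up holding the mul and the rhs compare: the work we avoid
  // whenever the lhs compare takes the early out.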
2545 if (!collectInstructionDeps(Deps: &RhsDeps, V: Rhs, Necessary: &LhsDeps))
2546 return false;
2547  // Add the compare instruction itself unless it's a dependency on the LHS.
2548 if (const auto *RhsI = dyn_cast<Instruction>(Val: Rhs))
2549 if (!LhsDeps.contains(Key: RhsI))
2550 RhsDeps.try_emplace(Key: RhsI, Args: false);
2551
2552 const auto &TLI = DAG.getTargetLoweringInfo();
2553 const auto &TTI =
2554 TLI.getTargetMachine().getTargetTransformInfo(F: *I.getFunction());
2555
2556 InstructionCost CostOfIncluding = 0;
2557  // See if this instruction will need to be computed independently of whether
2558  // RHS is.
2559 Value *BrCond = I.getCondition();
2560 auto ShouldCountInsn = [&RhsDeps, &BrCond](const Instruction *Ins) {
2561 for (const auto *U : Ins->users()) {
2562 // If user is independent of RHS calculation we don't need to count it.
2563 if (auto *UIns = dyn_cast<Instruction>(Val: U))
2564 if (UIns != BrCond && !RhsDeps.contains(Key: UIns))
2565 return false;
2566 }
2567 return true;
2568 };
2569
2570 // Prune instructions from RHS Deps that are dependencies of unrelated
2571 // instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
2572  // arbitrary and just meant to cap how much time we spend in the pruning
2573  // loop. It's highly unlikely to come into effect.
2574 const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
2575 // Stop after a certain point. No incorrectness from including too many
2576 // instructions.
2577 for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
2578 const Instruction *ToDrop = nullptr;
2579 for (const auto &InsPair : RhsDeps) {
2580 if (!ShouldCountInsn(InsPair.first)) {
2581 ToDrop = InsPair.first;
2582 break;
2583 }
2584 }
2585 if (ToDrop == nullptr)
2586 break;
2587 RhsDeps.erase(Key: ToDrop);
2588 }
2589
2590 for (const auto &InsPair : RhsDeps) {
2591 // Finally accumulate latency that we can only attribute to computing the
2592 // RHS condition. Use latency because we are essentially trying to calculate
2593 // the cost of the dependency chain.
2594 // Possible TODO: We could try to estimate ILP and make this more precise.
2595 CostOfIncluding +=
2596 TTI.getInstructionCost(U: InsPair.first, CostKind: TargetTransformInfo::TCK_Latency);
2597
2598 if (CostOfIncluding > CostThresh)
2599 return false;
2600 }
2601 return true;
2602}
2603
2604void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
2605 MachineBasicBlock *TBB,
2606 MachineBasicBlock *FBB,
2607 MachineBasicBlock *CurBB,
2608 MachineBasicBlock *SwitchBB,
2609 Instruction::BinaryOps Opc,
2610 BranchProbability TProb,
2611 BranchProbability FProb,
2612 bool InvertCond) {
2613  // Skip over values that are not part of the tree and remember to invert the
2614  // op and operands at the next level.
2615 Value *NotCond;
2616 if (match(V: Cond, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: NotCond)))) &&
2617 InBlock(V: NotCond, BB: CurBB->getBasicBlock())) {
2618 FindMergedConditions(Cond: NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
2619 InvertCond: !InvertCond);
2620 return;
2621 }
2622
2623 const Instruction *BOp = dyn_cast<Instruction>(Val: Cond);
2624 const Value *BOpOp0, *BOpOp1;
2625 // Compute the effective opcode for Cond, taking into account whether it needs
2626 // to be inverted, e.g.
2627 // and (not (or A, B)), C
2628 // gets lowered as
2629 // and (and (not A, not B), C)
2630 Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
2631 if (BOp) {
2632 BOpc = match(V: BOp, P: m_LogicalAnd(L: m_Value(V&: BOpOp0), R: m_Value(V&: BOpOp1)))
2633 ? Instruction::And
2634 : (match(V: BOp, P: m_LogicalOr(L: m_Value(V&: BOpOp0), R: m_Value(V&: BOpOp1)))
2635 ? Instruction::Or
2636 : (Instruction::BinaryOps)0);
2637 if (InvertCond) {
2638 if (BOpc == Instruction::And)
2639 BOpc = Instruction::Or;
2640 else if (BOpc == Instruction::Or)
2641 BOpc = Instruction::And;
2642 }
2643 }
2644
2645 // If this node is not part of the or/and tree, emit it as a branch.
2646  // Note that all nodes in the tree should have the same opcode.
2647 bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
2648 if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
2649 !InBlock(V: BOpOp0, BB: CurBB->getBasicBlock()) ||
2650 !InBlock(V: BOpOp1, BB: CurBB->getBasicBlock())) {
2651 EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
2652 TProb, FProb, InvertCond);
2653 return;
2654 }
2655
2656 // Create TmpBB after CurBB.
2657 MachineFunction::iterator BBI(CurBB);
2658 MachineFunction &MF = DAG.getMachineFunction();
2659 MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(BB: CurBB->getBasicBlock());
2660 CurBB->getParent()->insert(MBBI: ++BBI, MBB: TmpBB);
2661
2662 if (Opc == Instruction::Or) {
2663 // Codegen X | Y as:
2664 // BB1:
2665 // jmp_if_X TBB
2666 // jmp TmpBB
2667 // TmpBB:
2668 // jmp_if_Y TBB
2669 // jmp FBB
2670 //
2671
2672 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
2673 // The requirement is that
2674 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
2675 // = TrueProb for original BB.
2676 // Assuming the original probabilities are A and B, one choice is to set
2677 // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
2678 // A/(1+B) and 2B/(1+B). This choice assumes that
2679 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
2680    // Another choice is to assume TrueProb for BB1 equals TrueProb for
2681 // TmpBB, but the math is more complicated.
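    // Worked example (with concrete numbers): if TProb = 0.6 and FProb = 0.4,
    // BB1 gets 0.3 / 0.7 and TmpBB gets 3/7 / 4/7 after normalization, so the
    // total probability of reaching TBB is 0.3 + 0.7 * (3/7) = 0.6, matching
    // the original TrueProb.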
2682
2683 auto NewTrueProb = TProb / 2;
2684 auto NewFalseProb = TProb / 2 + FProb;
2685 // Emit the LHS condition.
2686 FindMergedConditions(Cond: BOpOp0, TBB, FBB: TmpBB, CurBB, SwitchBB, Opc, TProb: NewTrueProb,
2687 FProb: NewFalseProb, InvertCond);
2688
2689 // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
2690 SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
2691 BranchProbability::normalizeProbabilities(Begin: Probs.begin(), End: Probs.end());
2692 // Emit the RHS condition into TmpBB.
2693 FindMergedConditions(Cond: BOpOp1, TBB, FBB, CurBB: TmpBB, SwitchBB, Opc, TProb: Probs[0],
2694 FProb: Probs[1], InvertCond);
2695 } else {
2696 assert(Opc == Instruction::And && "Unknown merge op!");
2697 // Codegen X & Y as:
2698 // BB1:
2699 // jmp_if_X TmpBB
2700 // jmp FBB
2701 // TmpBB:
2702 // jmp_if_Y TBB
2703 // jmp FBB
2704 //
2705 // This requires creation of TmpBB after CurBB.
2706
2707 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
2708 // The requirement is that
2709 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
2710 // = FalseProb for original BB.
2711 // Assuming the original probabilities are A and B, one choice is to set
2712 // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
2713 // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
2714 // TrueProb for BB1 * FalseProb for TmpBB.
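    // Worked example (with concrete numbers): if TProb = 0.6 and FProb = 0.4,
    // BB1 gets 0.8 / 0.2 and TmpBB gets 0.75 / 0.25 after normalization, so
    // the total probability of reaching FBB is 0.2 + 0.8 * 0.25 = 0.4,
    // matching the original FalseProb.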
2715
2716 auto NewTrueProb = TProb + FProb / 2;
2717 auto NewFalseProb = FProb / 2;
2718 // Emit the LHS condition.
2719 FindMergedConditions(Cond: BOpOp0, TBB: TmpBB, FBB, CurBB, SwitchBB, Opc, TProb: NewTrueProb,
2720 FProb: NewFalseProb, InvertCond);
2721
2722 // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
2723 SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
2724 BranchProbability::normalizeProbabilities(Begin: Probs.begin(), End: Probs.end());
2725 // Emit the RHS condition into TmpBB.
2726 FindMergedConditions(Cond: BOpOp1, TBB, FBB, CurBB: TmpBB, SwitchBB, Opc, TProb: Probs[0],
2727 FProb: Probs[1], InvertCond);
2728 }
2729}
2730
2731/// If the set of cases should be emitted as a series of branches, return true.
2732/// If we should emit this as a bunch of and/or'd together conditions, return
2733/// false.
2734bool
2735SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
2736 if (Cases.size() != 2) return true;
2737
2738 // If this is two comparisons of the same values or'd or and'd together, they
2739 // will get folded into a single comparison, so don't emit two blocks.
2740 if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
2741 Cases[0].CmpRHS == Cases[1].CmpRHS) ||
2742 (Cases[0].CmpRHS == Cases[1].CmpLHS &&
2743 Cases[0].CmpLHS == Cases[1].CmpRHS)) {
2744 return false;
2745 }
2746
2747 // Handle: (X != null) | (Y != null) --> (X|Y) != 0
2748 // Handle: (X == null) & (Y == null) --> (X|Y) == 0
2749 if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
2750 Cases[0].CC == Cases[1].CC &&
2751 isa<Constant>(Val: Cases[0].CmpRHS) &&
2752 cast<Constant>(Val: Cases[0].CmpRHS)->isNullValue()) {
2753 if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
2754 return false;
2755 if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
2756 return false;
2757 }
2758
2759 return true;
2760}
2761
2762void SelectionDAGBuilder::visitBr(const BranchInst &I) {
2763 MachineBasicBlock *BrMBB = FuncInfo.MBB;
2764
2765 // Update machine-CFG edges.
2766 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(i: 0)];
2767
2768 if (I.isUnconditional()) {
2769 // Update machine-CFG edges.
2770 BrMBB->addSuccessor(Succ: Succ0MBB);
2771
2772 // If this is not a fall-through branch or optimizations are switched off,
2773 // emit the branch.
2774 if (Succ0MBB != NextBlock(MBB: BrMBB) ||
2775 TM.getOptLevel() == CodeGenOptLevel::None) {
2776 auto Br = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
2777 getControlRoot(), DAG.getBasicBlock(Succ0MBB));
2778 setValue(V: &I, NewN: Br);
2779 DAG.setRoot(Br);
2780 }
2781
2782 return;
2783 }
2784
2785 // If this condition is one of the special cases we handle, do special stuff
2786 // now.
2787 const Value *CondVal = I.getCondition();
2788 MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(i: 1)];
2789
2790 // If this is a series of conditions that are or'd or and'd together, emit
2791 // this as a sequence of branches instead of setcc's with and/or operations.
2792 // As long as jumps are not expensive (exceptions for multi-use logic ops,
2793 // unpredictable branches, and vector extracts because those jumps are likely
2794 // expensive for any target), this should improve performance.
2795 // For example, instead of something like:
2796 // cmp A, B
2797 // C = seteq
2798 // cmp D, E
2799 // F = setle
2800 // or C, F
2801 // jnz foo
2802 // Emit:
2803 // cmp A, B
2804 // je foo
2805 // cmp D, E
2806 // jle foo
2807 const Instruction *BOp = dyn_cast<Instruction>(Val: CondVal);
2808 if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
2809 BOp->hasOneUse() && !I.hasMetadata(KindID: LLVMContext::MD_unpredictable)) {
2810 Value *Vec;
2811 const Value *BOp0, *BOp1;
2812 Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
2813 if (match(V: BOp, P: m_LogicalAnd(L: m_Value(V&: BOp0), R: m_Value(V&: BOp1))))
2814 Opcode = Instruction::And;
2815 else if (match(V: BOp, P: m_LogicalOr(L: m_Value(V&: BOp0), R: m_Value(V&: BOp1))))
2816 Opcode = Instruction::Or;
2817
2818 if (Opcode &&
2819 !(match(V: BOp0, P: m_ExtractElt(Val: m_Value(V&: Vec), Idx: m_Value())) &&
2820 match(V: BOp1, P: m_ExtractElt(Val: m_Specific(V: Vec), Idx: m_Value()))) &&
2821 !shouldKeepJumpConditionsTogether(
2822 FuncInfo, I, Opc: Opcode, Lhs: BOp0, Rhs: BOp1,
2823 Params: DAG.getTargetLoweringInfo().getJumpConditionMergingParams(
2824 Opcode, BOp0, BOp1))) {
2825 FindMergedConditions(Cond: BOp, TBB: Succ0MBB, FBB: Succ1MBB, CurBB: BrMBB, SwitchBB: BrMBB, Opc: Opcode,
2826 TProb: getEdgeProbability(Src: BrMBB, Dst: Succ0MBB),
2827 FProb: getEdgeProbability(Src: BrMBB, Dst: Succ1MBB),
2828 /*InvertCond=*/false);
2829 // If the compares in later blocks need to use values not currently
2830 // exported from this block, export them now. This block should always
2831 // be the first entry.
2832 assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
2833
2834 // Allow some cases to be rejected.
2835 if (ShouldEmitAsBranches(Cases: SL->SwitchCases)) {
2836 for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
2837 ExportFromCurrentBlock(V: SL->SwitchCases[i].CmpLHS);
2838 ExportFromCurrentBlock(V: SL->SwitchCases[i].CmpRHS);
2839 }
2840
2841 // Emit the branch for this block.
2842 visitSwitchCase(CB&: SL->SwitchCases[0], SwitchBB: BrMBB);
2843 SL->SwitchCases.erase(position: SL->SwitchCases.begin());
2844 return;
2845 }
2846
2847 // Okay, we decided not to do this, remove any inserted MBB's and clear
2848 // SwitchCases.
2849 for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
2850 FuncInfo.MF->erase(MBBI: SL->SwitchCases[i].ThisBB);
2851
2852 SL->SwitchCases.clear();
2853 }
2854 }
2855
2856 // Create a CaseBlock record representing this branch.
2857 CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(Context&: *DAG.getContext()),
2858 nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
2859
2860 // Use visitSwitchCase to actually insert the fast branch sequence for this
2861 // cond branch.
2862 visitSwitchCase(CB, SwitchBB: BrMBB);
2863}
2864
2865/// visitSwitchCase - Emits the necessary code to represent a single node in
2866/// the binary search tree resulting from lowering a switch instruction.
2867void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
2868 MachineBasicBlock *SwitchBB) {
2869 SDValue Cond;
2870 SDValue CondLHS = getValue(V: CB.CmpLHS);
2871 SDLoc dl = CB.DL;
2872
2873 if (CB.CC == ISD::SETTRUE) {
2874 // Branch or fall through to TrueBB.
2875 addSuccessorWithProb(Src: SwitchBB, Dst: CB.TrueBB, Prob: CB.TrueProb);
2876 SwitchBB->normalizeSuccProbs();
2877 if (CB.TrueBB != NextBlock(MBB: SwitchBB)) {
2878 DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
2879 DAG.getBasicBlock(MBB: CB.TrueBB)));
2880 }
2881 return;
2882 }
2883
2884 auto &TLI = DAG.getTargetLoweringInfo();
2885 EVT MemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: CB.CmpLHS->getType());
2886
2887 // Build the setcc now.
2888 if (!CB.CmpMHS) {
2889 // Fold "(X == true)" to X and "(X == false)" to !X to
2890 // handle common cases produced by branch lowering.
2891 if (CB.CmpRHS == ConstantInt::getTrue(Context&: *DAG.getContext()) &&
2892 CB.CC == ISD::SETEQ)
2893 Cond = CondLHS;
2894 else if (CB.CmpRHS == ConstantInt::getFalse(Context&: *DAG.getContext()) &&
2895 CB.CC == ISD::SETEQ) {
2896 SDValue True = DAG.getConstant(Val: 1, DL: dl, VT: CondLHS.getValueType());
2897 Cond = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CondLHS.getValueType(), N1: CondLHS, N2: True);
2898 } else {
2899 SDValue CondRHS = getValue(V: CB.CmpRHS);
2900
2901 // If a pointer's DAG type is larger than its memory type then the DAG
2902 // values are zero-extended. This breaks signed comparisons so truncate
2903 // back to the underlying type before doing the compare.
2904 if (CondLHS.getValueType() != MemVT) {
2905 CondLHS = DAG.getPtrExtOrTrunc(Op: CondLHS, DL: getCurSDLoc(), VT: MemVT);
2906 CondRHS = DAG.getPtrExtOrTrunc(Op: CondRHS, DL: getCurSDLoc(), VT: MemVT);
2907 }
2908 Cond = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: CondLHS, RHS: CondRHS, Cond: CB.CC);
2909 }
2910 } else {
2911 assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
2912
2913 const APInt& Low = cast<ConstantInt>(Val: CB.CmpLHS)->getValue();
2914 const APInt& High = cast<ConstantInt>(Val: CB.CmpRHS)->getValue();
2915
2916 SDValue CmpOp = getValue(V: CB.CmpMHS);
2917 EVT VT = CmpOp.getValueType();
2918
2919 if (cast<ConstantInt>(Val: CB.CmpLHS)->isMinValue(IsSigned: true)) {
2920 Cond = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: CmpOp, RHS: DAG.getConstant(Val: High, DL: dl, VT),
2921 Cond: ISD::SETLE);
2922 } else {
2923 SDValue SUB = DAG.getNode(Opcode: ISD::SUB, DL: dl,
2924 VT, N1: CmpOp, N2: DAG.getConstant(Val: Low, DL: dl, VT));
2925 Cond = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: SUB,
2926 RHS: DAG.getConstant(Val: High-Low, DL: dl, VT), Cond: ISD::SETULE);
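 // Illustrative example (not from the original source): for a case range
 // [10, 20] with a non-minimal Low, this emits SUB = X - 10 and tests
 // (X - 10) <=u 10, covering both bounds with a single unsigned comparison.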
2927 }
2928 }
2929
2930 // Update successor info
2931 addSuccessorWithProb(Src: SwitchBB, Dst: CB.TrueBB, Prob: CB.TrueProb);
2932 // TrueBB and FalseBB are always different unless the incoming IR is
2933 // degenerate. This only happens when running llc on weird IR.
2934 if (CB.TrueBB != CB.FalseBB)
2935 addSuccessorWithProb(Src: SwitchBB, Dst: CB.FalseBB, Prob: CB.FalseProb);
2936 SwitchBB->normalizeSuccProbs();
2937
2938 // If the lhs block is the next block, invert the condition so that we can
2939 // fall through to the lhs instead of the rhs block.
2940 if (CB.TrueBB == NextBlock(MBB: SwitchBB)) {
2941 std::swap(a&: CB.TrueBB, b&: CB.FalseBB);
2942 SDValue True = DAG.getConstant(Val: 1, DL: dl, VT: Cond.getValueType());
2943 Cond = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: Cond.getValueType(), N1: Cond, N2: True);
2944 }
2945
2946 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2947 MVT::Other, getControlRoot(), Cond,
2948 DAG.getBasicBlock(MBB: CB.TrueBB));
2949
2950 setValue(V: CurInst, NewN: BrCond);
2951
2952 // Insert the false branch. Do this even if it's a fall through branch,
2953 // this makes it easier to do DAG optimizations which require inverting
2954 // the branch condition.
2955 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2956 DAG.getBasicBlock(MBB: CB.FalseBB));
2957
2958 DAG.setRoot(BrCond);
2959}
2960
2961/// visitJumpTable - Emit JumpTable node in the current MBB
2962void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
2963 // Emit the code for the jump table
2964 assert(JT.SL && "Should set SDLoc for SelectionDAG!");
2965 assert(JT.Reg != -1U && "Should lower JT Header first!");
2966 EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
2967 SDValue Index = DAG.getCopyFromReg(Chain: getControlRoot(), dl: *JT.SL, Reg: JT.Reg, VT: PTy);
2968 SDValue Table = DAG.getJumpTable(JTI: JT.JTI, VT: PTy);
2969 SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, *JT.SL, MVT::Other,
2970 Index.getValue(R: 1), Table, Index);
2971 DAG.setRoot(BrJumpTable);
2972}
2973
2974 /// visitJumpTableHeader - This function emits the necessary code to produce
2975 /// the index into the JumpTable from the switch case value.
2976void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
2977 JumpTableHeader &JTH,
2978 MachineBasicBlock *SwitchBB) {
2979 assert(JT.SL && "Should set SDLoc for SelectionDAG!");
2980 const SDLoc &dl = *JT.SL;
2981
2982 // Subtract the lowest switch case value from the value being switched on.
2983 SDValue SwitchOp = getValue(V: JTH.SValue);
2984 EVT VT = SwitchOp.getValueType();
2985 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: SwitchOp,
2986 N2: DAG.getConstant(Val: JTH.First, DL: dl, VT));
2987
2988 // The SDNode we just created, which holds the value being switched on minus
2989 // the smallest case value, needs to be copied to a virtual register so it
2990 // can be used as an index into the jump table in a subsequent basic block.
2991 // This value may be smaller or larger than the target's pointer type, and
2992 // therefore may require extension or truncation.
2993 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2994 SwitchOp = DAG.getZExtOrTrunc(Op: Sub, DL: dl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
2995
2996 unsigned JumpTableReg =
2997 FuncInfo.CreateReg(VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
2998 SDValue CopyTo = DAG.getCopyToReg(Chain: getControlRoot(), dl,
2999 Reg: JumpTableReg, N: SwitchOp);
3000 JT.Reg = JumpTableReg;
3001
3002 if (!JTH.FallthroughUnreachable) {
3003 // Emit the range check for the jump table, and branch to the default block
3004 // for the switch statement if the value being switched on exceeds the
3005 // largest case in the switch.
3006 SDValue CMP = DAG.getSetCC(
3007 DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
3008 VT: Sub.getValueType()),
3009 LHS: Sub, RHS: DAG.getConstant(Val: JTH.Last - JTH.First, DL: dl, VT), Cond: ISD::SETUGT);
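 // Worked example (illustrative, not from the original source): for cases
 // 5..12, JTH.First = 5 and JTH.Last = 12, so Sub = X - 5 and the branch to
 // the default block fires when (X - 5) >u 7; the unsigned compare covers
 // both X < 5 and X > 12 in a single test.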
3010
3011 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
3012 MVT::Other, CopyTo, CMP,
3013 DAG.getBasicBlock(MBB: JT.Default));
3014
3015 // Avoid emitting unnecessary branches to the next block.
3016 if (JT.MBB != NextBlock(MBB: SwitchBB))
3017 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
3018 DAG.getBasicBlock(MBB: JT.MBB));
3019
3020 DAG.setRoot(BrCond);
3021 } else {
3022 // Avoid emitting unnecessary branches to the next block.
3023 if (JT.MBB != NextBlock(MBB: SwitchBB))
3024 DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
3025 DAG.getBasicBlock(MBB: JT.MBB)));
3026 else
3027 DAG.setRoot(CopyTo);
3028 }
3029}
3030
3031/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
3032/// variable if there exists one.
3033static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
3034 SDValue &Chain) {
3035 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3036 EVT PtrTy = TLI.getPointerTy(DL: DAG.getDataLayout());
3037 EVT PtrMemTy = TLI.getPointerMemTy(DL: DAG.getDataLayout());
3038 MachineFunction &MF = DAG.getMachineFunction();
3039 Value *Global = TLI.getSDagStackGuard(M: *MF.getFunction().getParent());
3040 MachineSDNode *Node =
3041 DAG.getMachineNode(Opcode: TargetOpcode::LOAD_STACK_GUARD, dl: DL, VT: PtrTy, Op1: Chain);
3042 if (Global) {
3043 MachinePointerInfo MPInfo(Global);
3044 auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
3045 MachineMemOperand::MODereferenceable;
3046 MachineMemOperand *MemRef = MF.getMachineMemOperand(
3047 PtrInfo: MPInfo, F: Flags, Size: LocationSize::precise(Value: PtrTy.getSizeInBits() / 8),
3048 BaseAlignment: DAG.getEVTAlign(MemoryVT: PtrTy));
3049 DAG.setNodeMemRefs(N: Node, NewMemRefs: {MemRef});
3050 }
3051 if (PtrTy != PtrMemTy)
3052 return DAG.getPtrExtOrTrunc(Op: SDValue(Node, 0), DL, VT: PtrMemTy);
3053 return SDValue(Node, 0);
3054}
3055
3056/// Codegen a new tail for a stack protector check ParentMBB which has had its
3057/// tail spliced into a stack protector check success bb.
3058///
3059/// For a high level explanation of how this fits into the stack protector
3060/// generation see the comment on the declaration of class
3061/// StackProtectorDescriptor.
3062void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
3063 MachineBasicBlock *ParentBB) {
3064
3065 // First create the loads to the guard/stack slot for the comparison.
3066 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3067 EVT PtrTy = TLI.getPointerTy(DL: DAG.getDataLayout());
3068 EVT PtrMemTy = TLI.getPointerMemTy(DL: DAG.getDataLayout());
3069
3070 MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
3071 int FI = MFI.getStackProtectorIndex();
3072
3073 SDValue Guard;
3074 SDLoc dl = getCurSDLoc();
3075 SDValue StackSlotPtr = DAG.getFrameIndex(FI, VT: PtrTy);
3076 const Module &M = *ParentBB->getParent()->getFunction().getParent();
3077 Align Align =
3078 DAG.getDataLayout().getPrefTypeAlign(Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0));
3079
3080 // Generate code to load the content of the guard slot.
3081 SDValue GuardVal = DAG.getLoad(
3082 VT: PtrMemTy, dl, Chain: DAG.getEntryNode(), Ptr: StackSlotPtr,
3083 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), Alignment: Align,
3084 MMOFlags: MachineMemOperand::MOVolatile);
3085
3086 if (TLI.useStackGuardXorFP())
3087 GuardVal = TLI.emitStackGuardXorFP(DAG, Val: GuardVal, DL: dl);
3088
3089 // Retrieve guard check function, nullptr if instrumentation is inlined.
3090 if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
3091 // The target provides a guard check function to validate the guard value.
3092 // Generate a call to that function with the content of the guard slot as
3093 // argument.
3094 FunctionType *FnTy = GuardCheckFn->getFunctionType();
3095 assert(FnTy->getNumParams() == 1 && "Invalid function signature");
3096
3097 TargetLowering::ArgListTy Args;
3098 TargetLowering::ArgListEntry Entry;
3099 Entry.Node = GuardVal;
3100 Entry.Ty = FnTy->getParamType(i: 0);
3101 if (GuardCheckFn->hasParamAttribute(ArgNo: 0, Kind: Attribute::AttrKind::InReg))
3102 Entry.IsInReg = true;
3103 Args.push_back(x: Entry);
3104
3105 TargetLowering::CallLoweringInfo CLI(DAG);
3106 CLI.setDebugLoc(getCurSDLoc())
3107 .setChain(DAG.getEntryNode())
3108 .setCallee(CC: GuardCheckFn->getCallingConv(), ResultType: FnTy->getReturnType(),
3109 Target: getValue(V: GuardCheckFn), ArgsList: std::move(Args));
3110
3111 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
3112 DAG.setRoot(Result.second);
3113 return;
3114 }
3115
3116 // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
3117 // Otherwise, emit a volatile load to retrieve the stack guard value.
3118 SDValue Chain = DAG.getEntryNode();
3119 if (TLI.useLoadStackGuardNode()) {
3120 Guard = getLoadStackGuard(DAG, DL: dl, Chain);
3121 } else {
3122 const Value *IRGuard = TLI.getSDagStackGuard(M);
3123 SDValue GuardPtr = getValue(V: IRGuard);
3124
3125 Guard = DAG.getLoad(VT: PtrMemTy, dl, Chain, Ptr: GuardPtr,
3126 PtrInfo: MachinePointerInfo(IRGuard, 0), Alignment: Align,
3127 MMOFlags: MachineMemOperand::MOVolatile);
3128 }
3129
3130 // Perform the comparison via a getsetcc.
3131 SDValue Cmp = DAG.getSetCC(DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(),
3132 Context&: *DAG.getContext(),
3133 VT: Guard.getValueType()),
3134 LHS: Guard, RHS: GuardVal, Cond: ISD::SETNE);
3135
3136 // If the guard and the stack slot contents are not equal, branch to the failure MBB.
3137 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
3138 MVT::Other, GuardVal.getOperand(i: 0),
3139 Cmp, DAG.getBasicBlock(MBB: SPD.getFailureMBB()));
3140 // Otherwise branch to success MBB.
3141 SDValue Br = DAG.getNode(ISD::BR, dl,
3142 MVT::Other, BrCond,
3143 DAG.getBasicBlock(MBB: SPD.getSuccessMBB()));
3144
3145 DAG.setRoot(Br);
3146}
3147
3148/// Codegen the failure basic block for a stack protector check.
3149///
3150/// A failure stack protector machine basic block consists simply of a call to
3151/// __stack_chk_fail().
3152///
3153/// For a high level explanation of how this fits into the stack protector
3154/// generation see the comment on the declaration of class
3155/// StackProtectorDescriptor.
3156void
3157SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
3158 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3159 TargetLowering::MakeLibCallOptions CallOptions;
3160 CallOptions.setDiscardResult(true);
3161 SDValue Chain =
3162 TLI.makeLibCall(DAG, LC: RTLIB::STACKPROTECTOR_CHECK_FAIL, RetVT: MVT::isVoid,
3163 Ops: std::nullopt, CallOptions, dl: getCurSDLoc())
3164 .second;
3165 // On PS4/PS5, the "return address" must still be within the calling
3166 // function, even if it's at the very end, so emit an explicit TRAP here.
3167 // Passing 'true' for doesNotReturn above won't generate the trap for us.
3168 if (TM.getTargetTriple().isPS())
3169 Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
3170 // WebAssembly needs an unreachable instruction after a non-returning call,
3171 // because the function return type can be different from __stack_chk_fail's
3172 // return type (void).
3173 if (TM.getTargetTriple().isWasm())
3174 Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
3175
3176 DAG.setRoot(Chain);
3177}
3178
3179 /// visitBitTestHeader - This function emits the necessary code to produce a
3180 /// value suitable for "bit tests".
3181void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
3182 MachineBasicBlock *SwitchBB) {
3183 SDLoc dl = getCurSDLoc();
3184
3185 // Subtract the minimum value.
3186 SDValue SwitchOp = getValue(V: B.SValue);
3187 EVT VT = SwitchOp.getValueType();
3188 SDValue RangeSub =
3189 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: SwitchOp, N2: DAG.getConstant(Val: B.First, DL: dl, VT));
3190
3191 // Determine the type of the test operands.
3192 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3193 bool UsePtrType = false;
3194 if (!TLI.isTypeLegal(VT)) {
3195 UsePtrType = true;
3196 } else {
3197 for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
3198 if (!isUIntN(N: VT.getSizeInBits(), x: B.Cases[i].Mask)) {
3199 // Switch table case ranges are encoded into a series of masks.
3200 // Just use the pointer type; it's guaranteed to fit.
3201 UsePtrType = true;
3202 break;
3203 }
3204 }
3205 SDValue Sub = RangeSub;
3206 if (UsePtrType) {
3207 VT = TLI.getPointerTy(DL: DAG.getDataLayout());
3208 Sub = DAG.getZExtOrTrunc(Op: Sub, DL: dl, VT);
3209 }
3210
3211 B.RegVT = VT.getSimpleVT();
3212 B.Reg = FuncInfo.CreateReg(VT: B.RegVT);
3213 SDValue CopyTo = DAG.getCopyToReg(Chain: getControlRoot(), dl, Reg: B.Reg, N: Sub);
3214
3215 MachineBasicBlock* MBB = B.Cases[0].ThisBB;
3216
3217 if (!B.FallthroughUnreachable)
3218 addSuccessorWithProb(Src: SwitchBB, Dst: B.Default, Prob: B.DefaultProb);
3219 addSuccessorWithProb(Src: SwitchBB, Dst: MBB, Prob: B.Prob);
3220 SwitchBB->normalizeSuccProbs();
3221
3222 SDValue Root = CopyTo;
3223 if (!B.FallthroughUnreachable) {
3224 // Conditional branch to the default block.
3225 SDValue RangeCmp = DAG.getSetCC(DL: dl,
3226 VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
3227 VT: RangeSub.getValueType()),
3228 LHS: RangeSub, RHS: DAG.getConstant(Val: B.Range, DL: dl, VT: RangeSub.getValueType()),
3229 Cond: ISD::SETUGT);
3230
3231 Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
3232 DAG.getBasicBlock(B.Default));
3233 }
3234
3235 // Avoid emitting unnecessary branches to the next block.
3236 if (MBB != NextBlock(SwitchBB))
3237 Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
3238
3239 DAG.setRoot(Root);
3240}
3241
3242/// visitBitTestCase - this function produces one "bit test"
3243void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
3244 MachineBasicBlock* NextMBB,
3245 BranchProbability BranchProbToNext,
3246 unsigned Reg,
3247 BitTestCase &B,
3248 MachineBasicBlock *SwitchBB) {
3249 SDLoc dl = getCurSDLoc();
3250 MVT VT = BB.RegVT;
3251 SDValue ShiftOp = DAG.getCopyFromReg(Chain: getControlRoot(), dl, Reg, VT);
3252 SDValue Cmp;
3253 unsigned PopCount = llvm::popcount(Value: B.Mask);
3254 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3255 if (PopCount == 1) {
3256 // Testing for a single bit; just compare the shift count with what it
3257 // would need to be to shift a 1 bit in that position.
3258 Cmp = DAG.getSetCC(
3259 DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT),
3260 LHS: ShiftOp, RHS: DAG.getConstant(Val: llvm::countr_zero(Val: B.Mask), DL: dl, VT),
3261 Cond: ISD::SETEQ);
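 // Illustrative example (not from the original source): for Mask == 0b0100
 // the popcount is 1 and countr_zero(Mask) == 2, so the bit test reduces to
 // the single comparison "ShiftOp == 2".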
3262 } else if (PopCount == BB.Range) {
3263 // There is only one zero bit in the range, test for it directly.
3264 Cmp = DAG.getSetCC(
3265 DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT),
3266 LHS: ShiftOp, RHS: DAG.getConstant(Val: llvm::countr_one(Value: B.Mask), DL: dl, VT), Cond: ISD::SETNE);
3267 } else {
3268 // Make desired shift
3269 SDValue SwitchVal = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT,
3270 N1: DAG.getConstant(Val: 1, DL: dl, VT), N2: ShiftOp);
3271
3272 // Emit bit tests and jumps
3273 SDValue AndOp = DAG.getNode(Opcode: ISD::AND, DL: dl,
3274 VT, N1: SwitchVal, N2: DAG.getConstant(Val: B.Mask, DL: dl, VT));
3275 Cmp = DAG.getSetCC(
3276 DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT),
3277 LHS: AndOp, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETNE);
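 // Illustrative example (not from the original source): for Mask == 0b1011
 // this emits ((1 << ShiftOp) & 0b1011) != 0, i.e. the case is taken when
 // the switch value selects bit 0, 1, or 3.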
3278 }
3279
3280 // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
3281 addSuccessorWithProb(Src: SwitchBB, Dst: B.TargetBB, Prob: B.ExtraProb);
3282 // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
3283 addSuccessorWithProb(Src: SwitchBB, Dst: NextMBB, Prob: BranchProbToNext);
3284 // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
3285 // one as they are relative probabilities (and thus work more like weights),
3286 // and hence we need to normalize them to let the sum of them become one.
3287 SwitchBB->normalizeSuccProbs();
3288
3289 SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
3290 MVT::Other, getControlRoot(),
3291 Cmp, DAG.getBasicBlock(B.TargetBB));
3292
3293 // Avoid emitting unnecessary branches to the next block.
3294 if (NextMBB != NextBlock(SwitchBB))
3295 BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
3296 DAG.getBasicBlock(NextMBB));
3297
3298 DAG.setRoot(BrAnd);
3299}
3300
3301void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
3302 MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
3303
3304 // Retrieve successors. Look through artificial IR level blocks like
3305 // catchswitch for successors.
3306 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(i: 0)];
3307 const BasicBlock *EHPadBB = I.getSuccessor(i: 1);
3308 MachineBasicBlock *EHPadMBB = FuncInfo.MBBMap[EHPadBB];
3309
3310 // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
3311 // have to do anything here to lower funclet bundles.
3312 assert(!I.hasOperandBundlesOtherThan(
3313 {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
3314 LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
3315 LLVMContext::OB_cfguardtarget,
3316 LLVMContext::OB_clang_arc_attachedcall}) &&
3317 "Cannot lower invokes with arbitrary operand bundles yet!");
3318
3319 const Value *Callee(I.getCalledOperand());
3320 const Function *Fn = dyn_cast<Function>(Val: Callee);
3321 if (isa<InlineAsm>(Val: Callee))
3322 visitInlineAsm(Call: I, EHPadBB);
3323 else if (Fn && Fn->isIntrinsic()) {
3324 switch (Fn->getIntrinsicID()) {
3325 default:
3326 llvm_unreachable("Cannot invoke this intrinsic");
3327 case Intrinsic::donothing:
3328 // Ignore invokes to @llvm.donothing: jump directly to the next BB.
3329 case Intrinsic::seh_try_begin:
3330 case Intrinsic::seh_scope_begin:
3331 case Intrinsic::seh_try_end:
3332 case Intrinsic::seh_scope_end:
3333 if (EHPadMBB)
3334 // This block is referenced by the EH table,
3335 // so the dtor funclet is not removed by later optimizations.
3336 EHPadMBB->setMachineBlockAddressTaken();
3337 break;
3338 case Intrinsic::experimental_patchpoint_void:
3339 case Intrinsic::experimental_patchpoint:
3340 visitPatchpoint(CB: I, EHPadBB);
3341 break;
3342 case Intrinsic::experimental_gc_statepoint:
3343 LowerStatepoint(I: cast<GCStatepointInst>(Val: I), EHPadBB);
3344 break;
3345 case Intrinsic::wasm_rethrow: {
3346 // This is usually done in visitTargetIntrinsic, but this intrinsic is
3347 // special because it can be invoked, so we manually lower it to a DAG
3348 // node here.
3349 SmallVector<SDValue, 8> Ops;
3350 Ops.push_back(Elt: getRoot()); // inchain
3351 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3352 Ops.push_back(
3353 DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
3354 TLI.getPointerTy(DAG.getDataLayout())));
3355 SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
3356 DAG.setRoot(DAG.getNode(Opcode: ISD::INTRINSIC_VOID, DL: getCurSDLoc(), VTList: VTs, Ops));
3357 break;
3358 }
3359 }
3360 } else if (I.countOperandBundlesOfType(ID: LLVMContext::OB_deopt)) {
3361 // Currently we do not lower any intrinsic calls with deopt operand bundles.
3362 // Eventually we will support lowering the @llvm.experimental.deoptimize
3363 // intrinsic, and right now there are no plans to support other intrinsics
3364 // with deopt state.
3365 LowerCallSiteWithDeoptBundle(Call: &I, Callee: getValue(V: Callee), EHPadBB);
3366 } else {
3367 LowerCallTo(CB: I, Callee: getValue(V: Callee), IsTailCall: false, IsMustTailCall: false, EHPadBB);
3368 }
3369
3370 // If the value of the invoke is used outside of its defining block, make it
3371 // available as a virtual register.
3372 // We already took care of the exported value for the statepoint instruction
3373 // during call to the LowerStatepoint.
3374 if (!isa<GCStatepointInst>(Val: I)) {
3375 CopyToExportRegsIfNeeded(V: &I);
3376 }
3377
3378 SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
3379 BranchProbabilityInfo *BPI = FuncInfo.BPI;
3380 BranchProbability EHPadBBProb =
3381 BPI ? BPI->getEdgeProbability(Src: InvokeMBB->getBasicBlock(), Dst: EHPadBB)
3382 : BranchProbability::getZero();
3383 findUnwindDestinations(FuncInfo, EHPadBB, Prob: EHPadBBProb, UnwindDests);
3384
3385 // Update successor info.
3386 addSuccessorWithProb(Src: InvokeMBB, Dst: Return);
3387 for (auto &UnwindDest : UnwindDests) {
3388 UnwindDest.first->setIsEHPad();
3389 addSuccessorWithProb(Src: InvokeMBB, Dst: UnwindDest.first, Prob: UnwindDest.second);
3390 }
3391 InvokeMBB->normalizeSuccProbs();
3392
3393 // Drop into normal successor.
3394 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
3395 DAG.getBasicBlock(Return)));
3396}
3397
3398void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
3399 MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
3400
3401 // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
3402 // have to do anything here to lower funclet bundles.
3403 assert(!I.hasOperandBundlesOtherThan(
3404 {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
3405 "Cannot lower callbrs with arbitrary operand bundles yet!");
3406
3407 assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
3408 visitInlineAsm(Call: I);
3409 CopyToExportRegsIfNeeded(V: &I);
3410
3411 // Retrieve successors.
3412 SmallPtrSet<BasicBlock *, 8> Dests;
3413 Dests.insert(Ptr: I.getDefaultDest());
3414 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
3415
3416 // Update successor info.
3417 addSuccessorWithProb(Src: CallBrMBB, Dst: Return, Prob: BranchProbability::getOne());
3418 for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
3419 BasicBlock *Dest = I.getIndirectDest(i);
3420 MachineBasicBlock *Target = FuncInfo.MBBMap[Dest];
3421 Target->setIsInlineAsmBrIndirectTarget();
3422 Target->setMachineBlockAddressTaken();
3423 Target->setLabelMustBeEmitted();
3424 // Don't add duplicate machine successors.
3425 if (Dests.insert(Ptr: Dest).second)
3426 addSuccessorWithProb(Src: CallBrMBB, Dst: Target, Prob: BranchProbability::getZero());
3427 }
3428 CallBrMBB->normalizeSuccProbs();
3429
3430 // Drop into default successor.
3431 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
3432 MVT::Other, getControlRoot(),
3433 DAG.getBasicBlock(Return)));
3434}
3435
3436void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
3437 llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
3438}
3439
3440void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
3441 assert(FuncInfo.MBB->isEHPad() &&
3442 "Call to landingpad not in landing pad!");
3443
3444 // If there aren't registers to copy the values into (e.g., during SjLj
3445 // exceptions), then don't bother to create these DAG nodes.
3446 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3447 const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
3448 if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
3449 TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
3450 return;
3451
3452 // If landingpad's return type is token type, we don't create DAG nodes
3453 // for its exception pointer and selector value. The extraction of exception
3454 // pointer or selector value from token type landingpads is not currently
3455 // supported.
3456 if (LP.getType()->isTokenTy())
3457 return;
3458
3459 SmallVector<EVT, 2> ValueVTs;
3460 SDLoc dl = getCurSDLoc();
3461 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: LP.getType(), ValueVTs);
3462 assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
3463
3464 // Get the two live-in registers as SDValues. The physregs have already been
3465 // copied into virtual registers.
3466 SDValue Ops[2];
3467 if (FuncInfo.ExceptionPointerVirtReg) {
3468 Ops[0] = DAG.getZExtOrTrunc(
3469 Op: DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl,
3470 Reg: FuncInfo.ExceptionPointerVirtReg,
3471 VT: TLI.getPointerTy(DL: DAG.getDataLayout())),
3472 DL: dl, VT: ValueVTs[0]);
3473 } else {
3474 Ops[0] = DAG.getConstant(Val: 0, DL: dl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
3475 }
3476 Ops[1] = DAG.getZExtOrTrunc(
3477 Op: DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl,
3478 Reg: FuncInfo.ExceptionSelectorVirtReg,
3479 VT: TLI.getPointerTy(DL: DAG.getDataLayout())),
3480 DL: dl, VT: ValueVTs[1]);
3481
3482 // Merge into one.
3483 SDValue Res = DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl,
3484 VTList: DAG.getVTList(VTs: ValueVTs), Ops);
3485 setValue(V: &LP, NewN: Res);
3486}
3487
3488void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
3489 MachineBasicBlock *Last) {
3490 // Update JTCases.
3491 for (JumpTableBlock &JTB : SL->JTCases)
3492 if (JTB.first.HeaderBB == First)
3493 JTB.first.HeaderBB = Last;
3494
3495 // Update BitTestCases.
3496 for (BitTestBlock &BTB : SL->BitTestCases)
3497 if (BTB.Parent == First)
3498 BTB.Parent = Last;
3499}
3500
3501void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
3502 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
3503
3504 // Update machine-CFG edges with unique successors.
3505 SmallSet<BasicBlock*, 32> Done;
3506 for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
3507 BasicBlock *BB = I.getSuccessor(i);
3508 bool Inserted = Done.insert(Ptr: BB).second;
3509 if (!Inserted)
3510 continue;
3511
3512 MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
3513 addSuccessorWithProb(Src: IndirectBrMBB, Dst: Succ);
3514 }
3515 IndirectBrMBB->normalizeSuccProbs();
3516
3517 DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
3518 MVT::Other, getControlRoot(),
3519 getValue(I.getAddress())));
3520}
3521
3522void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
3523 if (!DAG.getTarget().Options.TrapUnreachable)
3524 return;
3525
3526 // We may be able to ignore unreachable behind a noreturn call.
3527 if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
3528 if (const CallInst *Call = dyn_cast_or_null<CallInst>(Val: I.getPrevNode())) {
3529 if (Call->doesNotReturn())
3530 return;
3531 }
3532 }
3533
3534 DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
3535}
3536
3537void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
3538 SDNodeFlags Flags;
3539 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I))
3540 Flags.copyFMF(FPMO: *FPOp);
3541
3542 SDValue Op = getValue(V: I.getOperand(i: 0));
3543 SDValue UnNodeValue = DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Op.getValueType(),
3544 Operand: Op, Flags);
3545 setValue(V: &I, NewN: UnNodeValue);
3546}
3547
3548void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
3549 SDNodeFlags Flags;
3550 if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(Val: &I)) {
3551 Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
3552 Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
3553 }
3554 if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(Val: &I))
3555 Flags.setExact(ExactOp->isExact());
3556 if (auto *DisjointOp = dyn_cast<PossiblyDisjointInst>(Val: &I))
3557 Flags.setDisjoint(DisjointOp->isDisjoint());
3558 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I))
3559 Flags.copyFMF(FPMO: *FPOp);
3560
3561 SDValue Op1 = getValue(V: I.getOperand(i: 0));
3562 SDValue Op2 = getValue(V: I.getOperand(i: 1));
3563 SDValue BinNodeValue = DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Op1.getValueType(),
3564 N1: Op1, N2: Op2, Flags);
3565 setValue(V: &I, NewN: BinNodeValue);
3566}
3567
3568void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
3569 SDValue Op1 = getValue(V: I.getOperand(i: 0));
3570 SDValue Op2 = getValue(V: I.getOperand(i: 1));
3571
3572 EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
3573 LHSTy: Op1.getValueType(), DL: DAG.getDataLayout());
3574
3575 // Coerce the shift amount to the right type if we can. This exposes the
3576 // truncate or zext to optimization early.
3577 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
3578 assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) &&
3579 "Unexpected shift type");
3580 Op2 = DAG.getZExtOrTrunc(Op: Op2, DL: getCurSDLoc(), VT: ShiftTy);
3581 }
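 // Illustrative example (not from the original source): if the target's
 // shift-amount type is i32, an i8 shift amount is zero-extended to i32
 // here, while an i64 amount would be truncated instead.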
3582
3583 bool nuw = false;
3584 bool nsw = false;
3585 bool exact = false;
3586
3587 if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
3588
3589 if (const OverflowingBinaryOperator *OFBinOp =
3590 dyn_cast<const OverflowingBinaryOperator>(Val: &I)) {
3591 nuw = OFBinOp->hasNoUnsignedWrap();
3592 nsw = OFBinOp->hasNoSignedWrap();
3593 }
3594 if (const PossiblyExactOperator *ExactOp =
3595 dyn_cast<const PossiblyExactOperator>(Val: &I))
3596 exact = ExactOp->isExact();
3597 }
3598 SDNodeFlags Flags;
3599 Flags.setExact(exact);
3600 Flags.setNoSignedWrap(nsw);
3601 Flags.setNoUnsignedWrap(nuw);
3602 SDValue Res = DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Op1.getValueType(), N1: Op1, N2: Op2,
3603 Flags);
3604 setValue(V: &I, NewN: Res);
3605}
3606
3607void SelectionDAGBuilder::visitSDiv(const User &I) {
3608 SDValue Op1 = getValue(V: I.getOperand(i: 0));
3609 SDValue Op2 = getValue(V: I.getOperand(i: 1));
3610
3611 SDNodeFlags Flags;
3612 Flags.setExact(isa<PossiblyExactOperator>(Val: &I) &&
3613 cast<PossiblyExactOperator>(Val: &I)->isExact());
3614 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SDIV, DL: getCurSDLoc(), VT: Op1.getValueType(), N1: Op1,
3615 N2: Op2, Flags));
3616}
3617
3618void SelectionDAGBuilder::visitICmp(const User &I) {
3619 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
3620 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Val: &I))
3621 predicate = IC->getPredicate();
3622 else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(Val: &I))
3623 predicate = ICmpInst::Predicate(IC->getPredicate());
3624 SDValue Op1 = getValue(V: I.getOperand(i: 0));
3625 SDValue Op2 = getValue(V: I.getOperand(i: 1));
3626 ISD::CondCode Opcode = getICmpCondCode(Pred: predicate);
3627
3628 auto &TLI = DAG.getTargetLoweringInfo();
3629 EVT MemVT =
3630 TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getOperand(i: 0)->getType());
3631
3632 // If a pointer's DAG type is larger than its memory type then the DAG values
3633 // are zero-extended. This breaks signed comparisons so truncate back to the
3634 // underlying type before doing the compare.
3635 if (Op1.getValueType() != MemVT) {
3636 Op1 = DAG.getPtrExtOrTrunc(Op: Op1, DL: getCurSDLoc(), VT: MemVT);
3637 Op2 = DAG.getPtrExtOrTrunc(Op: Op2, DL: getCurSDLoc(), VT: MemVT);
3638 }
3639
3640 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3641 Ty: I.getType());
3642 setValue(V: &I, NewN: DAG.getSetCC(DL: getCurSDLoc(), VT: DestVT, LHS: Op1, RHS: Op2, Cond: Opcode));
3643}
3644
3645void SelectionDAGBuilder::visitFCmp(const User &I) {
3646 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
3647 if (const FCmpInst *FC = dyn_cast<FCmpInst>(Val: &I))
3648 predicate = FC->getPredicate();
3649 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(Val: &I))
3650 predicate = FCmpInst::Predicate(FC->getPredicate());
3651 SDValue Op1 = getValue(V: I.getOperand(i: 0));
3652 SDValue Op2 = getValue(V: I.getOperand(i: 1));
3653
3654 ISD::CondCode Condition = getFCmpCondCode(Pred: predicate);
3655 auto *FPMO = cast<FPMathOperator>(Val: &I);
3656 if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
3657 Condition = getFCmpCodeWithoutNaN(CC: Condition);
3658
3659 SDNodeFlags Flags;
3660 Flags.copyFMF(FPMO: *FPMO);
3661 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
3662
3663 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3664 Ty: I.getType());
3665 setValue(V: &I, NewN: DAG.getSetCC(DL: getCurSDLoc(), VT: DestVT, LHS: Op1, RHS: Op2, Cond: Condition));
3666}
3667
3668 // Check whether every user of the select's condition is itself a select, so
3669 // converting to min/max does not leave other users of the condition behind.
3670static bool hasOnlySelectUsers(const Value *Cond) {
3671 return llvm::all_of(Range: Cond->users(), P: [](const Value *V) {
3672 return isa<SelectInst>(Val: V);
3673 });
3674}
3675
3676void SelectionDAGBuilder::visitSelect(const User &I) {
3677 SmallVector<EVT, 4> ValueVTs;
3678 ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(), Ty: I.getType(),
3679 ValueVTs);
3680 unsigned NumValues = ValueVTs.size();
3681 if (NumValues == 0) return;
3682
3683 SmallVector<SDValue, 4> Values(NumValues);
3684 SDValue Cond = getValue(V: I.getOperand(i: 0));
3685 SDValue LHSVal = getValue(V: I.getOperand(i: 1));
3686 SDValue RHSVal = getValue(V: I.getOperand(i: 2));
3687 SmallVector<SDValue, 1> BaseOps(1, Cond);
3688 ISD::NodeType OpCode =
3689 Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
3690
3691 bool IsUnaryAbs = false;
3692 bool Negate = false;
3693
3694 SDNodeFlags Flags;
3695 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I))
3696 Flags.copyFMF(FPMO: *FPOp);
3697
3698 Flags.setUnpredictable(
3699 cast<SelectInst>(Val: I).getMetadata(KindID: LLVMContext::MD_unpredictable));
3700
3701 // Min/max matching is only viable if all output VTs are the same.
3702 if (all_equal(Range&: ValueVTs)) {
3703 EVT VT = ValueVTs[0];
3704 LLVMContext &Ctx = *DAG.getContext();
3705 auto &TLI = DAG.getTargetLoweringInfo();
3706
3707 // We care about the legality of the operation after it has been type
3708 // legalized.
3709 while (TLI.getTypeAction(Context&: Ctx, VT) != TargetLoweringBase::TypeLegal)
3710 VT = TLI.getTypeToTransformTo(Context&: Ctx, VT);
3711
3712 // If the vselect is legal, assume we want to leave this as a vector setcc +
3713 // vselect. Otherwise, if this is going to be scalarized, we want to see if
3714 // min/max is legal on the scalar type.
3715 bool UseScalarMinMax = VT.isVector() &&
3716 !TLI.isOperationLegalOrCustom(Op: ISD::VSELECT, VT);
3717
3718 // ValueTracking's select pattern matching does not account for -0.0,
3719 // so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that
3720 // -0.0 is less than +0.0.
3721 Value *LHS, *RHS;
3722 auto SPR = matchSelectPattern(V: const_cast<User*>(&I), LHS, RHS);
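 // Illustrative example (not from the original source): matchSelectPattern
 // maps "select (icmp ult a, b), a, b" to SPF_UMIN, which the switch below
 // lowers to ISD::UMIN when that operation is legal for the type.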
3723 ISD::NodeType Opc = ISD::DELETED_NODE;
3724 switch (SPR.Flavor) {
3725 case SPF_UMAX: Opc = ISD::UMAX; break;
3726 case SPF_UMIN: Opc = ISD::UMIN; break;
3727 case SPF_SMAX: Opc = ISD::SMAX; break;
3728 case SPF_SMIN: Opc = ISD::SMIN; break;
3729 case SPF_FMINNUM:
3730 switch (SPR.NaNBehavior) {
3731 case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
3732 case SPNB_RETURNS_NAN: break;
3733 case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
3734 case SPNB_RETURNS_ANY:
3735 if (TLI.isOperationLegalOrCustom(Op: ISD::FMINNUM, VT) ||
3736 (UseScalarMinMax &&
3737 TLI.isOperationLegalOrCustom(Op: ISD::FMINNUM, VT: VT.getScalarType())))
3738 Opc = ISD::FMINNUM;
3739 break;
3740 }
3741 break;
3742 case SPF_FMAXNUM:
3743 switch (SPR.NaNBehavior) {
3744 case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
3745 case SPNB_RETURNS_NAN: break;
3746 case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
3747 case SPNB_RETURNS_ANY:
3748 if (TLI.isOperationLegalOrCustom(Op: ISD::FMAXNUM, VT) ||
3749 (UseScalarMinMax &&
3750 TLI.isOperationLegalOrCustom(Op: ISD::FMAXNUM, VT: VT.getScalarType())))
3751 Opc = ISD::FMAXNUM;
3752 break;
3753 }
3754 break;
3755 case SPF_NABS:
3756 Negate = true;
3757 [[fallthrough]];
3758 case SPF_ABS:
3759 IsUnaryAbs = true;
3760 Opc = ISD::ABS;
3761 break;
3762 default: break;
3763 }
3764
3765 if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
3766 (TLI.isOperationLegalOrCustomOrPromote(Op: Opc, VT) ||
3767 (UseScalarMinMax &&
3768 TLI.isOperationLegalOrCustom(Op: Opc, VT: VT.getScalarType()))) &&
3769 // If the underlying comparison instruction is used by any other
3770 // instruction, the consumed instructions won't be destroyed, so it is
3771 // not profitable to convert to a min/max.
3772 hasOnlySelectUsers(Cond: cast<SelectInst>(Val: I).getCondition())) {
3773 OpCode = Opc;
3774 LHSVal = getValue(V: LHS);
3775 RHSVal = getValue(V: RHS);
3776 BaseOps.clear();
3777 }
3778
3779 if (IsUnaryAbs) {
3780 OpCode = Opc;
3781 LHSVal = getValue(V: LHS);
3782 BaseOps.clear();
3783 }
3784 }
3785
3786 if (IsUnaryAbs) {
3787 for (unsigned i = 0; i != NumValues; ++i) {
3788 SDLoc dl = getCurSDLoc();
3789 EVT VT = LHSVal.getNode()->getValueType(ResNo: LHSVal.getResNo() + i);
3790 Values[i] =
3791 DAG.getNode(Opcode: OpCode, DL: dl, VT, Operand: LHSVal.getValue(R: LHSVal.getResNo() + i));
3792 if (Negate)
3793 Values[i] = DAG.getNegative(Val: Values[i], DL: dl, VT);
3794 }
3795 } else {
3796 for (unsigned i = 0; i != NumValues; ++i) {
3797 SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
3798 Ops.push_back(Elt: SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
3799 Ops.push_back(Elt: SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
3800 Values[i] = DAG.getNode(
3801 Opcode: OpCode, DL: getCurSDLoc(),
3802 VT: LHSVal.getNode()->getValueType(ResNo: LHSVal.getResNo() + i), Ops, Flags);
3803 }
3804 }
3805
3806 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
3807 VTList: DAG.getVTList(VTs: ValueVTs), Ops: Values));
3808}
3809
3810void SelectionDAGBuilder::visitTrunc(const User &I) {
3811 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
3812 SDValue N = getValue(V: I.getOperand(i: 0));
3813 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3814 Ty: I.getType());
3815 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::TRUNCATE, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3816}
3817
3818void SelectionDAGBuilder::visitZExt(const User &I) {
3819 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3820 // ZExt also can't be a cast to bool for the same reason, so there's nothing much to do.
3821 SDValue N = getValue(V: I.getOperand(i: 0));
3822 auto &TLI = DAG.getTargetLoweringInfo();
3823 EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3824
3825 SDNodeFlags Flags;
3826 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: &I))
3827 Flags.setNonNeg(PNI->hasNonNeg());
3828
3829 // Eagerly use nonneg information to canonicalize towards sign_extend if
3830 // that is the target's preference.
3831 // TODO: Let the target do this later.
3832 if (Flags.hasNonNeg() &&
3833 TLI.isSExtCheaperThanZExt(FromTy: N.getValueType(), ToTy: DestVT)) {
3834 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3835 return;
3836 }
3837
3838 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N, Flags));
3839}
3840
3841void SelectionDAGBuilder::visitSExt(const User &I) {
3842 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3843 // SExt also can't be a cast to bool for the same reason, so there's nothing much to do.
3844 SDValue N = getValue(V: I.getOperand(i: 0));
3845 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3846 Ty: I.getType());
3847 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3848}
3849
3850void SelectionDAGBuilder::visitFPTrunc(const User &I) {
3851 // FPTrunc is never a no-op cast, no need to check
3852 SDValue N = getValue(V: I.getOperand(i: 0));
3853 SDLoc dl = getCurSDLoc();
3854 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3855 EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3856 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: DestVT, N1: N,
3857 N2: DAG.getTargetConstant(
3858 Val: 0, DL: dl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()))));
3859}
3860
3861void SelectionDAGBuilder::visitFPExt(const User &I) {
3862 // FPExt is never a no-op cast, no need to check
3863 SDValue N = getValue(V: I.getOperand(i: 0));
3864 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3865 Ty: I.getType());
3866 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3867}
3868
3869void SelectionDAGBuilder::visitFPToUI(const User &I) {
3870 // FPToUI is never a no-op cast, no need to check
3871 SDValue N = getValue(V: I.getOperand(i: 0));
3872 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3873 Ty: I.getType());
3874 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_UINT, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3875}
3876
3877void SelectionDAGBuilder::visitFPToSI(const User &I) {
3878 // FPToSI is never a no-op cast, no need to check
3879 SDValue N = getValue(V: I.getOperand(i: 0));
3880 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3881 Ty: I.getType());
3882 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3883}
3884
3885void SelectionDAGBuilder::visitUIToFP(const User &I) {
3886 // UIToFP is never a no-op cast, no need to check
3887 SDValue N = getValue(V: I.getOperand(i: 0));
3888 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3889 Ty: I.getType());
3890 SDNodeFlags Flags;
3891 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: &I))
3892 Flags.setNonNeg(PNI->hasNonNeg());
3893
3894 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UINT_TO_FP, DL: getCurSDLoc(), VT: DestVT, Operand: N, Flags));
3895}
3896
3897void SelectionDAGBuilder::visitSIToFP(const User &I) {
3898 // SIToFP is never a no-op cast, no need to check
3899 SDValue N = getValue(V: I.getOperand(i: 0));
3900 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3901 Ty: I.getType());
3902 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3903}
3904
3905void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3906 // What to do depends on the size of the integer and the size of the pointer.
3907 // We can either truncate, zero extend, or no-op, accordingly.
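 // Illustrative example (not from the original source): on a target with
 // 64-bit pointers, ptrtoint to i32 truncates, ptrtoint to i128 zero-extends,
 // and ptrtoint to i64 is a no-op.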
3908 SDValue N = getValue(V: I.getOperand(i: 0));
3909 auto &TLI = DAG.getTargetLoweringInfo();
3910 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3911 Ty: I.getType());
3912 EVT PtrMemVT =
3913 TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getOperand(i: 0)->getType());
3914 N = DAG.getPtrExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: PtrMemVT);
3915 N = DAG.getZExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: DestVT);
3916 setValue(V: &I, NewN: N);
3917}
3918
3919void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3920 // What to do depends on the size of the integer and the size of the pointer.
3921 // We can either truncate, zero extend, or no-op, accordingly.
3922 SDValue N = getValue(V: I.getOperand(i: 0));
3923 auto &TLI = DAG.getTargetLoweringInfo();
3924 EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3925 EVT PtrMemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3926 N = DAG.getZExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: PtrMemVT);
3927 N = DAG.getPtrExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: DestVT);
3928 setValue(V: &I, NewN: N);
3929}
3930
3931void SelectionDAGBuilder::visitBitCast(const User &I) {
3932 SDValue N = getValue(V: I.getOperand(i: 0));
3933 SDLoc dl = getCurSDLoc();
3934 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3935 Ty: I.getType());
3936
3937 // BitCast assures us that source and destination are the same size so this is
3938 // either a BITCAST or a no-op.
3939 if (DestVT != N.getValueType())
3940 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::BITCAST, DL: dl,
3941 VT: DestVT, Operand: N)); // convert types.
3942 // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3943 // might fold any kind of constant expression to an integer constant and that
3944 // is not what we are looking for. Only recognize a bitcast of a genuine
3945 // constant integer as an opaque constant.
3946 else if (ConstantInt *C = dyn_cast<ConstantInt>(Val: I.getOperand(i: 0)))
3947 setValue(V: &I, NewN: DAG.getConstant(Val: C->getValue(), DL: dl, VT: DestVT, /*isTarget=*/false,
3948 /*isOpaque*/true));
3949 else
3950 setValue(V: &I, NewN: N); // noop cast.
3951}
3952
3953void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3954 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3955 const Value *SV = I.getOperand(i: 0);
3956 SDValue N = getValue(V: SV);
3957 EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3958
3959 unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3960 unsigned DestAS = I.getType()->getPointerAddressSpace();
3961
3962 if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
3963 N = DAG.getAddrSpaceCast(dl: getCurSDLoc(), VT: DestVT, Ptr: N, SrcAS, DestAS);
3964
3965 setValue(V: &I, NewN: N);
3966}
3967
3968void SelectionDAGBuilder::visitInsertElement(const User &I) {
3969 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3970 SDValue InVec = getValue(V: I.getOperand(i: 0));
3971 SDValue InVal = getValue(V: I.getOperand(i: 1));
3972 SDValue InIdx = DAG.getZExtOrTrunc(Op: getValue(V: I.getOperand(i: 2)), DL: getCurSDLoc(),
3973 VT: TLI.getVectorIdxTy(DL: DAG.getDataLayout()));
3974 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: getCurSDLoc(),
3975 VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()),
3976 N1: InVec, N2: InVal, N3: InIdx));
3977}
3978
3979void SelectionDAGBuilder::visitExtractElement(const User &I) {
3980 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3981 SDValue InVec = getValue(V: I.getOperand(i: 0));
3982 SDValue InIdx = DAG.getZExtOrTrunc(Op: getValue(V: I.getOperand(i: 1)), DL: getCurSDLoc(),
3983 VT: TLI.getVectorIdxTy(DL: DAG.getDataLayout()));
3984 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: getCurSDLoc(),
3985 VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()),
3986 N1: InVec, N2: InIdx));
3987}
3988
3989void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3990 SDValue Src1 = getValue(V: I.getOperand(i: 0));
3991 SDValue Src2 = getValue(V: I.getOperand(i: 1));
3992 ArrayRef<int> Mask;
3993 if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: &I))
3994 Mask = SVI->getShuffleMask();
3995 else
3996 Mask = cast<ConstantExpr>(Val: I).getShuffleMask();
3997 SDLoc DL = getCurSDLoc();
3998 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3999 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
4000 EVT SrcVT = Src1.getValueType();
4001
4002 if (all_of(Range&: Mask, P: [](int Elem) { return Elem == 0; }) &&
4003 VT.isScalableVector()) {
4004 // Canonical splat form of first element of first input vector.
4005 SDValue FirstElt =
4006 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: SrcVT.getScalarType(), N1: Src1,
4007 N2: DAG.getVectorIdxConstant(Val: 0, DL));
4008 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL, VT, Operand: FirstElt));
4009 return;
4010 }
4011
4012 // For now, we only handle splats for scalable vectors.
4013 // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
4014 // for targets that support a SPLAT_VECTOR for non-scalable vector types.
4015 assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
4016
4017 unsigned SrcNumElts = SrcVT.getVectorNumElements();
4018 unsigned MaskNumElts = Mask.size();
4019
4020 if (SrcNumElts == MaskNumElts) {
4021 setValue(V: &I, NewN: DAG.getVectorShuffle(VT, dl: DL, N1: Src1, N2: Src2, Mask));
4022 return;
4023 }
4024
4025 // Normalize the shuffle vector since mask and vector length don't match.
4026 if (SrcNumElts < MaskNumElts) {
4027 // The mask is longer than the source vectors. We can use CONCAT_VECTORS to
4028 // make the mask and vector lengths match.
4029
4030 if (MaskNumElts % SrcNumElts == 0) {
4031 // Mask length is a multiple of the source vector length.
4032 // Check if the shuffle is some kind of concatenation of the input
4033 // vectors.
4034 unsigned NumConcat = MaskNumElts / SrcNumElts;
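 // Illustrative example (not from the original source): with two 4-element
 // sources, the mask <0,1,2,3,4,5,6,7> is recognized here as the
 // concatenation {Src1, Src2}, and <4,5,6,7,-1,-1,-1,-1> as {Src2, undef}.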
4035 bool IsConcat = true;
4036 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
4037 for (unsigned i = 0; i != MaskNumElts; ++i) {
4038 int Idx = Mask[i];
4039 if (Idx < 0)
4040 continue;
4041 // Ensure the indices in each SrcVT sized piece are sequential and that
4042 // the same source is used for the whole piece.
4043 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
4044 (ConcatSrcs[i / SrcNumElts] >= 0 &&
4045 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
4046 IsConcat = false;
4047 break;
4048 }
4049 // Remember which source this index came from.
4050 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
4051 }
4052
4053 // The shuffle is concatenating multiple vectors together. Just emit
4054 // a CONCAT_VECTORS operation.
4055 if (IsConcat) {
4056 SmallVector<SDValue, 8> ConcatOps;
4057 for (auto Src : ConcatSrcs) {
4058 if (Src < 0)
4059 ConcatOps.push_back(Elt: DAG.getUNDEF(VT: SrcVT));
4060 else if (Src == 0)
4061 ConcatOps.push_back(Elt: Src1);
4062 else
4063 ConcatOps.push_back(Elt: Src2);
4064 }
4065 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps));
4066 return;
4067 }
4068 }
4069
4070 unsigned PaddedMaskNumElts = alignTo(Value: MaskNumElts, Align: SrcNumElts);
4071 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
4072 EVT PaddedVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getScalarType(),
4073 NumElements: PaddedMaskNumElts);
4074
4075 // Pad both vectors with undefs to make them the same length as the mask.
4076 SDValue UndefVal = DAG.getUNDEF(VT: SrcVT);
4077
4078 SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
4079 SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
4080 MOps1[0] = Src1;
4081 MOps2[0] = Src2;
4082
4083 Src1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: PaddedVT, Ops: MOps1);
4084 Src2 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: PaddedVT, Ops: MOps2);
4085
4086 // Readjust mask for new input vector length.
4087 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
4088 for (unsigned i = 0; i != MaskNumElts; ++i) {
4089 int Idx = Mask[i];
4090 if (Idx >= (int)SrcNumElts)
4091 Idx -= SrcNumElts - PaddedMaskNumElts;
4092 MappedOps[i] = Idx;
4093 }
4094
4095 SDValue Result = DAG.getVectorShuffle(VT: PaddedVT, dl: DL, N1: Src1, N2: Src2, Mask: MappedOps);
4096
4097 // If the concatenated vector was padded, extract a subvector with the
4098 // correct number of elements.
4099 if (MaskNumElts != PaddedMaskNumElts)
4100 Result = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Result,
4101 N2: DAG.getVectorIdxConstant(Val: 0, DL));
4102
4103 setValue(V: &I, NewN: Result);
4104 return;
4105 }
4106
4107 if (SrcNumElts > MaskNumElts) {
4108 // Analyze the access pattern of the vector to see if we can extract
4109 // two subvectors and do the shuffle.
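    // For example, with SrcNumElts == 8 and MaskNumElts == 4, the mask
    // <4,5,12,13> extracts 4-element subvectors at offset 4 from both sources
    // and is rewritten below as the mask <0,1,4,5> on those subvectors.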
4110 int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
4111 bool CanExtract = true;
4112 for (int Idx : Mask) {
4113 unsigned Input = 0;
4114 if (Idx < 0)
4115 continue;
4116
4117 if (Idx >= (int)SrcNumElts) {
4118 Input = 1;
4119 Idx -= SrcNumElts;
4120 }
4121
4122 // If all the indices come from the same MaskNumElts sized portion of
4123 // the sources we can use extract. Also make sure the extract wouldn't
4124 // extract past the end of the source.
4125 int NewStartIdx = alignDown(Value: Idx, Align: MaskNumElts);
4126 if (NewStartIdx + MaskNumElts > SrcNumElts ||
4127 (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
4128 CanExtract = false;
4129 // Make sure we always update StartIdx as we use it to track if all
4130 // elements are undef.
4131 StartIdx[Input] = NewStartIdx;
4132 }
4133
4134 if (StartIdx[0] < 0 && StartIdx[1] < 0) {
4135 setValue(V: &I, NewN: DAG.getUNDEF(VT)); // Vectors are not used.
4136 return;
4137 }
4138 if (CanExtract) {
4139 // Extract appropriate subvector and generate a vector shuffle
4140 for (unsigned Input = 0; Input < 2; ++Input) {
4141 SDValue &Src = Input == 0 ? Src1 : Src2;
4142 if (StartIdx[Input] < 0)
4143 Src = DAG.getUNDEF(VT);
4144 else {
4145 Src = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Src,
4146 N2: DAG.getVectorIdxConstant(Val: StartIdx[Input], DL));
4147 }
4148 }
4149
4150 // Calculate new mask.
4151 SmallVector<int, 8> MappedOps(Mask);
4152 for (int &Idx : MappedOps) {
4153 if (Idx >= (int)SrcNumElts)
4154 Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
4155 else if (Idx >= 0)
4156 Idx -= StartIdx[0];
4157 }
4158
4159 setValue(V: &I, NewN: DAG.getVectorShuffle(VT, dl: DL, N1: Src1, N2: Src2, Mask: MappedOps));
4160 return;
4161 }
4162 }
4163
4164  // We can't use either concat vectors or extract subvectors, so fall back
4165  // to replacing the shuffle with per-element extracts and a build vector.
4167 EVT EltVT = VT.getVectorElementType();
4168 SmallVector<SDValue,8> Ops;
4169 for (int Idx : Mask) {
4170 SDValue Res;
4171
4172 if (Idx < 0) {
4173 Res = DAG.getUNDEF(VT: EltVT);
4174 } else {
4175 SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
4176 if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
4177
4178 Res = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Src,
4179 N2: DAG.getVectorIdxConstant(Val: Idx, DL));
4180 }
4181
4182 Ops.push_back(Elt: Res);
4183 }
4184
4185 setValue(V: &I, NewN: DAG.getBuildVector(VT, DL, Ops));
4186}
4187
4188void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
4189 ArrayRef<unsigned> Indices = I.getIndices();
4190 const Value *Op0 = I.getOperand(i_nocapture: 0);
4191 const Value *Op1 = I.getOperand(i_nocapture: 1);
4192 Type *AggTy = I.getType();
4193 Type *ValTy = Op1->getType();
4194 bool IntoUndef = isa<UndefValue>(Val: Op0);
4195 bool FromUndef = isa<UndefValue>(Val: Op1);
4196
4197 unsigned LinearIndex = ComputeLinearIndex(Ty: AggTy, Indices);
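  // LinearIndex is the position of the inserted value within the flattened
  // aggregate, e.g. for the aggregate type {i32, {i64, float}} and indices
  // {1, 1}, the flattened value types are {i32, i64, float} and LinearIndex
  // is 2.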
4198
4199 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4200 SmallVector<EVT, 4> AggValueVTs;
4201 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: AggTy, ValueVTs&: AggValueVTs);
4202 SmallVector<EVT, 4> ValValueVTs;
4203 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: ValTy, ValueVTs&: ValValueVTs);
4204
4205 unsigned NumAggValues = AggValueVTs.size();
4206 unsigned NumValValues = ValValueVTs.size();
4207 SmallVector<SDValue, 4> Values(NumAggValues);
4208
4209 // Ignore an insertvalue that produces an empty object
4210 if (!NumAggValues) {
4211 setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
4212 return;
4213 }
4214
4215 SDValue Agg = getValue(V: Op0);
4216 unsigned i = 0;
4217 // Copy the beginning value(s) from the original aggregate.
4218 for (; i != LinearIndex; ++i)
4219 Values[i] = IntoUndef ? DAG.getUNDEF(VT: AggValueVTs[i]) :
4220 SDValue(Agg.getNode(), Agg.getResNo() + i);
4221 // Copy values from the inserted value(s).
4222 if (NumValValues) {
4223 SDValue Val = getValue(V: Op1);
4224 for (; i != LinearIndex + NumValValues; ++i)
4225 Values[i] = FromUndef ? DAG.getUNDEF(VT: AggValueVTs[i]) :
4226 SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
4227 }
4228 // Copy remaining value(s) from the original aggregate.
4229 for (; i != NumAggValues; ++i)
4230 Values[i] = IntoUndef ? DAG.getUNDEF(VT: AggValueVTs[i]) :
4231 SDValue(Agg.getNode(), Agg.getResNo() + i);
4232
4233 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
4234 VTList: DAG.getVTList(VTs: AggValueVTs), Ops: Values));
4235}
4236
4237void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
4238 ArrayRef<unsigned> Indices = I.getIndices();
4239 const Value *Op0 = I.getOperand(i_nocapture: 0);
4240 Type *AggTy = Op0->getType();
4241 Type *ValTy = I.getType();
4242 bool OutOfUndef = isa<UndefValue>(Val: Op0);
4243
4244 unsigned LinearIndex = ComputeLinearIndex(Ty: AggTy, Indices);
4245
4246 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4247 SmallVector<EVT, 4> ValValueVTs;
4248 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: ValTy, ValueVTs&: ValValueVTs);
4249
4250 unsigned NumValValues = ValValueVTs.size();
4251
4252  // Ignore an extractvalue that produces an empty object
4253 if (!NumValValues) {
4254 setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
4255 return;
4256 }
4257
4258 SmallVector<SDValue, 4> Values(NumValValues);
4259
4260 SDValue Agg = getValue(V: Op0);
4261 // Copy out the selected value(s).
4262 for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
4263 Values[i - LinearIndex] =
4264 OutOfUndef ?
4265 DAG.getUNDEF(VT: Agg.getNode()->getValueType(ResNo: Agg.getResNo() + i)) :
4266 SDValue(Agg.getNode(), Agg.getResNo() + i);
4267
4268 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
4269 VTList: DAG.getVTList(VTs: ValValueVTs), Ops: Values));
4270}
4271
4272void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
4273 Value *Op0 = I.getOperand(i: 0);
4274 // Note that the pointer operand may be a vector of pointers. Take the scalar
4275 // element which holds a pointer.
4276 unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
4277 SDValue N = getValue(V: Op0);
4278 SDLoc dl = getCurSDLoc();
4279 auto &TLI = DAG.getTargetLoweringInfo();
4280
4281 // Normalize Vector GEP - all scalar operands should be converted to the
4282 // splat vector.
4283 bool IsVectorGEP = I.getType()->isVectorTy();
4284 ElementCount VectorElementCount =
4285 IsVectorGEP ? cast<VectorType>(Val: I.getType())->getElementCount()
4286 : ElementCount::getFixed(MinVal: 0);
4287
4288 if (IsVectorGEP && !N.getValueType().isVector()) {
4289 LLVMContext &Context = *DAG.getContext();
4290 EVT VT = EVT::getVectorVT(Context, VT: N.getValueType(), EC: VectorElementCount);
4291 N = DAG.getSplat(VT, DL: dl, Op: N);
4292 }
4293
4294 for (gep_type_iterator GTI = gep_type_begin(GEP: &I), E = gep_type_end(GEP: &I);
4295 GTI != E; ++GTI) {
4296 const Value *Idx = GTI.getOperand();
4297 if (StructType *StTy = GTI.getStructTypeOrNull()) {
4298 unsigned Field = cast<Constant>(Val: Idx)->getUniqueInteger().getZExtValue();
4299 if (Field) {
4300 // N = N + Offset
4301 uint64_t Offset =
4302 DAG.getDataLayout().getStructLayout(Ty: StTy)->getElementOffset(Idx: Field);
4303
4304 // In an inbounds GEP with an offset that is nonnegative even when
4305 // interpreted as signed, assume there is no unsigned overflow.
4306 SDNodeFlags Flags;
4307 if (int64_t(Offset) >= 0 && cast<GEPOperator>(Val: I).isInBounds())
4308 Flags.setNoUnsignedWrap(true);
4309
4310 N = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: N.getValueType(), N1: N,
4311 N2: DAG.getConstant(Val: Offset, DL: dl, VT: N.getValueType()), Flags);
4312 }
4313 } else {
4314 // IdxSize is the width of the arithmetic according to IR semantics.
4315 // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
4316 // (and fix up the result later).
4317 unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
4318 MVT IdxTy = MVT::getIntegerVT(BitWidth: IdxSize);
4319 TypeSize ElementSize =
4320 GTI.getSequentialElementStride(DL: DAG.getDataLayout());
4321 // We intentionally mask away the high bits here; ElementSize may not
4322 // fit in IdxTy.
4323 APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());
4324 bool ElementScalable = ElementSize.isScalable();
4325
4326 // If this is a scalar constant or a splat vector of constants,
4327 // handle it quickly.
4328 const auto *C = dyn_cast<Constant>(Val: Idx);
4329 if (C && isa<VectorType>(Val: C->getType()))
4330 C = C->getSplatValue();
4331
4332 const auto *CI = dyn_cast_or_null<ConstantInt>(Val: C);
4333 if (CI && CI->isZero())
4334 continue;
4335 if (CI && !ElementScalable) {
4336 APInt Offs = ElementMul * CI->getValue().sextOrTrunc(width: IdxSize);
4337 LLVMContext &Context = *DAG.getContext();
4338 SDValue OffsVal;
4339 if (IsVectorGEP)
4340 OffsVal = DAG.getConstant(
4341 Val: Offs, DL: dl, VT: EVT::getVectorVT(Context, VT: IdxTy, EC: VectorElementCount));
4342 else
4343 OffsVal = DAG.getConstant(Val: Offs, DL: dl, VT: IdxTy);
4344
4345 // In an inbounds GEP with an offset that is nonnegative even when
4346 // interpreted as signed, assume there is no unsigned overflow.
4347 SDNodeFlags Flags;
4348 if (Offs.isNonNegative() && cast<GEPOperator>(Val: I).isInBounds())
4349 Flags.setNoUnsignedWrap(true);
4350
4351 OffsVal = DAG.getSExtOrTrunc(Op: OffsVal, DL: dl, VT: N.getValueType());
4352
4353 N = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: N.getValueType(), N1: N, N2: OffsVal, Flags);
4354 continue;
4355 }
4356
4357 // N = N + Idx * ElementMul;
4358 SDValue IdxN = getValue(V: Idx);
4359
4360 if (!IdxN.getValueType().isVector() && IsVectorGEP) {
4361 EVT VT = EVT::getVectorVT(Context&: *Context, VT: IdxN.getValueType(),
4362 EC: VectorElementCount);
4363 IdxN = DAG.getSplat(VT, DL: dl, Op: IdxN);
4364 }
4365
4366 // If the index is smaller or larger than intptr_t, truncate or extend
4367 // it.
4368 IdxN = DAG.getSExtOrTrunc(Op: IdxN, DL: dl, VT: N.getValueType());
4369
4370 if (ElementScalable) {
4371 EVT VScaleTy = N.getValueType().getScalarType();
4372 SDValue VScale = DAG.getNode(
4373 Opcode: ISD::VSCALE, DL: dl, VT: VScaleTy,
4374 Operand: DAG.getConstant(Val: ElementMul.getZExtValue(), DL: dl, VT: VScaleTy));
4375 if (IsVectorGEP)
4376 VScale = DAG.getSplatVector(VT: N.getValueType(), DL: dl, Op: VScale);
4377 IdxN = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: N.getValueType(), N1: IdxN, N2: VScale);
4378 } else {
4379 // If this is a multiply by a power of two, turn it into a shl
4380 // immediately. This is a very common case.
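        // For example, a GEP over i32 elements has ElementMul == 4, which
        // becomes a left shift by 2.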
4381 if (ElementMul != 1) {
4382 if (ElementMul.isPowerOf2()) {
4383 unsigned Amt = ElementMul.logBase2();
4384 IdxN = DAG.getNode(Opcode: ISD::SHL, DL: dl,
4385 VT: N.getValueType(), N1: IdxN,
4386 N2: DAG.getConstant(Val: Amt, DL: dl, VT: IdxN.getValueType()));
4387 } else {
4388 SDValue Scale = DAG.getConstant(Val: ElementMul.getZExtValue(), DL: dl,
4389 VT: IdxN.getValueType());
4390 IdxN = DAG.getNode(Opcode: ISD::MUL, DL: dl,
4391 VT: N.getValueType(), N1: IdxN, N2: Scale);
4392 }
4393 }
4394 }
4395
4396 N = DAG.getNode(Opcode: ISD::ADD, DL: dl,
4397 VT: N.getValueType(), N1: N, N2: IdxN);
4398 }
4399 }
4400
4401 MVT PtrTy = TLI.getPointerTy(DL: DAG.getDataLayout(), AS);
4402 MVT PtrMemTy = TLI.getPointerMemTy(DL: DAG.getDataLayout(), AS);
4403 if (IsVectorGEP) {
4404 PtrTy = MVT::getVectorVT(VT: PtrTy, EC: VectorElementCount);
4405 PtrMemTy = MVT::getVectorVT(VT: PtrMemTy, EC: VectorElementCount);
4406 }
4407
4408 if (PtrMemTy != PtrTy && !cast<GEPOperator>(Val: I).isInBounds())
4409 N = DAG.getPtrExtendInReg(Op: N, DL: dl, VT: PtrMemTy);
4410
4411 setValue(V: &I, NewN: N);
4412}
4413
4414void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
4415 // If this is a fixed sized alloca in the entry block of the function,
4416 // allocate it statically on the stack.
4417 if (FuncInfo.StaticAllocaMap.count(Val: &I))
4418 return; // getValue will auto-populate this.
4419
4420 SDLoc dl = getCurSDLoc();
4421 Type *Ty = I.getAllocatedType();
4422 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4423 auto &DL = DAG.getDataLayout();
4424 TypeSize TySize = DL.getTypeAllocSize(Ty);
4425 MaybeAlign Alignment = std::max(a: DL.getPrefTypeAlign(Ty), b: I.getAlign());
4426
4427 SDValue AllocSize = getValue(V: I.getArraySize());
4428
4429 EVT IntPtr = TLI.getPointerTy(DL, AS: I.getAddressSpace());
4430 if (AllocSize.getValueType() != IntPtr)
4431 AllocSize = DAG.getZExtOrTrunc(Op: AllocSize, DL: dl, VT: IntPtr);
4432
4433 if (TySize.isScalable())
4434 AllocSize = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IntPtr, N1: AllocSize,
4435 N2: DAG.getVScale(DL: dl, VT: IntPtr,
4436 MulImm: APInt(IntPtr.getScalarSizeInBits(),
4437 TySize.getKnownMinValue())));
4438 else {
4439 SDValue TySizeValue =
4440 DAG.getConstant(Val: TySize.getFixedValue(), DL: dl, VT: MVT::getIntegerVT(BitWidth: 64));
4441 AllocSize = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IntPtr, N1: AllocSize,
4442 N2: DAG.getZExtOrTrunc(Op: TySizeValue, DL: dl, VT: IntPtr));
4443 }
4444
4445  // Handle alignment. If the requested alignment is less than or equal to
4446  // the stack alignment, ignore it. If it is greater than the stack
4447  // alignment, we record the requested alignment in the DYNAMIC_STACKALLOC node.
4448 Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
4449 if (*Alignment <= StackAlign)
4450 Alignment = std::nullopt;
4451
4452 const uint64_t StackAlignMask = StackAlign.value() - 1U;
4453  // Round the size of the allocation up to the stack alignment size
4454  // by adding StackAlign-1 to the size. This doesn't overflow because we're
4455  // computing an address inside an alloca.
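  // For example, with a 16-byte stack alignment, a 20-byte request becomes
  // (20 + 15) & ~15 == 32.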
4456 SDNodeFlags Flags;
4457 Flags.setNoUnsignedWrap(true);
4458 AllocSize = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AllocSize.getValueType(), N1: AllocSize,
4459 N2: DAG.getConstant(Val: StackAlignMask, DL: dl, VT: IntPtr), Flags);
4460
4461 // Mask out the low bits for alignment purposes.
4462 AllocSize = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AllocSize.getValueType(), N1: AllocSize,
4463 N2: DAG.getConstant(Val: ~StackAlignMask, DL: dl, VT: IntPtr));
4464
4465 SDValue Ops[] = {
4466 getRoot(), AllocSize,
4467 DAG.getConstant(Val: Alignment ? Alignment->value() : 0, DL: dl, VT: IntPtr)};
4468 SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
4469 SDValue DSA = DAG.getNode(Opcode: ISD::DYNAMIC_STACKALLOC, DL: dl, VTList: VTs, Ops);
4470 setValue(V: &I, NewN: DSA);
4471 DAG.setRoot(DSA.getValue(R: 1));
4472
4473 assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
4474}
4475
4476static const MDNode *getRangeMetadata(const Instruction &I) {
4477 // If !noundef is not present, then !range violation results in a poison
4478 // value rather than immediate undefined behavior. In theory, transferring
4479 // these annotations to SDAG is fine, but in practice there are key SDAG
4480 // transforms that are known not to be poison-safe, such as folding logical
4481 // and/or to bitwise and/or. For now, only transfer !range if !noundef is
4482 // also present.
4483 if (!I.hasMetadata(KindID: LLVMContext::MD_noundef))
4484 return nullptr;
4485 return I.getMetadata(KindID: LLVMContext::MD_range);
4486}
4487
4488void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
4489 if (I.isAtomic())
4490 return visitAtomicLoad(I);
4491
4492 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4493 const Value *SV = I.getOperand(i_nocapture: 0);
4494 if (TLI.supportSwiftError()) {
4495 // Swifterror values can come from either a function parameter with
4496 // swifterror attribute or an alloca with swifterror attribute.
4497 if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) {
4498 if (Arg->hasSwiftErrorAttr())
4499 return visitLoadFromSwiftError(I);
4500 }
4501
4502 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) {
4503 if (Alloca->isSwiftError())
4504 return visitLoadFromSwiftError(I);
4505 }
4506 }
4507
4508 SDValue Ptr = getValue(V: SV);
4509
4510 Type *Ty = I.getType();
4511 SmallVector<EVT, 4> ValueVTs, MemVTs;
4512 SmallVector<TypeSize, 4> Offsets;
4513 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty, ValueVTs, MemVTs: &MemVTs, Offsets: &Offsets);
4514 unsigned NumValues = ValueVTs.size();
4515 if (NumValues == 0)
4516 return;
4517
4518 Align Alignment = I.getAlign();
4519 AAMDNodes AAInfo = I.getAAMetadata();
4520 const MDNode *Ranges = getRangeMetadata(I);
4521 bool isVolatile = I.isVolatile();
4522 MachineMemOperand::Flags MMOFlags =
4523 TLI.getLoadMemOperandFlags(LI: I, DL: DAG.getDataLayout(), AC, LibInfo);
4524
4525 SDValue Root;
4526 bool ConstantMemory = false;
4527 if (isVolatile)
4528 // Serialize volatile loads with other side effects.
4529 Root = getRoot();
4530 else if (NumValues > MaxParallelChains)
4531 Root = getMemoryRoot();
4532 else if (AA &&
4533 AA->pointsToConstantMemory(Loc: MemoryLocation(
4534 SV,
4535 LocationSize::precise(Value: DAG.getDataLayout().getTypeStoreSize(Ty)),
4536 AAInfo))) {
4537 // Do not serialize (non-volatile) loads of constant memory with anything.
4538 Root = DAG.getEntryNode();
4539 ConstantMemory = true;
4540 MMOFlags |= MachineMemOperand::MOInvariant;
4541 } else {
4542 // Do not serialize non-volatile loads against each other.
4543 Root = DAG.getRoot();
4544 }
4545
4546 SDLoc dl = getCurSDLoc();
4547
4548 if (isVolatile)
4549 Root = TLI.prepareVolatileOrAtomicLoad(Chain: Root, DL: dl, DAG);
4550
4551 SmallVector<SDValue, 4> Values(NumValues);
4552 SmallVector<SDValue, 4> Chains(std::min(a: MaxParallelChains, b: NumValues));
4553
4554 unsigned ChainI = 0;
4555 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
4556 // Serializing loads here may result in excessive register pressure, and
4557 // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
4558 // could recover a bit by hoisting nodes upward in the chain by recognizing
4559 // they are side-effect free or do not alias. The optimizer should really
4560 // avoid this case by converting large object/array copies to llvm.memcpy
4561    // (MaxParallelChains should always remain as a failsafe).
4562 if (ChainI == MaxParallelChains) {
4563 assert(PendingLoads.empty() && "PendingLoads must be serialized first");
4564 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4565 ArrayRef(Chains.data(), ChainI));
4566 Root = Chain;
4567 ChainI = 0;
4568 }
4569
4570 // TODO: MachinePointerInfo only supports a fixed length offset.
4571 MachinePointerInfo PtrInfo =
4572 !Offsets[i].isScalable() || Offsets[i].isZero()
4573 ? MachinePointerInfo(SV, Offsets[i].getKnownMinValue())
4574 : MachinePointerInfo();
4575
4576 SDValue A = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: Offsets[i]);
4577 SDValue L = DAG.getLoad(VT: MemVTs[i], dl, Chain: Root, Ptr: A, PtrInfo, Alignment,
4578 MMOFlags, AAInfo, Ranges);
4579 Chains[ChainI] = L.getValue(R: 1);
4580
4581 if (MemVTs[i] != ValueVTs[i])
4582 L = DAG.getPtrExtOrTrunc(Op: L, DL: dl, VT: ValueVTs[i]);
4583
4584 Values[i] = L;
4585 }
4586
4587 if (!ConstantMemory) {
4588 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4589 ArrayRef(Chains.data(), ChainI));
4590 if (isVolatile)
4591 DAG.setRoot(Chain);
4592 else
4593 PendingLoads.push_back(Elt: Chain);
4594 }
4595
4596 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl,
4597 VTList: DAG.getVTList(VTs: ValueVTs), Ops: Values));
4598}
4599
4600void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
4601 assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
4602 "call visitStoreToSwiftError when backend supports swifterror");
4603
4604 SmallVector<EVT, 4> ValueVTs;
4605 SmallVector<uint64_t, 4> Offsets;
4606 const Value *SrcV = I.getOperand(i_nocapture: 0);
4607 ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(),
4608 Ty: SrcV->getType(), ValueVTs, FixedOffsets: &Offsets, StartingOffset: 0);
4609 assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
4610 "expect a single EVT for swifterror");
4611
4612 SDValue Src = getValue(V: SrcV);
4613 // Create a virtual register, then update the virtual register.
4614 Register VReg =
4615 SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
4616 // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
4617 // Chain can be getRoot or getControlRoot.
4618 SDValue CopyNode = DAG.getCopyToReg(Chain: getRoot(), dl: getCurSDLoc(), Reg: VReg,
4619 N: SDValue(Src.getNode(), Src.getResNo()));
4620 DAG.setRoot(CopyNode);
4621}
4622
4623void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
4624 assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
4625 "call visitLoadFromSwiftError when backend supports swifterror");
4626
4627 assert(!I.isVolatile() &&
4628 !I.hasMetadata(LLVMContext::MD_nontemporal) &&
4629 !I.hasMetadata(LLVMContext::MD_invariant_load) &&
4630         "swifterror loads must not be volatile, non-temporal, or invariant");
4631
4632 const Value *SV = I.getOperand(i_nocapture: 0);
4633 Type *Ty = I.getType();
4634 assert(
4635 (!AA ||
4636 !AA->pointsToConstantMemory(MemoryLocation(
4637 SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
4638 I.getAAMetadata()))) &&
4639 "load_from_swift_error should not be constant memory");
4640
4641 SmallVector<EVT, 4> ValueVTs;
4642 SmallVector<uint64_t, 4> Offsets;
4643 ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(), Ty,
4644 ValueVTs, FixedOffsets: &Offsets, StartingOffset: 0);
4645 assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
4646 "expect a single EVT for swifterror");
4647
4648 // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
4649 SDValue L = DAG.getCopyFromReg(
4650 Chain: getRoot(), dl: getCurSDLoc(),
4651 Reg: SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), VT: ValueVTs[0]);
4652
4653 setValue(V: &I, NewN: L);
4654}
4655
4656void SelectionDAGBuilder::visitStore(const StoreInst &I) {
4657 if (I.isAtomic())
4658 return visitAtomicStore(I);
4659
4660 const Value *SrcV = I.getOperand(i_nocapture: 0);
4661 const Value *PtrV = I.getOperand(i_nocapture: 1);
4662
4663 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4664 if (TLI.supportSwiftError()) {
4665 // Swifterror values can come from either a function parameter with
4666 // swifterror attribute or an alloca with swifterror attribute.
4667 if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) {
4668 if (Arg->hasSwiftErrorAttr())
4669 return visitStoreToSwiftError(I);
4670 }
4671
4672 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) {
4673 if (Alloca->isSwiftError())
4674 return visitStoreToSwiftError(I);
4675 }
4676 }
4677
4678 SmallVector<EVT, 4> ValueVTs, MemVTs;
4679 SmallVector<TypeSize, 4> Offsets;
4680 ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(),
4681 Ty: SrcV->getType(), ValueVTs, MemVTs: &MemVTs, Offsets: &Offsets);
4682 unsigned NumValues = ValueVTs.size();
4683 if (NumValues == 0)
4684 return;
4685
4686  // Get the lowered operands. Note that we do this after
4687  // checking if NumValues is zero, because with zero values
4688  // the operands won't have entries in the map.
4689 SDValue Src = getValue(V: SrcV);
4690 SDValue Ptr = getValue(V: PtrV);
4691
4692 SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
4693 SmallVector<SDValue, 4> Chains(std::min(a: MaxParallelChains, b: NumValues));
4694 SDLoc dl = getCurSDLoc();
4695 Align Alignment = I.getAlign();
4696 AAMDNodes AAInfo = I.getAAMetadata();
4697
4698 auto MMOFlags = TLI.getStoreMemOperandFlags(SI: I, DL: DAG.getDataLayout());
4699
4700 unsigned ChainI = 0;
4701 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
4702 // See visitLoad comments.
4703 if (ChainI == MaxParallelChains) {
4704 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4705 ArrayRef(Chains.data(), ChainI));
4706 Root = Chain;
4707 ChainI = 0;
4708 }
4709
4710 // TODO: MachinePointerInfo only supports a fixed length offset.
4711 MachinePointerInfo PtrInfo =
4712 !Offsets[i].isScalable() || Offsets[i].isZero()
4713 ? MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue())
4714 : MachinePointerInfo();
4715
4716 SDValue Add = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: Offsets[i]);
4717 SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
4718 if (MemVTs[i] != ValueVTs[i])
4719 Val = DAG.getPtrExtOrTrunc(Op: Val, DL: dl, VT: MemVTs[i]);
4720 SDValue St =
4721 DAG.getStore(Chain: Root, dl, Val, Ptr: Add, PtrInfo, Alignment, MMOFlags, AAInfo);
4722 Chains[ChainI] = St;
4723 }
4724
4725 SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4726 ArrayRef(Chains.data(), ChainI));
4727 setValue(V: &I, NewN: StoreNode);
4728 DAG.setRoot(StoreNode);
4729}
4730
4731void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
4732 bool IsCompressing) {
4733 SDLoc sdl = getCurSDLoc();
4734
4735 auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4736 Align &Alignment) {
4737 // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
4738 Src0 = I.getArgOperand(i: 0);
4739 Ptr = I.getArgOperand(i: 1);
4740 Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 2))->getAlignValue();
4741 Mask = I.getArgOperand(i: 3);
4742 };
4743 auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4744 Align &Alignment) {
4745 // llvm.masked.compressstore.*(Src0, Ptr, Mask)
4746 Src0 = I.getArgOperand(i: 0);
4747 Ptr = I.getArgOperand(i: 1);
4748 Mask = I.getArgOperand(i: 2);
4749 Alignment = I.getParamAlign(ArgNo: 1).valueOrOne();
4750 };
4751
4752 Value *PtrOperand, *MaskOperand, *Src0Operand;
4753 Align Alignment;
4754 if (IsCompressing)
4755 getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4756 else
4757 getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4758
4759 SDValue Ptr = getValue(V: PtrOperand);
4760 SDValue Src0 = getValue(V: Src0Operand);
4761 SDValue Mask = getValue(V: MaskOperand);
4762 SDValue Offset = DAG.getUNDEF(VT: Ptr.getValueType());
4763
4764 EVT VT = Src0.getValueType();
4765
4766 auto MMOFlags = MachineMemOperand::MOStore;
4767 if (I.hasMetadata(KindID: LLVMContext::MD_nontemporal))
4768 MMOFlags |= MachineMemOperand::MONonTemporal;
4769
4770 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4771 PtrInfo: MachinePointerInfo(PtrOperand), F: MMOFlags,
4772 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: Alignment, AAInfo: I.getAAMetadata());
4773 SDValue StoreNode =
4774 DAG.getMaskedStore(Chain: getMemoryRoot(), dl: sdl, Val: Src0, Base: Ptr, Offset, Mask, MemVT: VT, MMO,
4775 AM: ISD::UNINDEXED, IsTruncating: false /* Truncating */, IsCompressing);
4776 DAG.setRoot(StoreNode);
4777 setValue(V: &I, NewN: StoreNode);
4778}
4779
4780// Get a uniform base for the Gather/Scatter intrinsic.
4781// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
4782// We try to represent it as a base pointer + vector of indices.
4783// Usually, the vector of pointers comes from a 'getelementptr' instruction.
4784// The first operand of the GEP may be a single pointer or a vector of pointers
4785// Example:
4786// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
4787// or
4788// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
4789// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
4790//
4791// When the first GEP operand is a single pointer, it is the uniform base we
4792// are looking for. If the first operand of the GEP is a splat vector, we
4793// extract the splat value and use it as the uniform base.
4794// In all other cases the function returns 'false'.
4795static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
4796 ISD::MemIndexType &IndexType, SDValue &Scale,
4797 SelectionDAGBuilder *SDB, const BasicBlock *CurBB,
4798 uint64_t ElemSize) {
4799 SelectionDAG& DAG = SDB->DAG;
4800 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4801 const DataLayout &DL = DAG.getDataLayout();
4802
4803 assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
4804
4805 // Handle splat constant pointer.
4806 if (auto *C = dyn_cast<Constant>(Val: Ptr)) {
4807 C = C->getSplatValue();
4808 if (!C)
4809 return false;
4810
4811 Base = SDB->getValue(V: C);
4812
4813 ElementCount NumElts = cast<VectorType>(Val: Ptr->getType())->getElementCount();
4814 EVT VT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: TLI.getPointerTy(DL), EC: NumElts);
4815 Index = DAG.getConstant(Val: 0, DL: SDB->getCurSDLoc(), VT);
4816 IndexType = ISD::SIGNED_SCALED;
4817 Scale = DAG.getTargetConstant(Val: 1, DL: SDB->getCurSDLoc(), VT: TLI.getPointerTy(DL));
4818 return true;
4819 }
4820
4821 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: Ptr);
4822 if (!GEP || GEP->getParent() != CurBB)
4823 return false;
4824
4825 if (GEP->getNumOperands() != 2)
4826 return false;
4827
4828 const Value *BasePtr = GEP->getPointerOperand();
4829 const Value *IndexVal = GEP->getOperand(i_nocapture: GEP->getNumOperands() - 1);
4830
4831 // Make sure the base is scalar and the index is a vector.
4832 if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
4833 return false;
4834
4835 TypeSize ScaleVal = DL.getTypeAllocSize(Ty: GEP->getResultElementType());
4836 if (ScaleVal.isScalable())
4837 return false;
4838
4839 // Target may not support the required addressing mode.
4840 if (ScaleVal != 1 &&
4841 !TLI.isLegalScaleForGatherScatter(Scale: ScaleVal.getFixedValue(), ElemSize))
4842 return false;
4843
4844 Base = SDB->getValue(V: BasePtr);
4845 Index = SDB->getValue(V: IndexVal);
4846 IndexType = ISD::SIGNED_SCALED;
4847
4848 Scale =
4849 DAG.getTargetConstant(Val: ScaleVal, DL: SDB->getCurSDLoc(), VT: TLI.getPointerTy(DL));
4850 return true;
4851}
4852
4853void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
4854 SDLoc sdl = getCurSDLoc();
4855
4856 // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
4857 const Value *Ptr = I.getArgOperand(i: 1);
4858 SDValue Src0 = getValue(V: I.getArgOperand(i: 0));
4859 SDValue Mask = getValue(V: I.getArgOperand(i: 3));
4860 EVT VT = Src0.getValueType();
4861 Align Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 2))
4862 ->getMaybeAlignValue()
4863 .value_or(u: DAG.getEVTAlign(MemoryVT: VT.getScalarType()));
4864 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4865
4866 SDValue Base;
4867 SDValue Index;
4868 ISD::MemIndexType IndexType;
4869 SDValue Scale;
4870 bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, SDB: this,
4871 CurBB: I.getParent(), ElemSize: VT.getScalarStoreSize());
4872
4873 unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
4874 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4875 PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOStore,
4876 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: Alignment, AAInfo: I.getAAMetadata());
4877 if (!UniformBase) {
4878 Base = DAG.getConstant(Val: 0, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
4879 Index = getValue(V: Ptr);
4880 IndexType = ISD::SIGNED_SCALED;
4881 Scale = DAG.getTargetConstant(Val: 1, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
4882 }
4883
4884 EVT IdxVT = Index.getValueType();
4885 EVT EltTy = IdxVT.getVectorElementType();
4886 if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) {
4887 EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy);
4888 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: sdl, VT: NewIdxVT, Operand: Index);
4889 }
4890
4891 SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
4892 SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
4893 Ops, MMO, IndexType, false);
4894 DAG.setRoot(Scatter);
4895 setValue(V: &I, NewN: Scatter);
4896}
4897
4898void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
4899 SDLoc sdl = getCurSDLoc();
4900
4901 auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4902 Align &Alignment) {
4903 // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
4904 Ptr = I.getArgOperand(i: 0);
4905 Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getAlignValue();
4906 Mask = I.getArgOperand(i: 2);
4907 Src0 = I.getArgOperand(i: 3);
4908 };
4909 auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4910 Align &Alignment) {
4911 // @llvm.masked.expandload.*(Ptr, Mask, Src0)
4912 Ptr = I.getArgOperand(i: 0);
4913 Alignment = I.getParamAlign(ArgNo: 0).valueOrOne();
4914 Mask = I.getArgOperand(i: 1);
4915 Src0 = I.getArgOperand(i: 2);
4916 };
4917
4918 Value *PtrOperand, *MaskOperand, *Src0Operand;
4919 Align Alignment;
4920 if (IsExpanding)
4921 getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4922 else
4923 getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4924
4925 SDValue Ptr = getValue(V: PtrOperand);
4926 SDValue Src0 = getValue(V: Src0Operand);
4927 SDValue Mask = getValue(V: MaskOperand);
4928 SDValue Offset = DAG.getUNDEF(VT: Ptr.getValueType());
4929
4930 EVT VT = Src0.getValueType();
4931 AAMDNodes AAInfo = I.getAAMetadata();
4932 const MDNode *Ranges = getRangeMetadata(I);
4933
4934 // Do not serialize masked loads of constant memory with anything.
4935 MemoryLocation ML = MemoryLocation::getAfter(Ptr: PtrOperand, AATags: AAInfo);
4936 bool AddToChain = !AA || !AA->pointsToConstantMemory(Loc: ML);
4937
4938 SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
4939
4940 auto MMOFlags = MachineMemOperand::MOLoad;
4941 if (I.hasMetadata(KindID: LLVMContext::MD_nontemporal))
4942 MMOFlags |= MachineMemOperand::MONonTemporal;
4943
4944 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4945 PtrInfo: MachinePointerInfo(PtrOperand), F: MMOFlags,
4946 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: Alignment, AAInfo, Ranges);
4947
4948 SDValue Load =
4949 DAG.getMaskedLoad(VT, dl: sdl, Chain: InChain, Base: Ptr, Offset, Mask, Src0, MemVT: VT, MMO,
4950 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
4951 if (AddToChain)
4952 PendingLoads.push_back(Elt: Load.getValue(R: 1));
4953 setValue(V: &I, NewN: Load);
4954}
4955
4956void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
4957 SDLoc sdl = getCurSDLoc();
4958
4959 // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4960 const Value *Ptr = I.getArgOperand(i: 0);
4961 SDValue Src0 = getValue(V: I.getArgOperand(i: 3));
4962 SDValue Mask = getValue(V: I.getArgOperand(i: 2));
4963
4964 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4965 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
4966 Align Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 1))
4967 ->getMaybeAlignValue()
4968 .value_or(u: DAG.getEVTAlign(MemoryVT: VT.getScalarType()));
4969
4970 const MDNode *Ranges = getRangeMetadata(I);
4971
4972 SDValue Root = DAG.getRoot();
4973 SDValue Base;
4974 SDValue Index;
4975 ISD::MemIndexType IndexType;
4976 SDValue Scale;
4977 bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, SDB: this,
4978 CurBB: I.getParent(), ElemSize: VT.getScalarStoreSize());
4979 unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
4980 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4981 PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOLoad,
4982 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: Alignment, AAInfo: I.getAAMetadata(),
4983 Ranges);
4984
4985 if (!UniformBase) {
4986 Base = DAG.getConstant(Val: 0, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
4987 Index = getValue(V: Ptr);
4988 IndexType = ISD::SIGNED_SCALED;
4989 Scale = DAG.getTargetConstant(Val: 1, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
4990 }
4991
4992 EVT IdxVT = Index.getValueType();
4993 EVT EltTy = IdxVT.getVectorElementType();
4994 if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) {
4995 EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy);
4996 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: sdl, VT: NewIdxVT, Operand: Index);
4997 }
4998
4999 SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
5000 SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
5001 Ops, MMO, IndexType, ISD::NON_EXTLOAD);
5002
5003 PendingLoads.push_back(Elt: Gather.getValue(R: 1));
5004 setValue(V: &I, NewN: Gather);
5005}
5006
5007void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
5008 SDLoc dl = getCurSDLoc();
5009 AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
5010 AtomicOrdering FailureOrdering = I.getFailureOrdering();
5011 SyncScope::ID SSID = I.getSyncScopeID();
5012
5013 SDValue InChain = getRoot();
5014
5015 MVT MemVT = getValue(V: I.getCompareOperand()).getSimpleValueType();
5016 SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
5017
5018 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5019 auto Flags = TLI.getAtomicMemOperandFlags(AI: I, DL: DAG.getDataLayout());
5020
5021 MachineFunction &MF = DAG.getMachineFunction();
5022 MachineMemOperand *MMO = MF.getMachineMemOperand(
5023 PtrInfo: MachinePointerInfo(I.getPointerOperand()), F: Flags,
5024 Size: LocationSize::precise(Value: MemVT.getStoreSize()), BaseAlignment: DAG.getEVTAlign(MemoryVT: MemVT),
5025 AAInfo: AAMDNodes(), Ranges: nullptr, SSID, Ordering: SuccessOrdering, FailureOrdering);
5026
5027 SDValue L = DAG.getAtomicCmpSwap(Opcode: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
5028 dl, MemVT, VTs, Chain: InChain,
5029 Ptr: getValue(V: I.getPointerOperand()),
5030 Cmp: getValue(V: I.getCompareOperand()),
5031 Swp: getValue(V: I.getNewValOperand()), MMO);
5032
5033 SDValue OutChain = L.getValue(R: 2);
5034
5035 setValue(V: &I, NewN: L);
5036 DAG.setRoot(OutChain);
5037}
5038
5039void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
5040 SDLoc dl = getCurSDLoc();
5041 ISD::NodeType NT;
5042 switch (I.getOperation()) {
5043 default: llvm_unreachable("Unknown atomicrmw operation");
5044 case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
5045 case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
5046 case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
5047 case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
5048 case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
5049 case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
5050 case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
5051 case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
5052 case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
5053 case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
5054 case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
5055 case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
5056 case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
5057 case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
5058 case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
5059 case AtomicRMWInst::UIncWrap:
5060 NT = ISD::ATOMIC_LOAD_UINC_WRAP;
5061 break;
5062 case AtomicRMWInst::UDecWrap:
5063 NT = ISD::ATOMIC_LOAD_UDEC_WRAP;
5064 break;
5065 }
5066 AtomicOrdering Ordering = I.getOrdering();
5067 SyncScope::ID SSID = I.getSyncScopeID();
5068
5069 SDValue InChain = getRoot();
5070
5071 auto MemVT = getValue(V: I.getValOperand()).getSimpleValueType();
5072 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5073 auto Flags = TLI.getAtomicMemOperandFlags(AI: I, DL: DAG.getDataLayout());
5074
5075 MachineFunction &MF = DAG.getMachineFunction();
5076 MachineMemOperand *MMO = MF.getMachineMemOperand(
5077 PtrInfo: MachinePointerInfo(I.getPointerOperand()), F: Flags,
5078 Size: LocationSize::precise(Value: MemVT.getStoreSize()), BaseAlignment: DAG.getEVTAlign(MemoryVT: MemVT),
5079 AAInfo: AAMDNodes(), Ranges: nullptr, SSID, Ordering);
5080
5081 SDValue L =
5082 DAG.getAtomic(Opcode: NT, dl, MemVT, Chain: InChain,
5083 Ptr: getValue(V: I.getPointerOperand()), Val: getValue(V: I.getValOperand()),
5084 MMO);
5085
5086 SDValue OutChain = L.getValue(R: 1);
5087
5088 setValue(V: &I, NewN: L);
5089 DAG.setRoot(OutChain);
5090}
5091
5092void SelectionDAGBuilder::visitFence(const FenceInst &I) {
5093 SDLoc dl = getCurSDLoc();
5094 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5095 SDValue Ops[3];
5096 Ops[0] = getRoot();
5097 Ops[1] = DAG.getTargetConstant(Val: (unsigned)I.getOrdering(), DL: dl,
5098 VT: TLI.getFenceOperandTy(DL: DAG.getDataLayout()));
5099 Ops[2] = DAG.getTargetConstant(Val: I.getSyncScopeID(), DL: dl,
5100 VT: TLI.getFenceOperandTy(DL: DAG.getDataLayout()));
5101 SDValue N = DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
5102 setValue(V: &I, NewN: N);
5103 DAG.setRoot(N);
5104}
5105
5106void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
5107 SDLoc dl = getCurSDLoc();
5108 AtomicOrdering Order = I.getOrdering();
5109 SyncScope::ID SSID = I.getSyncScopeID();
5110
5111 SDValue InChain = getRoot();
5112
5113 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5114 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
5115 EVT MemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getType());
5116
5117 if (!TLI.supportsUnalignedAtomics() &&
5118 I.getAlign().value() < MemVT.getSizeInBits() / 8)
5119 report_fatal_error(reason: "Cannot generate unaligned atomic load");
5120
5121 auto Flags = TLI.getLoadMemOperandFlags(LI: I, DL: DAG.getDataLayout(), AC, LibInfo);
5122
5123 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
5124 PtrInfo: MachinePointerInfo(I.getPointerOperand()), F: Flags,
5125 Size: LocationSize::precise(Value: MemVT.getStoreSize()), BaseAlignment: I.getAlign(), AAInfo: AAMDNodes(),
5126 Ranges: nullptr, SSID, Ordering: Order);
5127
5128 InChain = TLI.prepareVolatileOrAtomicLoad(Chain: InChain, DL: dl, DAG);
5129
5130 SDValue Ptr = getValue(V: I.getPointerOperand());
5131 SDValue L = DAG.getAtomic(Opcode: ISD::ATOMIC_LOAD, dl, MemVT, VT: MemVT, Chain: InChain,
5132 Ptr, MMO);
5133
5134 SDValue OutChain = L.getValue(R: 1);
5135 if (MemVT != VT)
5136 L = DAG.getPtrExtOrTrunc(Op: L, DL: dl, VT);
5137
5138 setValue(V: &I, NewN: L);
5139 DAG.setRoot(OutChain);
5140}
5141
5142void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
5143 SDLoc dl = getCurSDLoc();
5144
5145 AtomicOrdering Ordering = I.getOrdering();
5146 SyncScope::ID SSID = I.getSyncScopeID();
5147
5148 SDValue InChain = getRoot();
5149
5150 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5151 EVT MemVT =
5152 TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getValueOperand()->getType());
5153
5154 if (!TLI.supportsUnalignedAtomics() &&
5155 I.getAlign().value() < MemVT.getSizeInBits() / 8)
5156 report_fatal_error(reason: "Cannot generate unaligned atomic store");
5157
5158 auto Flags = TLI.getStoreMemOperandFlags(SI: I, DL: DAG.getDataLayout());
5159
5160 MachineFunction &MF = DAG.getMachineFunction();
5161 MachineMemOperand *MMO = MF.getMachineMemOperand(
5162 PtrInfo: MachinePointerInfo(I.getPointerOperand()), F: Flags,
5163 Size: LocationSize::precise(Value: MemVT.getStoreSize()), BaseAlignment: I.getAlign(), AAInfo: AAMDNodes(),
5164 Ranges: nullptr, SSID, Ordering);
5165
5166 SDValue Val = getValue(V: I.getValueOperand());
5167 if (Val.getValueType() != MemVT)
5168 Val = DAG.getPtrExtOrTrunc(Op: Val, DL: dl, VT: MemVT);
5169 SDValue Ptr = getValue(V: I.getPointerOperand());
5170
5171 SDValue OutChain =
5172 DAG.getAtomic(Opcode: ISD::ATOMIC_STORE, dl, MemVT, Chain: InChain, Ptr: Val, Val: Ptr, MMO);
5173
5174 setValue(V: &I, NewN: OutChain);
5175 DAG.setRoot(OutChain);
5176}
5177
5178/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
5179/// node.
5180void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
5181 unsigned Intrinsic) {
5182 // Ignore the callsite's attributes. A specific call site may be marked with
5183 // readnone, but the lowering code will expect the chain based on the
5184 // definition.
5185 const Function *F = I.getCalledFunction();
5186 bool HasChain = !F->doesNotAccessMemory();
5187 bool OnlyLoad = HasChain && F->onlyReadsMemory();
5188
5189 // Build the operand list.
5190 SmallVector<SDValue, 8> Ops;
5191 if (HasChain) { // If this intrinsic has side-effects, chainify it.
5192 if (OnlyLoad) {
5193 // We don't need to serialize loads against other loads.
5194 Ops.push_back(Elt: DAG.getRoot());
5195 } else {
5196 Ops.push_back(Elt: getRoot());
5197 }
5198 }
5199
5200 // Info is set by getTgtMemIntrinsic
5201 TargetLowering::IntrinsicInfo Info;
5202 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5203 bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
5204 DAG.getMachineFunction(),
5205 Intrinsic);
5206
5207 // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
5208 if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
5209 Info.opc == ISD::INTRINSIC_W_CHAIN)
5210 Ops.push_back(Elt: DAG.getTargetConstant(Val: Intrinsic, DL: getCurSDLoc(),
5211 VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
5212
5213 // Add all operands of the call to the operand list.
5214 for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
5215 const Value *Arg = I.getArgOperand(i);
5216 if (!I.paramHasAttr(i, Attribute::ImmArg)) {
5217 Ops.push_back(Elt: getValue(V: Arg));
5218 continue;
5219 }
5220
5221 // Use TargetConstant instead of a regular constant for immarg.
5222 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: Arg->getType(), AllowUnknown: true);
5223 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Arg)) {
5224 assert(CI->getBitWidth() <= 64 &&
5225 "large intrinsic immediates not handled");
5226 Ops.push_back(Elt: DAG.getTargetConstant(Val: *CI, DL: SDLoc(), VT));
5227 } else {
5228 Ops.push_back(
5229 Elt: DAG.getTargetConstantFP(Val: *cast<ConstantFP>(Val: Arg), DL: SDLoc(), VT));
5230 }
5231 }
5232
5233 SmallVector<EVT, 4> ValueVTs;
5234 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: I.getType(), ValueVTs);
5235
5236 if (HasChain)
5237 ValueVTs.push_back(MVT::Other);
5238
5239 SDVTList VTs = DAG.getVTList(VTs: ValueVTs);
5240
5241 // Propagate fast-math-flags from IR to node(s).
5242 SDNodeFlags Flags;
5243 if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &I))
5244 Flags.copyFMF(FPMO: *FPMO);
5245 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
5246
5247 // Create the node.
5248 SDValue Result;
5249
5250 if (auto Bundle = I.getOperandBundle(ID: LLVMContext::OB_convergencectrl)) {
5251 auto *Token = Bundle->Inputs[0].get();
5252 SDValue ConvControlToken = getValue(V: Token);
5253 assert(Ops.back().getValueType() != MVT::Glue &&
5254         "Did not expect another glue node here.");
5255 ConvControlToken =
5256 DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
5257 Ops.push_back(Elt: ConvControlToken);
5258 }
5259
5260 // In some cases, custom collection of operands from CallInst I may be needed.
5261 TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
5262 if (IsTgtIntrinsic) {
5263    // This is a target intrinsic that touches memory.
5264    //
5265    // TODO: We currently just fall back to address space 0 if
5266    // getTgtMemIntrinsic didn't yield anything useful.
5267 MachinePointerInfo MPI;
5268 if (Info.ptrVal)
5269 MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
5270 else if (Info.fallbackAddressSpace)
5271 MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
5272 Result = DAG.getMemIntrinsicNode(Opcode: Info.opc, dl: getCurSDLoc(), VTList: VTs, Ops,
5273 MemVT: Info.memVT, PtrInfo: MPI, Alignment: Info.align, Flags: Info.flags,
5274 Size: Info.size, AAInfo: I.getAAMetadata());
5275 } else if (!HasChain) {
5276 Result = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: getCurSDLoc(), VTList: VTs, Ops);
5277 } else if (!I.getType()->isVoidTy()) {
5278 Result = DAG.getNode(Opcode: ISD::INTRINSIC_W_CHAIN, DL: getCurSDLoc(), VTList: VTs, Ops);
5279 } else {
5280 Result = DAG.getNode(Opcode: ISD::INTRINSIC_VOID, DL: getCurSDLoc(), VTList: VTs, Ops);
5281 }
5282
5283 if (HasChain) {
5284 SDValue Chain = Result.getValue(R: Result.getNode()->getNumValues()-1);
5285 if (OnlyLoad)
5286 PendingLoads.push_back(Elt: Chain);
5287 else
5288 DAG.setRoot(Chain);
5289 }
5290
5291 if (!I.getType()->isVoidTy()) {
5292 if (!isa<VectorType>(Val: I.getType()))
5293 Result = lowerRangeToAssertZExt(DAG, I, Op: Result);
5294
5295 MaybeAlign Alignment = I.getRetAlign();
5296
5297 // Insert `assertalign` node if there's an alignment.
5298 if (InsertAssertAlign && Alignment) {
5299 Result =
5300 DAG.getAssertAlign(DL: getCurSDLoc(), V: Result, A: Alignment.valueOrOne());
5301 }
5302 }
5303
5304 setValue(V: &I, NewN: Result);
5305}
5306
5307/// GetSignificand - Get the significand and build it into a floating-point
5308/// number with exponent of 1:
5309///
5310/// Op = (Op & 0x007fffff) | 0x3f800000;
5311///
5312/// where Op is the i32 bit pattern of the floating-point value.
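///
/// For example, for Op == 0x41200000 (10.0f) this yields 0x3fa00000 (1.25f),
/// since 10.0f == 1.25f * 2^3.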
5313static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
5314 SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
5315 DAG.getConstant(0x007fffff, dl, MVT::i32));
5316 SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
5317 DAG.getConstant(0x3f800000, dl, MVT::i32));
5318 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
5319}
5320
5321/// GetExponent - Get the exponent:
5322///
5323/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
5324///
5325/// where Op is the i32 bit pattern of the floating-point value.
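///
/// For example, for Op == 0x41200000 (10.0f) this yields 3.0f,
/// since 10.0f == 1.25f * 2^3.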
5326static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
5327 const TargetLowering &TLI, const SDLoc &dl) {
5328 SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
5329 DAG.getConstant(0x7f800000, dl, MVT::i32));
5330 SDValue t1 = DAG.getNode(
5331 ISD::SRL, dl, MVT::i32, t0,
5332 DAG.getConstant(23, dl,
5333 TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
5334 SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
5335 DAG.getConstant(127, dl, MVT::i32));
5336 return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
5337}
5338
5339/// getF32Constant - Get 32-bit floating point constant.
5340static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
5341 const SDLoc &dl) {
5342 return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
5343 MVT::f32);
5344}
5345
5346static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
5347 SelectionDAG &DAG) {
5348 // TODO: What fast-math-flags should be set on the floating-point nodes?
5349
5350  // IntegerPartOfX = (int32_t)t0;
5351 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
5352
5353 // FractionalPartOfX = t0 - (float)IntegerPartOfX;
5354 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
5355 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
5356
5357 // IntegerPartOfX <<= 23;
5358 IntegerPartOfX =
5359 DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
5360 DAG.getConstant(23, dl,
5361 DAG.getTargetLoweringInfo().getShiftAmountTy(
5362 MVT::i32, DAG.getDataLayout())));
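  // Placing IntegerPartOfX in the IEEE-754 exponent field means the integer
  // ADD at the end of this function scales the polynomial result (which lies
  // in [1, 2)) by 2^IntegerPartOfX.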
5363
5364 SDValue TwoToFractionalPartOfX;
5365 if (LimitFloatPrecision <= 6) {
5366 // For floating-point precision of 6:
5367 //
5368 // TwoToFractionalPartOfX =
5369 // 0.997535578f +
5370 // (0.735607626f + 0.252464424f * x) * x;
5371 //
5372 // error 0.0144103317, which is 6 bits
5373 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5374 getF32Constant(DAG, 0x3e814304, dl));
5375 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5376 getF32Constant(DAG, 0x3f3c50c8, dl));
5377 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5378 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5379 getF32Constant(DAG, 0x3f7f5e7e, dl));
5380 } else if (LimitFloatPrecision <= 12) {
5381 // For floating-point precision of 12:
5382 //
5383 // TwoToFractionalPartOfX =
5384 // 0.999892986f +
5385 // (0.696457318f +
5386 // (0.224338339f + 0.792043434e-1f * x) * x) * x;
5387 //
5388 // error 0.000107046256, which is 13 to 14 bits
5389 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5390 getF32Constant(DAG, 0x3da235e3, dl));
5391 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5392 getF32Constant(DAG, 0x3e65b8f3, dl));
5393 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5394 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5395 getF32Constant(DAG, 0x3f324b07, dl));
5396 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5397 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
5398 getF32Constant(DAG, 0x3f7ff8fd, dl));
5399 } else { // LimitFloatPrecision <= 18
5400 // For floating-point precision of 18:
5401 //
5402 // TwoToFractionalPartOfX =
5403 // 0.999999982f +
5404 // (0.693148872f +
5405 // (0.240227044f +
5406 // (0.554906021e-1f +
5407 // (0.961591928e-2f +
5408 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
5409 // error 2.47208000*10^(-7), which is better than 18 bits
5410 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5411 getF32Constant(DAG, 0x3924b03e, dl));
5412 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5413 getF32Constant(DAG, 0x3ab24b87, dl));
5414 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5415 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5416 getF32Constant(DAG, 0x3c1d8c17, dl));
5417 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5418 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
5419 getF32Constant(DAG, 0x3d634a1d, dl));
5420 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5421 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
5422 getF32Constant(DAG, 0x3e75fe14, dl));
5423 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
5424 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
5425 getF32Constant(DAG, 0x3f317234, dl));
5426 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
5427 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
5428 getF32Constant(DAG, 0x3f800000, dl));
5429 }
5430
5431 // Add the exponent into the result in integer domain.
5432 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
5433 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
5434 DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
5435}
5436
5437/// expandExp - Lower an exp intrinsic. Handles the special sequences for
5438/// limited-precision mode.
5439static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5440 const TargetLowering &TLI, SDNodeFlags Flags) {
5441 if (Op.getValueType() == MVT::f32 &&
5442 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5443
5444 // Put the exponent in the right bit position for later addition to the
5445 // final result:
5446 //
5447 // t0 = Op * log2(e)
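    //
    // This works because exp(Op) == 2^(Op * log2(e)).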
5448
5449 // TODO: What fast-math-flags should be set here?
5450 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
5451 DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
5452 return getLimitedPrecisionExp2(t0, dl, DAG);
5453 }
5454
5455 // No special expansion.
5456 return DAG.getNode(Opcode: ISD::FEXP, DL: dl, VT: Op.getValueType(), Operand: Op, Flags);
5457}
5458
5459/// expandLog - Lower a log intrinsic. Handles the special sequences for
5460/// limited-precision mode.
5461static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5462 const TargetLowering &TLI, SDNodeFlags Flags) {
5463 // TODO: What fast-math-flags should be set on the floating-point nodes?
5464
5465 if (Op.getValueType() == MVT::f32 &&
5466 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5467 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
5468
5469 // Scale the exponent by log(2).
5470 SDValue Exp = GetExponent(DAG, Op: Op1, TLI, dl);
5471 SDValue LogOfExponent =
5472 DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
5473 DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
5474
5475 // Get the significand and build it into a floating-point number with
5476 // exponent of 1.
5477 SDValue X = GetSignificand(DAG, Op: Op1, dl);
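    // Since Op == X * 2^Exp with 1 <= X < 2, log(Op) == Exp*log(2) + log(X);
    // the polynomial below approximates log(X) over [1, 2).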
5478
5479 SDValue LogOfMantissa;
5480 if (LimitFloatPrecision <= 6) {
5481 // For floating-point precision of 6:
5482 //
5483      // LogOfMantissa =
5484 // -1.1609546f +
5485 // (1.4034025f - 0.23903021f * x) * x;
5486 //
5487 // error 0.0034276066, which is better than 8 bits
5488 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5489 getF32Constant(DAG, 0xbe74c456, dl));
5490 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5491 getF32Constant(DAG, 0x3fb3a2b1, dl));
5492 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5493 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5494 getF32Constant(DAG, 0x3f949a29, dl));
5495 } else if (LimitFloatPrecision <= 12) {
5496 // For floating-point precision of 12:
5497 //
5498 // LogOfMantissa =
5499 // -1.7417939f +
5500 // (2.8212026f +
5501 // (-1.4699568f +
5502 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
5503 //
5504 // error 0.000061011436, which is 14 bits
5505 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5506 getF32Constant(DAG, 0xbd67b6d6, dl));
5507 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5508 getF32Constant(DAG, 0x3ee4f4b8, dl));
5509 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5510 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5511 getF32Constant(DAG, 0x3fbc278b, dl));
5512 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5513 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5514 getF32Constant(DAG, 0x40348e95, dl));
5515 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5516 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5517 getF32Constant(DAG, 0x3fdef31a, dl));
5518 } else { // LimitFloatPrecision <= 18
5519 // For floating-point precision of 18:
5520 //
5521 // LogOfMantissa =
5522 // -2.1072184f +
5523 // (4.2372794f +
5524 // (-3.7029485f +
5525 // (2.2781945f +
5526 // (-0.87823314f +
5527 // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
5528 //
5529 // error 0.0000023660568, which is better than 18 bits
5530 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5531 getF32Constant(DAG, 0xbc91e5ac, dl));
5532 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5533 getF32Constant(DAG, 0x3e4350aa, dl));
5534 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5535 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5536 getF32Constant(DAG, 0x3f60d3e3, dl));
5537 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5538 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5539 getF32Constant(DAG, 0x4011cdf0, dl));
5540 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5541 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5542 getF32Constant(DAG, 0x406cfd1c, dl));
5543 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5544 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
5545 getF32Constant(DAG, 0x408797cb, dl));
5546 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
5547 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
5548 getF32Constant(DAG, 0x4006dcab, dl));
5549 }
5550
5551 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
5552 }
5553
5554 // No special expansion.
5555 return DAG.getNode(Opcode: ISD::FLOG, DL: dl, VT: Op.getValueType(), Operand: Op, Flags);
5556}
5557
5558/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
5559/// limited-precision mode.
5560static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5561 const TargetLowering &TLI, SDNodeFlags Flags) {
5562 // TODO: What fast-math-flags should be set on the floating-point nodes?
5563
5564 if (Op.getValueType() == MVT::f32 &&
5565 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5566 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
5567
5568 // Get the exponent.
5569 SDValue LogOfExponent = GetExponent(DAG, Op: Op1, TLI, dl);
5570
5571 // Get the significand and build it into a floating-point number with
5572 // exponent of 1.
5573 SDValue X = GetSignificand(DAG, Op: Op1, dl);
5574
5575 // Different possible minimax approximations of significand in
5576 // floating-point for various degrees of accuracy over [1,2].
5577 SDValue Log2ofMantissa;
5578 if (LimitFloatPrecision <= 6) {
5579 // For floating-point precision of 6:
5580 //
5581 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
5582 //
5583 // error 0.0049451742, which is more than 7 bits
5584 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5585 getF32Constant(DAG, 0xbeb08fe0, dl));
5586 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5587 getF32Constant(DAG, 0x40019463, dl));
5588 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5589 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5590 getF32Constant(DAG, 0x3fd6633d, dl));
5591 } else if (LimitFloatPrecision <= 12) {
5592 // For floating-point precision of 12:
5593 //
5594 // Log2ofMantissa =
5595 // -2.51285454f +
5596 // (4.07009056f +
5597 // (-2.12067489f +
5598 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
5599 //
5600 // error 0.0000876136000, which is better than 13 bits
5601 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5602 getF32Constant(DAG, 0xbda7262e, dl));
5603 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5604 getF32Constant(DAG, 0x3f25280b, dl));
5605 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5606 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5607 getF32Constant(DAG, 0x4007b923, dl));
5608 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5609 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5610 getF32Constant(DAG, 0x40823e2f, dl));
5611 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5612 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5613 getF32Constant(DAG, 0x4020d29c, dl));
5614 } else { // LimitFloatPrecision <= 18
5615 // For floating-point precision of 18:
5616 //
5617 // Log2ofMantissa =
5618 // -3.0400495f +
5619 // (6.1129976f +
5620 // (-5.3420409f +
5621 // (3.2865683f +
5622 // (-1.2669343f +
5623 // (0.27515199f -
5624 // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
5625 //
5626 // error 0.0000018516, which is better than 18 bits
5627 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5628 getF32Constant(DAG, 0xbcd2769e, dl));
5629 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5630 getF32Constant(DAG, 0x3e8ce0b9, dl));
5631 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5632 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5633 getF32Constant(DAG, 0x3fa22ae7, dl));
5634 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5635 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5636 getF32Constant(DAG, 0x40525723, dl));
5637 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5638 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5639 getF32Constant(DAG, 0x40aaf200, dl));
5640 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5641 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
5642 getF32Constant(DAG, 0x40c39dad, dl));
5643 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
5644 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
5645 getF32Constant(DAG, 0x4042902c, dl));
5646 }
5647
5648 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
5649 }
5650
5651 // No special expansion.
5652 return DAG.getNode(Opcode: ISD::FLOG2, DL: dl, VT: Op.getValueType(), Operand: Op, Flags);
5653}
5654
5655/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
5656/// limited-precision mode.
5657static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5658 const TargetLowering &TLI, SDNodeFlags Flags) {
5659 // TODO: What fast-math-flags should be set on the floating-point nodes?
5660
5661 if (Op.getValueType() == MVT::f32 &&
5662 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5663 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
5664
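 // As in expandLog, decompose Op = 2^exponent * significand; then
 // log10(Op) = exponent*log10(2) + log10(significand).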
5665 // Scale the exponent by log10(2) [0.30102999f].
5666 SDValue Exp = GetExponent(DAG, Op: Op1, TLI, dl);
5667 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
5668 getF32Constant(DAG, 0x3e9a209a, dl));
5669
5670 // Get the significand and build it into a floating-point number with
5671 // exponent of 1.
5672 SDValue X = GetSignificand(DAG, Op: Op1, dl);
5673
5674 SDValue Log10ofMantissa;
5675 if (LimitFloatPrecision <= 6) {
5676 // For floating-point precision of 6:
5677 //
5678 // Log10ofMantissa =
5679 // -0.50419619f +
5680 // (0.60948995f - 0.10380950f * x) * x;
5681 //
5682 // error 0.0014886165, which is 6 bits
5683 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5684 getF32Constant(DAG, 0xbdd49a13, dl));
5685 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5686 getF32Constant(DAG, 0x3f1c0789, dl));
5687 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5688 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5689 getF32Constant(DAG, 0x3f011300, dl));
5690 } else if (LimitFloatPrecision <= 12) {
5691 // For floating-point precision of 12:
5692 //
5693 // Log10ofMantissa =
5694 // -0.64831180f +
5695 // (0.91751397f +
5696 // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
5697 //
5698 // error 0.00019228036, which is better than 12 bits
5699 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5700 getF32Constant(DAG, 0x3d431f31, dl));
5701 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
5702 getF32Constant(DAG, 0x3ea21fb2, dl));
5703 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5704 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5705 getF32Constant(DAG, 0x3f6ae232, dl));
5706 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5707 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
5708 getF32Constant(DAG, 0x3f25f7c3, dl));
5709 } else { // LimitFloatPrecision <= 18
5710 // For floating-point precision of 18:
5711 //
5712 // Log10ofMantissa =
5713 // -0.84299375f +
5714 // (1.5327582f +
5715 // (-1.0688956f +
5716 // (0.49102474f +
5717 // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
5718 //
5719 // error 0.0000037995730, which is better than 18 bits
5720 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5721 getF32Constant(DAG, 0x3c5d51ce, dl));
5722 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
5723 getF32Constant(DAG, 0x3e00685a, dl));
5724 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5725 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5726 getF32Constant(DAG, 0x3efb6798, dl));
5727 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5728 SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
5729 getF32Constant(DAG, 0x3f88d192, dl));
5730 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5731 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
5732 getF32Constant(DAG, 0x3fc4316c, dl));
5733 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5734 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
5735 getF32Constant(DAG, 0x3f57ce70, dl));
5736 }
5737
5738 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
5739 }
5740
5741 // No special expansion.
5742 return DAG.getNode(Opcode: ISD::FLOG10, DL: dl, VT: Op.getValueType(), Operand: Op, Flags);
5743}
5744
5745/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
5746/// limited-precision mode.
5747static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5748 const TargetLowering &TLI, SDNodeFlags Flags) {
5749 if (Op.getValueType() == MVT::f32 &&
5750 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
5751 return getLimitedPrecisionExp2(t0: Op, dl, DAG);
5752
5753 // No special expansion.
5754 return DAG.getNode(Opcode: ISD::FEXP2, DL: dl, VT: Op.getValueType(), Operand: Op, Flags);
5755}
5756
5757/// expandPow - Lower a pow intrinsic. Handles the special sequences for
5758/// limited-precision mode with x == 10.0f.
5759static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
5760 SelectionDAG &DAG, const TargetLowering &TLI,
5761 SDNodeFlags Flags) {
5762 bool IsExp10 = false;
5763 if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
5764 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5765 if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(Val&: LHS)) {
5766 APFloat Ten(10.0f);
5767 IsExp10 = LHSC->isExactlyValue(V: Ten);
5768 }
5769 }
5770
5771 // TODO: What fast-math-flags should be set on the FMUL node?
5772 if (IsExp10) {
5773 // Put the exponent in the right bit position for later addition to the
5774 // final result:
5775 //
5776 // #define LOG2OF10 3.3219281f
5777 // t0 = Op * LOG2OF10;
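 //   (pow(10.0f, x) == exp2(x * log2(10)))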
5778 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
5779 getF32Constant(DAG, 0x40549a78, dl));
5780 return getLimitedPrecisionExp2(t0, dl, DAG);
5781 }
5782
5783 // No special expansion.
5784 return DAG.getNode(Opcode: ISD::FPOW, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS, Flags);
5785}
5786
5787/// ExpandPowI - Expand a llvm.powi intrinsic.
5788static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
5789 SelectionDAG &DAG) {
5790 // If RHS is a constant, we can expand this out to a multiplication tree if
5791 // it's beneficial on the target, otherwise we end up lowering to a call to
5792 // __powidf2 (for example).
5793 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) {
5794 unsigned Val = RHSC->getSExtValue();
5795
5796 // powi(x, 0) -> 1.0
5797 if (Val == 0)
5798 return DAG.getConstantFP(Val: 1.0, DL, VT: LHS.getValueType());
5799
5800 if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI(
5801 Exponent: Val, OptForSize: DAG.shouldOptForSize())) {
5802 // Get the exponent as a positive value.
5803 if ((int)Val < 0)
5804 Val = -Val;
5805 // We use the simple binary decomposition method to generate the multiply
5806 // sequence. There are more optimal ways to do this (for example,
5807 // powi(x,15) generates one more multiply than it should), but this has
5808 // the benefit of being both really simple and much better than a libcall.
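 // For example, with Val == 13 (0b1101) the loop accumulates
 // Res = x * x^4 * x^8 == x^13, squaring CurSquare on every iteration.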
5809 SDValue Res; // Logically starts equal to 1.0
5810 SDValue CurSquare = LHS;
5811 // TODO: Intrinsics should have fast-math-flags that propagate to these
5812 // nodes.
5813 while (Val) {
5814 if (Val & 1) {
5815 if (Res.getNode())
5816 Res =
5817 DAG.getNode(Opcode: ISD::FMUL, DL, VT: Res.getValueType(), N1: Res, N2: CurSquare);
5818 else
5819 Res = CurSquare; // 1.0*CurSquare.
5820 }
5821
5822 CurSquare = DAG.getNode(Opcode: ISD::FMUL, DL, VT: CurSquare.getValueType(),
5823 N1: CurSquare, N2: CurSquare);
5824 Val >>= 1;
5825 }
5826
5827 // If the original was negative, invert the result, producing 1/(x*x*x).
5828 if (RHSC->getSExtValue() < 0)
5829 Res = DAG.getNode(Opcode: ISD::FDIV, DL, VT: LHS.getValueType(),
5830 N1: DAG.getConstantFP(Val: 1.0, DL, VT: LHS.getValueType()), N2: Res);
5831 return Res;
5832 }
5833 }
5834
5835 // Otherwise, expand to a libcall.
5836 return DAG.getNode(Opcode: ISD::FPOWI, DL, VT: LHS.getValueType(), N1: LHS, N2: RHS);
5837}
5838
5839static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
5840 SDValue LHS, SDValue RHS, SDValue Scale,
5841 SelectionDAG &DAG, const TargetLowering &TLI) {
5842 EVT VT = LHS.getValueType();
5843 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
5844 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
5845 LLVMContext &Ctx = *DAG.getContext();
5846
5847 // If the type is legal but the operation isn't, this node might survive all
5848 // the way to operation legalization. If we end up there and we do not have
5849 // the ability to widen the type (if VT*2 is not legal), we cannot expand the
5850 // node.
5851
5852 // Coax the legalizer into expanding the node during type legalization instead
5853 // by bumping the size by one bit. This will force it to Promote, enabling the
5854 // early expansion and avoiding the need to expand later.
5855
5856 // We don't have to do this if Scale is 0; that can always be expanded, unless
5857 // it's a saturating signed operation. Those can experience true integer
5858 // division overflow, a case which we must avoid.
5859
5860 // FIXME: We wouldn't have to do this (or any of the early
5861 // expansion/promotion) if it was possible to expand a libcall of an
5862 // illegal type during operation legalization. But it's not, so things
5863 // get a bit hacky.
5864 unsigned ScaleInt = Scale->getAsZExtVal();
5865 if ((ScaleInt > 0 || (Saturating && Signed)) &&
5866 (TLI.isTypeLegal(VT) ||
5867 (VT.isVector() && TLI.isTypeLegal(VT: VT.getVectorElementType())))) {
5868 TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
5869 Op: Opcode, VT, Scale: ScaleInt);
5870 if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
5871 EVT PromVT;
5872 if (VT.isScalarInteger())
5873 PromVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: VT.getSizeInBits() + 1);
5874 else if (VT.isVector()) {
5875 PromVT = VT.getVectorElementType();
5876 PromVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: PromVT.getSizeInBits() + 1);
5877 PromVT = EVT::getVectorVT(Context&: Ctx, VT: PromVT, EC: VT.getVectorElementCount());
5878 } else
5879 llvm_unreachable("Wrong VT for DIVFIX?");
5880 LHS = DAG.getExtOrTrunc(IsSigned: Signed, Op: LHS, DL, VT: PromVT);
5881 RHS = DAG.getExtOrTrunc(IsSigned: Signed, Op: RHS, DL, VT: PromVT);
5882 EVT ShiftTy = TLI.getShiftAmountTy(LHSTy: PromVT, DL: DAG.getDataLayout());
5883 // For saturating operations, we need to shift up the LHS to get the
5884 // proper saturation width, and then shift down again afterwards.
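 // Since PromVT is exactly one bit wider than VT, doubling the input makes the
 // promoted result saturate at twice VT's bounds, and the final shift right by
 // one brings it back into VT's own saturation range.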
5885 if (Saturating)
5886 LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: PromVT, N1: LHS,
5887 N2: DAG.getConstant(Val: 1, DL, VT: ShiftTy));
5888 SDValue Res = DAG.getNode(Opcode, DL, VT: PromVT, N1: LHS, N2: RHS, N3: Scale);
5889 if (Saturating)
5890 Res = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL, VT: PromVT, N1: Res,
5891 N2: DAG.getConstant(Val: 1, DL, VT: ShiftTy));
5892 return DAG.getZExtOrTrunc(Op: Res, DL, VT);
5893 }
5894 }
5895
5896 return DAG.getNode(Opcode, DL, VT, N1: LHS, N2: RHS, N3: Scale);
5897}
5898
5899// getUnderlyingArgRegs - Find underlying registers used for a truncated,
5900// bitcasted, or split argument. Returns a list of <Register, size in bits> pairs.
5901static void
5902getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
5903 const SDValue &N) {
5904 switch (N.getOpcode()) {
5905 case ISD::CopyFromReg: {
5906 SDValue Op = N.getOperand(i: 1);
5907 Regs.emplace_back(Args: cast<RegisterSDNode>(Val&: Op)->getReg(),
5908 Args: Op.getValueType().getSizeInBits());
5909 return;
5910 }
5911 case ISD::BITCAST:
5912 case ISD::AssertZext:
5913 case ISD::AssertSext:
5914 case ISD::TRUNCATE:
5915 getUnderlyingArgRegs(Regs, N: N.getOperand(i: 0));
5916 return;
5917 case ISD::BUILD_PAIR:
5918 case ISD::BUILD_VECTOR:
5919 case ISD::CONCAT_VECTORS:
5920 for (SDValue Op : N->op_values())
5921 getUnderlyingArgRegs(Regs, N: Op);
5922 return;
5923 default:
5924 return;
5925 }
5926}
5927
5928/// If the DbgValueInst is a dbg_value of a function argument, create the
5929/// corresponding DBG_VALUE machine instruction for it now. At the end of
5930/// instruction selection, they will be inserted to the entry BB.
5931/// We don't currently support this for variadic dbg_values, as they shouldn't
5932/// appear for function arguments or in the prologue.
5933bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
5934 const Value *V, DILocalVariable *Variable, DIExpression *Expr,
5935 DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) {
5936 const Argument *Arg = dyn_cast<Argument>(Val: V);
5937 if (!Arg)
5938 return false;
5939
5940 MachineFunction &MF = DAG.getMachineFunction();
5941 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
5942
5943 // Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
5944 // we've been asked to pursue.
5945 auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
5946 bool Indirect) {
5947 if (Reg.isVirtual() && MF.useDebugInstrRef()) {
5948 // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
5949 // pointing at the VReg, which will be patched up later.
5950 auto &Inst = TII->get(Opcode: TargetOpcode::DBG_INSTR_REF);
5951 SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
5952 /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
5953 /* isKill */ false, /* isDead */ false,
5954 /* isUndef */ false, /* isEarlyClobber */ false,
5955 /* SubReg */ 0, /* isDebug */ true)});
5956
5957 auto *NewDIExpr = FragExpr;
5958 // We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
5959 // the DIExpression.
5960 if (Indirect)
5961 NewDIExpr = DIExpression::prepend(Expr: FragExpr, Flags: DIExpression::DerefBefore);
5962 SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
5963 NewDIExpr = DIExpression::prependOpcodes(Expr: NewDIExpr, Ops);
5964 return BuildMI(MF, DL, MCID: Inst, IsIndirect: false, MOs, Variable, Expr: NewDIExpr);
5965 } else {
5966 // Create a completely standard DBG_VALUE.
5967 auto &Inst = TII->get(Opcode: TargetOpcode::DBG_VALUE);
5968 return BuildMI(MF, DL, MCID: Inst, IsIndirect: Indirect, Reg, Variable, Expr: FragExpr);
5969 }
5970 };
5971
5972 if (Kind == FuncArgumentDbgValueKind::Value) {
5973 // ArgDbgValues are hoisted to the beginning of the entry block. So we
5974 // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
5975 // the entry block.
5976 bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
5977 if (!IsInEntryBlock)
5978 return false;
5979
5980 // ArgDbgValues are hoisted to the beginning of the entry block. So we
5981 // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
5982 // variable that also is a param.
5983 //
5984 // Although, if we are at the top of the entry block already, we can still
5985 // emit using ArgDbgValue. This might catch some situations when the
5986 // dbg.value refers to an argument that isn't used in the entry block, so
5987 // any CopyToReg node would be optimized out and the only way to express
5988 // this DBG_VALUE is by using the physical reg (or FI) as done in this
5989 // method. ArgDbgValues are hoisted to the beginning of the entry block. So
5990 // we should only emit as ArgDbgValue if the Variable is an argument to the
5991 // current function, and the dbg.value intrinsic is found in the entry
5992 // block.
5993 bool VariableIsFunctionInputArg = Variable->isParameter() &&
5994 !DL->getInlinedAt();
5995 bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
5996 if (!IsInPrologue && !VariableIsFunctionInputArg)
5997 return false;
5998
5999 // Here we assume that a function argument at the IR level can only be used
6000 // to describe one input parameter at the source level. If, for example, we
6001 // have source code like this
6002 //
6003 // struct A { long x, y; };
6004 // void foo(struct A a, long b) {
6005 // ...
6006 // b = a.x;
6007 // ...
6008 // }
6009 //
6010 // and IR like this
6011 //
6012 // define void @foo(i32 %a1, i32 %a2, i32 %b) {
6013 // entry:
6014 // call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
6015 // call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
6016 // call void @llvm.dbg.value(metadata i32 %b, "b",
6017 // ...
6018 // call void @llvm.dbg.value(metadata i32 %a1, "b"
6019 // ...
6020 //
6021 // then the last dbg.value is describing a parameter "b" using a value that
6022 // is an argument. But since we have already used %a1 to describe a parameter,
6023 // we should not handle that last dbg.value here (that would result in an
6024 // incorrect hoisting of the DBG_VALUE to the function entry).
6025 // Notice that we allow one dbg.value per IR-level argument, to accommodate
6026 // the situation with fragments above.
6027 // If there is no node for the value being handled, we return true to skip
6028 // the normal generation of debug info, as it would kill existing debug
6029 // info for the parameter in case of duplicates.
6030 if (VariableIsFunctionInputArg) {
6031 unsigned ArgNo = Arg->getArgNo();
6032 if (ArgNo >= FuncInfo.DescribedArgs.size())
6033 FuncInfo.DescribedArgs.resize(N: ArgNo + 1, t: false);
6034 else if (!IsInPrologue && FuncInfo.DescribedArgs.test(Idx: ArgNo))
6035 return !NodeMap[V].getNode();
6036 FuncInfo.DescribedArgs.set(ArgNo);
6037 }
6038 }
6039
6040 bool IsIndirect = false;
6041 std::optional<MachineOperand> Op;
6042 // Some arguments' frame index is recorded during argument lowering.
6043 int FI = FuncInfo.getArgumentFrameIndex(A: Arg);
6044 if (FI != std::numeric_limits<int>::max())
6045 Op = MachineOperand::CreateFI(Idx: FI);
6046
6047 SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
6048 if (!Op && N.getNode()) {
6049 getUnderlyingArgRegs(Regs&: ArgRegsAndSizes, N);
6050 Register Reg;
6051 if (ArgRegsAndSizes.size() == 1)
6052 Reg = ArgRegsAndSizes.front().first;
6053
6054 if (Reg && Reg.isVirtual()) {
6055 MachineRegisterInfo &RegInfo = MF.getRegInfo();
6056 Register PR = RegInfo.getLiveInPhysReg(VReg: Reg);
6057 if (PR)
6058 Reg = PR;
6059 }
6060 if (Reg) {
6061 Op = MachineOperand::CreateReg(Reg, isDef: false);
6062 IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
6063 }
6064 }
6065
6066 if (!Op && N.getNode()) {
6067 // Check if frame index is available.
6068 SDValue LCandidate = peekThroughBitcasts(V: N);
6069 if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Val: LCandidate.getNode()))
6070 if (FrameIndexSDNode *FINode =
6071 dyn_cast<FrameIndexSDNode>(Val: LNode->getBasePtr().getNode()))
6072 Op = MachineOperand::CreateFI(Idx: FINode->getIndex());
6073 }
6074
6075 if (!Op) {
6076 // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
6077 auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
6078 SplitRegs) {
6079 unsigned Offset = 0;
6080 for (const auto &RegAndSize : SplitRegs) {
6081 // If the expression is already a fragment, the current register
6082 // offset+size might extend beyond the fragment. In this case, only
6083 // the register bits that are inside the fragment are relevant.
6084 int RegFragmentSizeInBits = RegAndSize.second;
6085 if (auto ExprFragmentInfo = Expr->getFragmentInfo()) {
6086 uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits;
6087 // The register is entirely outside the expression fragment,
6088 // so is irrelevant for debug info.
6089 if (Offset >= ExprFragmentSizeInBits)
6090 break;
6091 // The register is partially outside the expression fragment, only
6092 // the low bits within the fragment are relevant for debug info.
6093 if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) {
6094 RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset;
6095 }
6096 }
6097
6098 auto FragmentExpr = DIExpression::createFragmentExpression(
6099 Expr, OffsetInBits: Offset, SizeInBits: RegFragmentSizeInBits);
6100 Offset += RegAndSize.second;
6101 // If a valid fragment expression cannot be created, the variable's
6102 // correct value cannot be determined and so it is set as Undef.
6103 if (!FragmentExpr) {
6104 SDDbgValue *SDV = DAG.getConstantDbgValue(
6105 Var: Variable, Expr, C: UndefValue::get(T: V->getType()), DL, O: SDNodeOrder);
6106 DAG.AddDbgValue(DB: SDV, isParameter: false);
6107 continue;
6108 }
6109 MachineInstr *NewMI =
6110 MakeVRegDbgValue(RegAndSize.first, *FragmentExpr,
6111 Kind != FuncArgumentDbgValueKind::Value);
6112 FuncInfo.ArgDbgValues.push_back(Elt: NewMI);
6113 }
6114 };
6115
6116 // Check if ValueMap has reg number.
6117 DenseMap<const Value *, Register>::const_iterator
6118 VMI = FuncInfo.ValueMap.find(Val: V);
6119 if (VMI != FuncInfo.ValueMap.end()) {
6120 const auto &TLI = DAG.getTargetLoweringInfo();
6121 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
6122 V->getType(), std::nullopt);
6123 if (RFV.occupiesMultipleRegs()) {
6124 splitMultiRegDbgValue(RFV.getRegsAndSizes());
6125 return true;
6126 }
6127
6128 Op = MachineOperand::CreateReg(Reg: VMI->second, isDef: false);
6129 IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
6130 } else if (ArgRegsAndSizes.size() > 1) {
6131 // This was split due to the calling convention, and no virtual register
6132 // mapping exists for the value.
6133 splitMultiRegDbgValue(ArgRegsAndSizes);
6134 return true;
6135 }
6136 }
6137
6138 if (!Op)
6139 return false;
6140
6141 assert(Variable->isValidLocationForIntrinsic(DL) &&
6142 "Expected inlined-at fields to agree");
6143 MachineInstr *NewMI = nullptr;
6144
6145 if (Op->isReg())
6146 NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect);
6147 else
6148 NewMI = BuildMI(MF, DL, MCID: TII->get(Opcode: TargetOpcode::DBG_VALUE), IsIndirect: true, MOs: *Op,
6149 Variable, Expr);
6150
6151 // Record the new instruction; it will be inserted into the entry block at
 // the end of instruction selection.
6152 FuncInfo.ArgDbgValues.push_back(Elt: NewMI);
6153 return true;
6154}
6155
6156/// Return the appropriate SDDbgValue based on N.
6157SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
6158 DILocalVariable *Variable,
6159 DIExpression *Expr,
6160 const DebugLoc &dl,
6161 unsigned DbgSDNodeOrder) {
6162 if (auto *FISDN = dyn_cast<FrameIndexSDNode>(Val: N.getNode())) {
6163 // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
6164 // stack slot locations.
6165 //
6166 // Consider "int x = 0; int *px = &x;". There are two kinds of interesting
6167 // debug values here after optimization:
6168 //
6169 // dbg.value(i32* %px, !"int *px", !DIExpression()), and
6170 // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
6171 //
6172 // Both describe the direct values of their associated variables.
6173 return DAG.getFrameIndexDbgValue(Var: Variable, Expr, FI: FISDN->getIndex(),
6174 /*IsIndirect*/ false, DL: dl, O: DbgSDNodeOrder);
6175 }
6176 return DAG.getDbgValue(Var: Variable, Expr, N: N.getNode(), R: N.getResNo(),
6177 /*IsIndirect*/ false, DL: dl, O: DbgSDNodeOrder);
6178}
6179
6180static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
6181 switch (Intrinsic) {
6182 case Intrinsic::smul_fix:
6183 return ISD::SMULFIX;
6184 case Intrinsic::umul_fix:
6185 return ISD::UMULFIX;
6186 case Intrinsic::smul_fix_sat:
6187 return ISD::SMULFIXSAT;
6188 case Intrinsic::umul_fix_sat:
6189 return ISD::UMULFIXSAT;
6190 case Intrinsic::sdiv_fix:
6191 return ISD::SDIVFIX;
6192 case Intrinsic::udiv_fix:
6193 return ISD::UDIVFIX;
6194 case Intrinsic::sdiv_fix_sat:
6195 return ISD::SDIVFIXSAT;
6196 case Intrinsic::udiv_fix_sat:
6197 return ISD::UDIVFIXSAT;
6198 default:
6199 llvm_unreachable("Unhandled fixed point intrinsic");
6200 }
6201}
6202
6203void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
6204 const char *FunctionName) {
6205 assert(FunctionName && "FunctionName must not be nullptr");
6206 SDValue Callee = DAG.getExternalSymbol(
6207 Sym: FunctionName,
6208 VT: DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()));
6209 LowerCallTo(CB: I, Callee, IsTailCall: I.isTailCall(), IsMustTailCall: I.isMustTailCall());
6210}
6211
6212/// Given a @llvm.call.preallocated.setup, return the corresponding
6213/// preallocated call.
6214static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
6215 assert(cast<CallBase>(PreallocatedSetup)
6216 ->getCalledFunction()
6217 ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
6218 "expected call_preallocated_setup Value");
6219 for (const auto *U : PreallocatedSetup->users()) {
6220 auto *UseCall = cast<CallBase>(Val: U);
6221 const Function *Fn = UseCall->getCalledFunction();
6222 if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
6223 return UseCall;
6224 }
6225 }
6226 llvm_unreachable("expected corresponding call to preallocated setup/arg");
6227}
6228
6229/// If DI is a debug value with an EntryValue expression, lower it using the
6230/// corresponding physical register of the associated Argument value
6231/// (guaranteed to exist by the verifier).
6232bool SelectionDAGBuilder::visitEntryValueDbgValue(
6233 ArrayRef<const Value *> Values, DILocalVariable *Variable,
6234 DIExpression *Expr, DebugLoc DbgLoc) {
6235 if (!Expr->isEntryValue() || !hasSingleElement(C&: Values))
6236 return false;
6237
6238 // These properties are guaranteed by the verifier.
6239 const Argument *Arg = cast<Argument>(Val: Values[0]);
6240 assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
6241
6242 auto ArgIt = FuncInfo.ValueMap.find(Val: Arg);
6243 if (ArgIt == FuncInfo.ValueMap.end()) {
6244 LLVM_DEBUG(
6245 dbgs() << "Dropping dbg.value: expression is entry_value but "
6246 "couldn't find an associated register for the Argument\n");
6247 return true;
6248 }
6249 Register ArgVReg = ArgIt->getSecond();
6250
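  // Find the entry live-in whose virtual register matches ArgVReg and use its
  // physical register as the entry-value location.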
6251 for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
6252 if (ArgVReg == VirtReg || ArgVReg == PhysReg) {
6253 SDDbgValue *SDV = DAG.getVRegDbgValue(
6254 Var: Variable, Expr, VReg: PhysReg, IsIndirect: false /*IsIndirect*/, DL: DbgLoc, O: SDNodeOrder);
6255 DAG.AddDbgValue(DB: SDV, isParameter: false /*treat as dbg.declare byval parameter*/);
6256 return true;
6257 }
6258 LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
6259 "couldn't find a physical register\n");
6260 return true;
6261}
6262
6263/// Lower a call to one of the llvm.experimental.convergence.* intrinsics.
6264void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I,
6265 unsigned Intrinsic) {
6266 SDLoc sdl = getCurSDLoc();
6267 switch (Intrinsic) {
6268 case Intrinsic::experimental_convergence_anchor:
6269 setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ANCHOR, sdl, MVT::Untyped));
6270 break;
6271 case Intrinsic::experimental_convergence_entry:
6272 setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ENTRY, sdl, MVT::Untyped));
6273 break;
6274 case Intrinsic::experimental_convergence_loop: {
6275 auto Bundle = I.getOperandBundle(ID: LLVMContext::OB_convergencectrl);
6276 auto *Token = Bundle->Inputs[0].get();
6277 setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_LOOP, sdl, MVT::Untyped,
6278 getValue(Token)));
6279 break;
6280 }
6281 }
6282}
6283
6284/// Lower the call to the specified intrinsic function.
6285void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
6286 unsigned Intrinsic) {
6287 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6288 SDLoc sdl = getCurSDLoc();
6289 DebugLoc dl = getCurDebugLoc();
6290 SDValue Res;
6291
6292 SDNodeFlags Flags;
6293 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I))
6294 Flags.copyFMF(FPMO: *FPOp);
6295
6296 switch (Intrinsic) {
6297 default:
6298 // By default, turn this into a target intrinsic node.
6299 visitTargetIntrinsic(I, Intrinsic);
6300 return;
6301 case Intrinsic::vscale: {
6302 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6303 setValue(V: &I, NewN: DAG.getVScale(DL: sdl, VT, MulImm: APInt(VT.getSizeInBits(), 1)));
6304 return;
6305 }
6306 case Intrinsic::vastart: visitVAStart(I); return;
6307 case Intrinsic::vaend: visitVAEnd(I); return;
6308 case Intrinsic::vacopy: visitVACopy(I); return;
6309 case Intrinsic::returnaddress:
6310 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::RETURNADDR, DL: sdl,
6311 VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()),
6312 Operand: getValue(V: I.getArgOperand(i: 0))));
6313 return;
6314 case Intrinsic::addressofreturnaddress:
6315 setValue(V: &I,
6316 NewN: DAG.getNode(Opcode: ISD::ADDROFRETURNADDR, DL: sdl,
6317 VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType())));
6318 return;
6319 case Intrinsic::sponentry:
6320 setValue(V: &I,
6321 NewN: DAG.getNode(Opcode: ISD::SPONENTRY, DL: sdl,
6322 VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType())));
6323 return;
6324 case Intrinsic::frameaddress:
6325 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FRAMEADDR, DL: sdl,
6326 VT: TLI.getFrameIndexTy(DL: DAG.getDataLayout()),
6327 Operand: getValue(V: I.getArgOperand(i: 0))));
6328 return;
6329 case Intrinsic::read_volatile_register:
6330 case Intrinsic::read_register: {
6331 Value *Reg = I.getArgOperand(i: 0);
6332 SDValue Chain = getRoot();
6333 SDValue RegName =
6334 DAG.getMDNode(MD: cast<MDNode>(Val: cast<MetadataAsValue>(Val: Reg)->getMetadata()));
6335 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6336 Res = DAG.getNode(ISD::READ_REGISTER, sdl,
6337 DAG.getVTList(VT, MVT::Other), Chain, RegName);
6338 setValue(V: &I, NewN: Res);
6339 DAG.setRoot(Res.getValue(R: 1));
6340 return;
6341 }
6342 case Intrinsic::write_register: {
6343 Value *Reg = I.getArgOperand(i: 0);
6344 Value *RegValue = I.getArgOperand(i: 1);
6345 SDValue Chain = getRoot();
6346 SDValue RegName =
6347 DAG.getMDNode(MD: cast<MDNode>(Val: cast<MetadataAsValue>(Val: Reg)->getMetadata()));
6348 DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
6349 RegName, getValue(RegValue)));
6350 return;
6351 }
6352 case Intrinsic::memcpy: {
6353 const auto &MCI = cast<MemCpyInst>(Val: I);
6354 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6355 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6356 SDValue Op3 = getValue(V: I.getArgOperand(i: 2));
6357 // @llvm.memcpy defines 0 and 1 to both mean no alignment.
6358 Align DstAlign = MCI.getDestAlign().valueOrOne();
6359 Align SrcAlign = MCI.getSourceAlign().valueOrOne();
6360 Align Alignment = std::min(a: DstAlign, b: SrcAlign);
6361 bool isVol = MCI.isVolatile();
6362 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6363 // FIXME: Support passing different dest/src alignments to the memcpy DAG
6364 // node.
6365 SDValue Root = isVol ? getRoot() : getMemoryRoot();
6366 SDValue MC = DAG.getMemcpy(
6367 Chain: Root, dl: sdl, Dst: Op1, Src: Op2, Size: Op3, Alignment, isVol,
6368 /* AlwaysInline */ false, isTailCall: isTC, DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
6369 SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)), AAInfo: I.getAAMetadata(), AA);
6370 updateDAGForMaybeTailCall(MaybeTC: MC);
6371 return;
6372 }
6373 case Intrinsic::memcpy_inline: {
6374 const auto &MCI = cast<MemCpyInlineInst>(Val: I);
6375 SDValue Dst = getValue(V: I.getArgOperand(i: 0));
6376 SDValue Src = getValue(V: I.getArgOperand(i: 1));
6377 SDValue Size = getValue(V: I.getArgOperand(i: 2));
6378 assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
6379 // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
6380 Align DstAlign = MCI.getDestAlign().valueOrOne();
6381 Align SrcAlign = MCI.getSourceAlign().valueOrOne();
6382 Align Alignment = std::min(a: DstAlign, b: SrcAlign);
6383 bool isVol = MCI.isVolatile();
6384 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6385 // FIXME: Support passing different dest/src alignments to the memcpy DAG
6386 // node.
6387 SDValue MC = DAG.getMemcpy(
6388 Chain: getRoot(), dl: sdl, Dst, Src, Size, Alignment, isVol,
6389 /* AlwaysInline */ true, isTailCall: isTC, DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
6390 SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)), AAInfo: I.getAAMetadata(), AA);
6391 updateDAGForMaybeTailCall(MaybeTC: MC);
6392 return;
6393 }
6394 case Intrinsic::memset: {
6395 const auto &MSI = cast<MemSetInst>(Val: I);
6396 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6397 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6398 SDValue Op3 = getValue(V: I.getArgOperand(i: 2));
6399 // @llvm.memset defines 0 and 1 to both mean no alignment.
6400 Align Alignment = MSI.getDestAlign().valueOrOne();
6401 bool isVol = MSI.isVolatile();
6402 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6403 SDValue Root = isVol ? getRoot() : getMemoryRoot();
6404 SDValue MS = DAG.getMemset(
6405 Chain: Root, dl: sdl, Dst: Op1, Src: Op2, Size: Op3, Alignment, isVol, /* AlwaysInline */ false,
6406 isTailCall: isTC, DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)), AAInfo: I.getAAMetadata());
6407 updateDAGForMaybeTailCall(MaybeTC: MS);
6408 return;
6409 }
6410 case Intrinsic::memset_inline: {
6411 const auto &MSII = cast<MemSetInlineInst>(Val: I);
6412 SDValue Dst = getValue(V: I.getArgOperand(i: 0));
6413 SDValue Value = getValue(V: I.getArgOperand(i: 1));
6414 SDValue Size = getValue(V: I.getArgOperand(i: 2));
6415 assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size");
6416 // @llvm.memset defines 0 and 1 to both mean no alignment.
6417 Align DstAlign = MSII.getDestAlign().valueOrOne();
6418 bool isVol = MSII.isVolatile();
6419 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6420 SDValue Root = isVol ? getRoot() : getMemoryRoot();
6421 SDValue MC = DAG.getMemset(Chain: Root, dl: sdl, Dst, Src: Value, Size, Alignment: DstAlign, isVol,
6422 /* AlwaysInline */ true, isTailCall: isTC,
6423 DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
6424 AAInfo: I.getAAMetadata());
6425 updateDAGForMaybeTailCall(MaybeTC: MC);
6426 return;
6427 }
6428 case Intrinsic::memmove: {
6429 const auto &MMI = cast<MemMoveInst>(Val: I);
6430 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6431 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6432 SDValue Op3 = getValue(V: I.getArgOperand(i: 2));
6433 // @llvm.memmove defines 0 and 1 to both mean no alignment.
6434 Align DstAlign = MMI.getDestAlign().valueOrOne();
6435 Align SrcAlign = MMI.getSourceAlign().valueOrOne();
6436 Align Alignment = std::min(a: DstAlign, b: SrcAlign);
6437 bool isVol = MMI.isVolatile();
6438 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6439 // FIXME: Support passing different dest/src alignments to the memmove DAG
6440 // node.
6441 SDValue Root = isVol ? getRoot() : getMemoryRoot();
6442 SDValue MM = DAG.getMemmove(Chain: Root, dl: sdl, Dst: Op1, Src: Op2, Size: Op3, Alignment, isVol,
6443 isTailCall: isTC, DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
6444 SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)),
6445 AAInfo: I.getAAMetadata(), AA);
6446 updateDAGForMaybeTailCall(MaybeTC: MM);
6447 return;
6448 }
6449 case Intrinsic::memcpy_element_unordered_atomic: {
6450 const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(Val: I);
6451 SDValue Dst = getValue(V: MI.getRawDest());
6452 SDValue Src = getValue(V: MI.getRawSource());
6453 SDValue Length = getValue(V: MI.getLength());
6454
6455 Type *LengthTy = MI.getLength()->getType();
6456 unsigned ElemSz = MI.getElementSizeInBytes();
6457 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6458 SDValue MC =
6459 DAG.getAtomicMemcpy(Chain: getRoot(), dl: sdl, Dst, Src, Size: Length, SizeTy: LengthTy, ElemSz,
6460 isTailCall: isTC, DstPtrInfo: MachinePointerInfo(MI.getRawDest()),
6461 SrcPtrInfo: MachinePointerInfo(MI.getRawSource()));
6462 updateDAGForMaybeTailCall(MaybeTC: MC);
6463 return;
6464 }
6465 case Intrinsic::memmove_element_unordered_atomic: {
6466 auto &MI = cast<AtomicMemMoveInst>(Val: I);
6467 SDValue Dst = getValue(V: MI.getRawDest());
6468 SDValue Src = getValue(V: MI.getRawSource());
6469 SDValue Length = getValue(V: MI.getLength());
6470
6471 Type *LengthTy = MI.getLength()->getType();
6472 unsigned ElemSz = MI.getElementSizeInBytes();
6473 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6474 SDValue MC =
6475 DAG.getAtomicMemmove(Chain: getRoot(), dl: sdl, Dst, Src, Size: Length, SizeTy: LengthTy, ElemSz,
6476 isTailCall: isTC, DstPtrInfo: MachinePointerInfo(MI.getRawDest()),
6477 SrcPtrInfo: MachinePointerInfo(MI.getRawSource()));
6478 updateDAGForMaybeTailCall(MaybeTC: MC);
6479 return;
6480 }
6481 case Intrinsic::memset_element_unordered_atomic: {
6482 auto &MI = cast<AtomicMemSetInst>(Val: I);
6483 SDValue Dst = getValue(V: MI.getRawDest());
6484 SDValue Val = getValue(V: MI.getValue());
6485 SDValue Length = getValue(V: MI.getLength());
6486
6487 Type *LengthTy = MI.getLength()->getType();
6488 unsigned ElemSz = MI.getElementSizeInBytes();
6489 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6490 SDValue MC =
6491 DAG.getAtomicMemset(Chain: getRoot(), dl: sdl, Dst, Value: Val, Size: Length, SizeTy: LengthTy, ElemSz,
6492 isTailCall: isTC, DstPtrInfo: MachinePointerInfo(MI.getRawDest()));
6493 updateDAGForMaybeTailCall(MaybeTC: MC);
6494 return;
6495 }
6496 case Intrinsic::call_preallocated_setup: {
6497 const CallBase *PreallocatedCall = FindPreallocatedCall(PreallocatedSetup: &I);
6498 SDValue SrcValue = DAG.getSrcValue(v: PreallocatedCall);
6499 SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
6500 getRoot(), SrcValue);
6501 setValue(V: &I, NewN: Res);
6502 DAG.setRoot(Res);
6503 return;
6504 }
6505 case Intrinsic::call_preallocated_arg: {
6506 const CallBase *PreallocatedCall = FindPreallocatedCall(PreallocatedSetup: I.getOperand(i_nocapture: 0));
6507 SDValue SrcValue = DAG.getSrcValue(v: PreallocatedCall);
6508 SDValue Ops[3];
6509 Ops[0] = getRoot();
6510 Ops[1] = SrcValue;
6511 Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
6512 MVT::i32); // arg index
6513 SDValue Res = DAG.getNode(
6514 ISD::PREALLOCATED_ARG, sdl,
6515 DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
6516 setValue(V: &I, NewN: Res);
6517 DAG.setRoot(Res.getValue(R: 1));
6518 return;
6519 }
6520 case Intrinsic::dbg_declare: {
6521 const auto &DI = cast<DbgDeclareInst>(Val: I);
6522 // Debug intrinsics are handled separately in assignment tracking mode.
6523 // Some intrinsics are handled right after Argument lowering.
6524 if (AssignmentTrackingEnabled ||
6525 FuncInfo.PreprocessedDbgDeclares.count(Ptr: &DI))
6526 return;
6527 LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DI << "\n");
6528 DILocalVariable *Variable = DI.getVariable();
6529 DIExpression *Expression = DI.getExpression();
6530 dropDanglingDebugInfo(Variable, Expr: Expression);
6531 // Assume dbg.declare cannot currently use DIArgList, i.e.
6532 // it is non-variadic.
6533 assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
6534 handleDebugDeclare(Address: DI.getVariableLocationOp(OpIdx: 0), Variable, Expression,
6535 DL: DI.getDebugLoc());
6536 return;
6537 }
6538 case Intrinsic::dbg_label: {
6539 const DbgLabelInst &DI = cast<DbgLabelInst>(Val: I);
6540 DILabel *Label = DI.getLabel();
6541 assert(Label && "Missing label");
6542
6543 SDDbgLabel *SDV;
6544 SDV = DAG.getDbgLabel(Label, DL: dl, O: SDNodeOrder);
6545 DAG.AddDbgLabel(DB: SDV);
6546 return;
6547 }
6548 case Intrinsic::dbg_assign: {
6549 // Debug intrinsics are handled separately in assignment tracking mode.
6550 if (AssignmentTrackingEnabled)
6551 return;
6552 // If assignment tracking hasn't been enabled then fall through and treat
6553 // the dbg.assign as a dbg.value.
6554 [[fallthrough]];
6555 }
6556 case Intrinsic::dbg_value: {
6557 // Debug intrinsics are handled separately in assignment tracking mode.
6558 if (AssignmentTrackingEnabled)
6559 return;
6560 const DbgValueInst &DI = cast<DbgValueInst>(Val: I);
6561 assert(DI.getVariable() && "Missing variable");
6562
6563 DILocalVariable *Variable = DI.getVariable();
6564 DIExpression *Expression = DI.getExpression();
6565 dropDanglingDebugInfo(Variable, Expr: Expression);
6566
6567 if (DI.isKillLocation()) {
6568 handleKillDebugValue(Var: Variable, Expr: Expression, DbgLoc: DI.getDebugLoc(), Order: SDNodeOrder);
6569 return;
6570 }
6571
6572 SmallVector<Value *, 4> Values(DI.getValues());
6573 if (Values.empty())
6574 return;
6575
6576 bool IsVariadic = DI.hasArgList();
6577 if (!handleDebugValue(Values, Var: Variable, Expr: Expression, DbgLoc: DI.getDebugLoc(),
6578 Order: SDNodeOrder, IsVariadic))
6579 addDanglingDebugInfo(Values, Var: Variable, Expr: Expression, IsVariadic,
6580 DL: DI.getDebugLoc(), Order: SDNodeOrder);
6581 return;
6582 }
6583
6584 case Intrinsic::eh_typeid_for: {
6585 // Find the type id for the given typeinfo.
6586 GlobalValue *GV = ExtractTypeInfo(V: I.getArgOperand(i: 0));
6587 unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(TI: GV);
6588 Res = DAG.getConstant(TypeID, sdl, MVT::i32);
6589 setValue(V: &I, NewN: Res);
6590 return;
6591 }
6592
6593 case Intrinsic::eh_return_i32:
6594 case Intrinsic::eh_return_i64:
6595 DAG.getMachineFunction().setCallsEHReturn(true);
6596 DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
6597 MVT::Other,
6598 getControlRoot(),
6599 getValue(I.getArgOperand(0)),
6600 getValue(I.getArgOperand(1))));
6601 return;
6602 case Intrinsic::eh_unwind_init:
6603 DAG.getMachineFunction().setCallsUnwindInit(true);
6604 return;
6605 case Intrinsic::eh_dwarf_cfa:
6606 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::EH_DWARF_CFA, DL: sdl,
6607 VT: TLI.getPointerTy(DL: DAG.getDataLayout()),
6608 Operand: getValue(V: I.getArgOperand(i: 0))));
6609 return;
6610 case Intrinsic::eh_sjlj_callsite: {
6611 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
6612 ConstantInt *CI = cast<ConstantInt>(Val: I.getArgOperand(i: 0));
6613 assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
6614
6615 MMI.setCurrentCallSite(CI->getZExtValue());
6616 return;
6617 }
6618 case Intrinsic::eh_sjlj_functioncontext: {
6619 // Get and store the index of the function context.
6620 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6621 AllocaInst *FnCtx =
6622 cast<AllocaInst>(Val: I.getArgOperand(i: 0)->stripPointerCasts());
6623 int FI = FuncInfo.StaticAllocaMap[FnCtx];
6624 MFI.setFunctionContextIndex(FI);
6625 return;
6626 }
6627 case Intrinsic::eh_sjlj_setjmp: {
6628 SDValue Ops[2];
6629 Ops[0] = getRoot();
6630 Ops[1] = getValue(V: I.getArgOperand(i: 0));
6631 SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
6632 DAG.getVTList(MVT::i32, MVT::Other), Ops);
6633 setValue(V: &I, NewN: Op.getValue(R: 0));
6634 DAG.setRoot(Op.getValue(R: 1));
6635 return;
6636 }
6637 case Intrinsic::eh_sjlj_longjmp:
6638 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
6639 getRoot(), getValue(I.getArgOperand(0))));
6640 return;
6641 case Intrinsic::eh_sjlj_setup_dispatch:
6642 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
6643 getRoot()));
6644 return;
6645 case Intrinsic::masked_gather:
6646 visitMaskedGather(I);
6647 return;
6648 case Intrinsic::masked_load:
6649 visitMaskedLoad(I);
6650 return;
6651 case Intrinsic::masked_scatter:
6652 visitMaskedScatter(I);
6653 return;
6654 case Intrinsic::masked_store:
6655 visitMaskedStore(I);
6656 return;
6657 case Intrinsic::masked_expandload:
6658 visitMaskedLoad(I, IsExpanding: true /* IsExpanding */);
6659 return;
6660 case Intrinsic::masked_compressstore:
6661 visitMaskedStore(I, IsCompressing: true /* IsCompressing */);
6662 return;
6663 case Intrinsic::powi:
6664 setValue(V: &I, NewN: ExpandPowI(DL: sdl, LHS: getValue(V: I.getArgOperand(i: 0)),
6665 RHS: getValue(V: I.getArgOperand(i: 1)), DAG));
6666 return;
6667 case Intrinsic::log:
6668 setValue(V: &I, NewN: expandLog(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags));
6669 return;
6670 case Intrinsic::log2:
6671 setValue(V: &I,
6672 NewN: expandLog2(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags));
6673 return;
6674 case Intrinsic::log10:
6675 setValue(V: &I,
6676 NewN: expandLog10(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags));
6677 return;
6678 case Intrinsic::exp:
6679 setValue(V: &I, NewN: expandExp(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags));
6680 return;
6681 case Intrinsic::exp2:
6682 setValue(V: &I,
6683 NewN: expandExp2(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags));
6684 return;
6685 case Intrinsic::pow:
6686 setValue(V: &I, NewN: expandPow(dl: sdl, LHS: getValue(V: I.getArgOperand(i: 0)),
6687 RHS: getValue(V: I.getArgOperand(i: 1)), DAG, TLI, Flags));
6688 return;
6689 case Intrinsic::sqrt:
6690 case Intrinsic::fabs:
6691 case Intrinsic::sin:
6692 case Intrinsic::cos:
6693 case Intrinsic::exp10:
6694 case Intrinsic::floor:
6695 case Intrinsic::ceil:
6696 case Intrinsic::trunc:
6697 case Intrinsic::rint:
6698 case Intrinsic::nearbyint:
6699 case Intrinsic::round:
6700 case Intrinsic::roundeven:
6701 case Intrinsic::canonicalize: {
6702 unsigned Opcode;
6703 switch (Intrinsic) {
6704 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
6705 case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
6706 case Intrinsic::fabs: Opcode = ISD::FABS; break;
6707 case Intrinsic::sin: Opcode = ISD::FSIN; break;
6708 case Intrinsic::cos: Opcode = ISD::FCOS; break;
6709 case Intrinsic::exp10: Opcode = ISD::FEXP10; break;
6710 case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
6711 case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
6712 case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
6713 case Intrinsic::rint: Opcode = ISD::FRINT; break;
6714 case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
6715 case Intrinsic::round: Opcode = ISD::FROUND; break;
6716 case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
6717 case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
6718 }
6719
6720 setValue(V: &I, NewN: DAG.getNode(Opcode, DL: sdl,
6721 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6722 Operand: getValue(V: I.getArgOperand(i: 0)), Flags));
6723 return;
6724 }
6725 case Intrinsic::lround:
6726 case Intrinsic::llround:
6727 case Intrinsic::lrint:
6728 case Intrinsic::llrint: {
6729 unsigned Opcode;
6730 switch (Intrinsic) {
6731 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
6732 case Intrinsic::lround: Opcode = ISD::LROUND; break;
6733 case Intrinsic::llround: Opcode = ISD::LLROUND; break;
6734 case Intrinsic::lrint: Opcode = ISD::LRINT; break;
6735 case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
6736 }
6737
6738 EVT RetVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6739 setValue(V: &I, NewN: DAG.getNode(Opcode, DL: sdl, VT: RetVT,
6740 Operand: getValue(V: I.getArgOperand(i: 0))));
6741 return;
6742 }
6743 case Intrinsic::minnum:
6744 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMINNUM, DL: sdl,
6745 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6746 N1: getValue(V: I.getArgOperand(i: 0)),
6747 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6748 return;
6749 case Intrinsic::maxnum:
6750 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMAXNUM, DL: sdl,
6751 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6752 N1: getValue(V: I.getArgOperand(i: 0)),
6753 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6754 return;
6755 case Intrinsic::minimum:
6756 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMINIMUM, DL: sdl,
6757 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6758 N1: getValue(V: I.getArgOperand(i: 0)),
6759 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6760 return;
6761 case Intrinsic::maximum:
6762 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMAXIMUM, DL: sdl,
6763 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6764 N1: getValue(V: I.getArgOperand(i: 0)),
6765 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6766 return;
6767 case Intrinsic::copysign:
6768 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FCOPYSIGN, DL: sdl,
6769 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6770 N1: getValue(V: I.getArgOperand(i: 0)),
6771 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6772 return;
6773 case Intrinsic::ldexp:
6774 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FLDEXP, DL: sdl,
6775 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6776 N1: getValue(V: I.getArgOperand(i: 0)),
6777 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6778 return;
6779 case Intrinsic::frexp: {
6780 SmallVector<EVT, 2> ValueVTs;
6781 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: I.getType(), ValueVTs);
6782 SDVTList VTs = DAG.getVTList(VTs: ValueVTs);
6783 setValue(V: &I,
6784 NewN: DAG.getNode(Opcode: ISD::FFREXP, DL: sdl, VTList: VTs, N: getValue(V: I.getArgOperand(i: 0))));
6785 return;
6786 }
6787 case Intrinsic::arithmetic_fence: {
6788 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ARITH_FENCE, DL: sdl,
6789 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6790 Operand: getValue(V: I.getArgOperand(i: 0)), Flags));
6791 return;
6792 }
6793 case Intrinsic::fma:
6794 setValue(V: &I, NewN: DAG.getNode(
6795 Opcode: ISD::FMA, DL: sdl, VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6796 N1: getValue(V: I.getArgOperand(i: 0)), N2: getValue(V: I.getArgOperand(i: 1)),
6797 N3: getValue(V: I.getArgOperand(i: 2)), Flags));
6798 return;
6799#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
6800 case Intrinsic::INTRINSIC:
6801#include "llvm/IR/ConstrainedOps.def"
6802 visitConstrainedFPIntrinsic(FPI: cast<ConstrainedFPIntrinsic>(Val: I));
6803 return;
6804#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
6805#include "llvm/IR/VPIntrinsics.def"
6806 visitVectorPredicationIntrinsic(VPIntrin: cast<VPIntrinsic>(Val: I));
6807 return;
6808 case Intrinsic::fptrunc_round: {
6809 // Get the last argument (the rounding-mode metadata) and convert it to an
6810 // integer operand for the node.
6811 Metadata *MD = cast<MetadataAsValue>(Val: I.getArgOperand(i: 1))->getMetadata();
6812 std::optional<RoundingMode> RoundMode =
6813 convertStrToRoundingMode(cast<MDString>(Val: MD)->getString());
6814
6815 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6816
6817 // Propagate fast-math-flags from IR to node(s).
6818 SDNodeFlags Flags;
6819 Flags.copyFMF(FPMO: *cast<FPMathOperator>(Val: &I));
6820 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
6821
6822 SDValue Result;
6823 Result = DAG.getNode(
6824 Opcode: ISD::FPTRUNC_ROUND, DL: sdl, VT, N1: getValue(V: I.getArgOperand(i: 0)),
6825 N2: DAG.getTargetConstant(Val: (int)*RoundMode, DL: sdl,
6826 VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
6827 setValue(V: &I, NewN: Result);
6828
6829 return;
6830 }
6831 case Intrinsic::fmuladd: {
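    // Lower to a single fused FMA node when fusion is allowed and the target
    // reports that FMA is faster than separate FMUL + FADD; otherwise fall
    // back to an explicit multiply followed by an add.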
6832 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6833 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
6834 TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), VT)) {
6835 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMA, DL: sdl,
6836 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6837 N1: getValue(V: I.getArgOperand(i: 0)),
6838 N2: getValue(V: I.getArgOperand(i: 1)),
6839 N3: getValue(V: I.getArgOperand(i: 2)), Flags));
6840 } else {
6841 // TODO: Intrinsic calls should have fast-math-flags.
6842 SDValue Mul = DAG.getNode(
6843 Opcode: ISD::FMUL, DL: sdl, VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6844 N1: getValue(V: I.getArgOperand(i: 0)), N2: getValue(V: I.getArgOperand(i: 1)), Flags);
6845 SDValue Add = DAG.getNode(Opcode: ISD::FADD, DL: sdl,
6846 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6847 N1: Mul, N2: getValue(V: I.getArgOperand(i: 2)), Flags);
6848 setValue(V: &I, NewN: Add);
6849 }
6850 return;
6851 }
6852 case Intrinsic::convert_to_fp16:
6853 setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
6854 DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
6855 getValue(I.getArgOperand(0)),
6856 DAG.getTargetConstant(0, sdl,
6857 MVT::i32))));
6858 return;
6859 case Intrinsic::convert_from_fp16:
6860 setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
6861 TLI.getValueType(DAG.getDataLayout(), I.getType()),
6862 DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
6863 getValue(I.getArgOperand(0)))));
6864 return;
6865 case Intrinsic::fptosi_sat: {
6866 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6867 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_SINT_SAT, DL: sdl, VT,
6868 N1: getValue(V: I.getArgOperand(i: 0)),
6869 N2: DAG.getValueType(VT.getScalarType())));
6870 return;
6871 }
6872 case Intrinsic::fptoui_sat: {
6873 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6874 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_UINT_SAT, DL: sdl, VT,
6875 N1: getValue(V: I.getArgOperand(i: 0)),
6876 N2: DAG.getValueType(VT.getScalarType())));
6877 return;
6878 }
6879 case Intrinsic::set_rounding:
6880 Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
6881 {getRoot(), getValue(I.getArgOperand(0))});
6882 setValue(V: &I, NewN: Res);
6883 DAG.setRoot(Res.getValue(R: 0));
6884 return;
6885 case Intrinsic::is_fpclass: {
6886 const DataLayout DLayout = DAG.getDataLayout();
6887 EVT DestVT = TLI.getValueType(DL: DLayout, Ty: I.getType());
6888 EVT ArgVT = TLI.getValueType(DL: DLayout, Ty: I.getArgOperand(i: 0)->getType());
6889 FPClassTest Test = static_cast<FPClassTest>(
6890 cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getZExtValue());
6891 MachineFunction &MF = DAG.getMachineFunction();
6892 const Function &F = MF.getFunction();
6893 SDValue Op = getValue(V: I.getArgOperand(i: 0));
6894 SDNodeFlags Flags;
6895 Flags.setNoFPExcept(
6896 !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
6897    // If ISD::IS_FPCLASS should be expanded, do it right now, because the
6898    // expansion can use illegal types. Expanding it early allows these types
6899    // to be legalized before instruction selection.
6900 if (!TLI.isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: ArgVT)) {
6901 SDValue Result = TLI.expandIS_FPCLASS(ResultVT: DestVT, Op, Test, Flags, DL: sdl, DAG);
6902 setValue(V: &I, NewN: Result);
6903 return;
6904 }
6905
6906 SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
6907 SDValue V = DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: sdl, VT: DestVT, Ops: {Op, Check}, Flags);
6908 setValue(V: &I, NewN: V);
6909 return;
6910 }
6911 case Intrinsic::get_fpenv: {
6912 const DataLayout DLayout = DAG.getDataLayout();
6913 EVT EnvVT = TLI.getValueType(DL: DLayout, Ty: I.getType());
6914 Align TempAlign = DAG.getEVTAlign(MemoryVT: EnvVT);
6915 SDValue Chain = getRoot();
6916    // Use GET_FPENV if it is legal or custom. Otherwise use a memory-based node
6917    // and temporary storage on the stack.
6918 if (TLI.isOperationLegalOrCustom(Op: ISD::GET_FPENV, VT: EnvVT)) {
6919 Res = DAG.getNode(
6920 ISD::GET_FPENV, sdl,
6921 DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()),
6922 MVT::Other),
6923 Chain);
6924 } else {
6925 SDValue Temp = DAG.CreateStackTemporary(VT: EnvVT, minAlign: TempAlign.value());
6926 int SPFI = cast<FrameIndexSDNode>(Val: Temp.getNode())->getIndex();
6927 auto MPI =
6928 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: SPFI);
6929 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
6930 PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: LocationSize::beforeOrAfterPointer(),
6931 BaseAlignment: TempAlign);
6932 Chain = DAG.getGetFPEnv(Chain, dl: sdl, Ptr: Temp, MemVT: EnvVT, MMO);
6933 Res = DAG.getLoad(VT: EnvVT, dl: sdl, Chain, Ptr: Temp, PtrInfo: MPI);
6934 }
6935 setValue(V: &I, NewN: Res);
6936 DAG.setRoot(Res.getValue(R: 1));
6937 return;
6938 }
6939 case Intrinsic::set_fpenv: {
6940 const DataLayout DLayout = DAG.getDataLayout();
6941 SDValue Env = getValue(V: I.getArgOperand(i: 0));
6942 EVT EnvVT = Env.getValueType();
6943 Align TempAlign = DAG.getEVTAlign(MemoryVT: EnvVT);
6944 SDValue Chain = getRoot();
6945    // If SET_FPENV is legal or custom, use it. Otherwise load the environment
6946    // from memory.
6947 if (TLI.isOperationLegalOrCustom(Op: ISD::SET_FPENV, VT: EnvVT)) {
6948 Chain = DAG.getNode(ISD::SET_FPENV, sdl, MVT::Other, Chain, Env);
6949 } else {
6950      // Allocate space on the stack, copy the environment bits into it and use
6951      // this memory in SET_FPENV_MEM.
6952 SDValue Temp = DAG.CreateStackTemporary(VT: EnvVT, minAlign: TempAlign.value());
6953 int SPFI = cast<FrameIndexSDNode>(Val: Temp.getNode())->getIndex();
6954 auto MPI =
6955 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: SPFI);
6956 Chain = DAG.getStore(Chain, dl: sdl, Val: Env, Ptr: Temp, PtrInfo: MPI, Alignment: TempAlign,
6957 MMOFlags: MachineMemOperand::MOStore);
6958 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
6959 PtrInfo: MPI, F: MachineMemOperand::MOLoad, Size: LocationSize::beforeOrAfterPointer(),
6960 BaseAlignment: TempAlign);
6961 Chain = DAG.getSetFPEnv(Chain, dl: sdl, Ptr: Temp, MemVT: EnvVT, MMO);
6962 }
6963 DAG.setRoot(Chain);
6964 return;
6965 }
6966 case Intrinsic::reset_fpenv:
6967 DAG.setRoot(DAG.getNode(ISD::RESET_FPENV, sdl, MVT::Other, getRoot()));
6968 return;
6969 case Intrinsic::get_fpmode:
6970 Res = DAG.getNode(
6971 ISD::GET_FPMODE, sdl,
6972 DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()),
6973 MVT::Other),
6974 DAG.getRoot());
6975 setValue(V: &I, NewN: Res);
6976 DAG.setRoot(Res.getValue(R: 1));
6977 return;
6978 case Intrinsic::set_fpmode:
6979 Res = DAG.getNode(ISD::SET_FPMODE, sdl, MVT::Other, {DAG.getRoot()},
6980 getValue(I.getArgOperand(0)));
6981 DAG.setRoot(Res);
6982 return;
6983 case Intrinsic::reset_fpmode: {
6984 Res = DAG.getNode(ISD::RESET_FPMODE, sdl, MVT::Other, getRoot());
6985 DAG.setRoot(Res);
6986 return;
6987 }
6988 case Intrinsic::pcmarker: {
6989 SDValue Tmp = getValue(V: I.getArgOperand(i: 0));
6990 DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
6991 return;
6992 }
6993 case Intrinsic::readcyclecounter: {
6994 SDValue Op = getRoot();
6995 Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
6996 DAG.getVTList(MVT::i64, MVT::Other), Op);
6997 setValue(V: &I, NewN: Res);
6998 DAG.setRoot(Res.getValue(R: 1));
6999 return;
7000 }
7001 case Intrinsic::readsteadycounter: {
7002 SDValue Op = getRoot();
7003 Res = DAG.getNode(ISD::READSTEADYCOUNTER, sdl,
7004 DAG.getVTList(MVT::i64, MVT::Other), Op);
7005 setValue(V: &I, NewN: Res);
7006 DAG.setRoot(Res.getValue(R: 1));
7007 return;
7008 }
7009 case Intrinsic::bitreverse:
7010 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::BITREVERSE, DL: sdl,
7011 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
7012 Operand: getValue(V: I.getArgOperand(i: 0))));
7013 return;
7014 case Intrinsic::bswap:
7015 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::BSWAP, DL: sdl,
7016 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
7017 Operand: getValue(V: I.getArgOperand(i: 0))));
7018 return;
7019 case Intrinsic::cttz: {
7020 SDValue Arg = getValue(V: I.getArgOperand(i: 0));
7021 ConstantInt *CI = cast<ConstantInt>(Val: I.getArgOperand(i: 1));
7022 EVT Ty = Arg.getValueType();
7023 setValue(V: &I, NewN: DAG.getNode(Opcode: CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
7024 DL: sdl, VT: Ty, Operand: Arg));
7025 return;
7026 }
7027 case Intrinsic::ctlz: {
7028 SDValue Arg = getValue(V: I.getArgOperand(i: 0));
7029 ConstantInt *CI = cast<ConstantInt>(Val: I.getArgOperand(i: 1));
7030 EVT Ty = Arg.getValueType();
7031 setValue(V: &I, NewN: DAG.getNode(Opcode: CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
7032 DL: sdl, VT: Ty, Operand: Arg));
7033 return;
7034 }
7035 case Intrinsic::ctpop: {
7036 SDValue Arg = getValue(V: I.getArgOperand(i: 0));
7037 EVT Ty = Arg.getValueType();
7038 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::CTPOP, DL: sdl, VT: Ty, Operand: Arg));
7039 return;
7040 }
7041 case Intrinsic::fshl:
7042 case Intrinsic::fshr: {
7043 bool IsFSHL = Intrinsic == Intrinsic::fshl;
7044 SDValue X = getValue(V: I.getArgOperand(i: 0));
7045 SDValue Y = getValue(V: I.getArgOperand(i: 1));
7046 SDValue Z = getValue(V: I.getArgOperand(i: 2));
7047 EVT VT = X.getValueType();
7048
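    // When both value operands are the same, a funnel shift is just a rotate,
    // e.g. fshl(x, x, z) == rotl(x, z), so prefer the rotate node.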
7049 if (X == Y) {
7050 auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
7051 setValue(V: &I, NewN: DAG.getNode(Opcode: RotateOpcode, DL: sdl, VT, N1: X, N2: Z));
7052 } else {
7053 auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
7054 setValue(V: &I, NewN: DAG.getNode(Opcode: FunnelOpcode, DL: sdl, VT, N1: X, N2: Y, N3: Z));
7055 }
7056 return;
7057 }
7058 case Intrinsic::sadd_sat: {
7059 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7060 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7061 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SADDSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
7062 return;
7063 }
7064 case Intrinsic::uadd_sat: {
7065 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7066 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7067 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UADDSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
7068 return;
7069 }
7070 case Intrinsic::ssub_sat: {
7071 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7072 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7073 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SSUBSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
7074 return;
7075 }
7076 case Intrinsic::usub_sat: {
7077 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7078 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7079 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::USUBSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
7080 return;
7081 }
7082 case Intrinsic::sshl_sat: {
7083 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7084 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7085 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SSHLSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
7086 return;
7087 }
7088 case Intrinsic::ushl_sat: {
7089 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7090 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7091 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::USHLSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
7092 return;
7093 }
7094 case Intrinsic::smul_fix:
7095 case Intrinsic::umul_fix:
7096 case Intrinsic::smul_fix_sat:
7097 case Intrinsic::umul_fix_sat: {
7098 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7099 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7100 SDValue Op3 = getValue(V: I.getArgOperand(i: 2));
7101 setValue(V: &I, NewN: DAG.getNode(Opcode: FixedPointIntrinsicToOpcode(Intrinsic), DL: sdl,
7102 VT: Op1.getValueType(), N1: Op1, N2: Op2, N3: Op3));
7103 return;
7104 }
7105 case Intrinsic::sdiv_fix:
7106 case Intrinsic::udiv_fix:
7107 case Intrinsic::sdiv_fix_sat:
7108 case Intrinsic::udiv_fix_sat: {
7109 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7110 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7111 SDValue Op3 = getValue(V: I.getArgOperand(i: 2));
7112 setValue(V: &I, NewN: expandDivFix(Opcode: FixedPointIntrinsicToOpcode(Intrinsic), DL: sdl,
7113 LHS: Op1, RHS: Op2, Scale: Op3, DAG, TLI));
7114 return;
7115 }
7116 case Intrinsic::smax: {
7117 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7118 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7119 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SMAX, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
7120 return;
7121 }
7122 case Intrinsic::smin: {
7123 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7124 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7125 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SMIN, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
7126 return;
7127 }
7128 case Intrinsic::umax: {
7129 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7130 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7131 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UMAX, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
7132 return;
7133 }
7134 case Intrinsic::umin: {
7135 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7136 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7137 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UMIN, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
7138 return;
7139 }
7140 case Intrinsic::abs: {
7141 // TODO: Preserve "int min is poison" arg in SDAG?
7142 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7143 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ABS, DL: sdl, VT: Op1.getValueType(), Operand: Op1));
7144 return;
7145 }
7146 case Intrinsic::stacksave: {
7147 SDValue Op = getRoot();
7148 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7149 Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
7150 setValue(V: &I, NewN: Res);
7151 DAG.setRoot(Res.getValue(R: 1));
7152 return;
7153 }
7154 case Intrinsic::stackrestore:
7155 Res = getValue(V: I.getArgOperand(i: 0));
7156 DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
7157 return;
7158 case Intrinsic::get_dynamic_area_offset: {
7159 SDValue Op = getRoot();
7160 EVT PtrTy = TLI.getFrameIndexTy(DL: DAG.getDataLayout());
7161 EVT ResTy = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7162    // The result type of @llvm.get.dynamic.area.offset must not be wider than
7163    // the target's pointer type (PtrTy).
7164 if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
7165 report_fatal_error(reason: "Wrong result type for @llvm.get.dynamic.area.offset"
7166 " intrinsic!");
7167 Res = DAG.getNode(Opcode: ISD::GET_DYNAMIC_AREA_OFFSET, DL: sdl, VTList: DAG.getVTList(VT: ResTy),
7168 N: Op);
7169 DAG.setRoot(Op);
7170 setValue(V: &I, NewN: Res);
7171 return;
7172 }
7173 case Intrinsic::stackguard: {
7174 MachineFunction &MF = DAG.getMachineFunction();
7175 const Module &M = *MF.getFunction().getParent();
7176 EVT PtrTy = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7177 SDValue Chain = getRoot();
7178 if (TLI.useLoadStackGuardNode()) {
7179 Res = getLoadStackGuard(DAG, DL: sdl, Chain);
7180 Res = DAG.getPtrExtOrTrunc(Op: Res, DL: sdl, VT: PtrTy);
7181 } else {
7182 const Value *Global = TLI.getSDagStackGuard(M);
7183 Align Align = DAG.getDataLayout().getPrefTypeAlign(Ty: Global->getType());
7184 Res = DAG.getLoad(VT: PtrTy, dl: sdl, Chain, Ptr: getValue(V: Global),
7185 PtrInfo: MachinePointerInfo(Global, 0), Alignment: Align,
7186 MMOFlags: MachineMemOperand::MOVolatile);
7187 }
7188 if (TLI.useStackGuardXorFP())
7189 Res = TLI.emitStackGuardXorFP(DAG, Val: Res, DL: sdl);
7190 DAG.setRoot(Chain);
7191 setValue(V: &I, NewN: Res);
7192 return;
7193 }
7194 case Intrinsic::stackprotector: {
7195 // Emit code into the DAG to store the stack guard onto the stack.
7196 MachineFunction &MF = DAG.getMachineFunction();
7197 MachineFrameInfo &MFI = MF.getFrameInfo();
7198 SDValue Src, Chain = getRoot();
7199
7200 if (TLI.useLoadStackGuardNode())
7201 Src = getLoadStackGuard(DAG, DL: sdl, Chain);
7202 else
7203 Src = getValue(V: I.getArgOperand(i: 0)); // The guard's value.
7204
7205 AllocaInst *Slot = cast<AllocaInst>(Val: I.getArgOperand(i: 1));
7206
7207 int FI = FuncInfo.StaticAllocaMap[Slot];
7208 MFI.setStackProtectorIndex(FI);
7209 EVT PtrTy = TLI.getFrameIndexTy(DL: DAG.getDataLayout());
7210
7211 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrTy);
7212
7213 // Store the stack protector onto the stack.
7214 Res = DAG.getStore(
7215 Chain, dl: sdl, Val: Src, Ptr: FIN,
7216 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI),
7217 Alignment: MaybeAlign(), MMOFlags: MachineMemOperand::MOVolatile);
7218 setValue(V: &I, NewN: Res);
7219 DAG.setRoot(Res);
7220 return;
7221 }
7222 case Intrinsic::objectsize:
7223 llvm_unreachable("llvm.objectsize.* should have been lowered already");
7224
7225 case Intrinsic::is_constant:
7226 llvm_unreachable("llvm.is.constant.* should have been lowered already");
7227
7228 case Intrinsic::annotation:
7229 case Intrinsic::ptr_annotation:
7230 case Intrinsic::launder_invariant_group:
7231 case Intrinsic::strip_invariant_group:
7232 // Drop the intrinsic, but forward the value
7233 setValue(V: &I, NewN: getValue(V: I.getOperand(i_nocapture: 0)));
7234 return;
7235
7236 case Intrinsic::assume:
7237 case Intrinsic::experimental_noalias_scope_decl:
7238 case Intrinsic::var_annotation:
7239 case Intrinsic::sideeffect:
7240 // Discard annotate attributes, noalias scope declarations, assumptions, and
7241 // artificial side-effects.
7242 return;
7243
7244 case Intrinsic::codeview_annotation: {
7245 // Emit a label associated with this metadata.
7246 MachineFunction &MF = DAG.getMachineFunction();
7247 MCSymbol *Label =
7248 MF.getMMI().getContext().createTempSymbol(Name: "annotation", AlwaysAddSuffix: true);
7249 Metadata *MD = cast<MetadataAsValue>(Val: I.getArgOperand(i: 0))->getMetadata();
7250 MF.addCodeViewAnnotation(Label, MD: cast<MDNode>(Val: MD));
7251 Res = DAG.getLabelNode(Opcode: ISD::ANNOTATION_LABEL, dl: sdl, Root: getRoot(), Label);
7252 DAG.setRoot(Res);
7253 return;
7254 }
7255
7256 case Intrinsic::init_trampoline: {
7257 const Function *F = cast<Function>(Val: I.getArgOperand(i: 1)->stripPointerCasts());
7258
7259 SDValue Ops[6];
7260 Ops[0] = getRoot();
7261 Ops[1] = getValue(V: I.getArgOperand(i: 0));
7262 Ops[2] = getValue(V: I.getArgOperand(i: 1));
7263 Ops[3] = getValue(V: I.getArgOperand(i: 2));
7264 Ops[4] = DAG.getSrcValue(v: I.getArgOperand(i: 0));
7265 Ops[5] = DAG.getSrcValue(v: F);
7266
7267 Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
7268
7269 DAG.setRoot(Res);
7270 return;
7271 }
7272 case Intrinsic::adjust_trampoline:
7273 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ADJUST_TRAMPOLINE, DL: sdl,
7274 VT: TLI.getPointerTy(DL: DAG.getDataLayout()),
7275 Operand: getValue(V: I.getArgOperand(i: 0))));
7276 return;
7277 case Intrinsic::gcroot: {
7278 assert(DAG.getMachineFunction().getFunction().hasGC() &&
7279 "only valid in functions with gc specified, enforced by Verifier");
7280 assert(GFI && "implied by previous");
7281 const Value *Alloca = I.getArgOperand(i: 0)->stripPointerCasts();
7282 const Constant *TypeMap = cast<Constant>(Val: I.getArgOperand(i: 1));
7283
7284 FrameIndexSDNode *FI = cast<FrameIndexSDNode>(Val: getValue(V: Alloca).getNode());
7285 GFI->addStackRoot(Num: FI->getIndex(), Metadata: TypeMap);
7286 return;
7287 }
7288 case Intrinsic::gcread:
7289 case Intrinsic::gcwrite:
7290 llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
7291 case Intrinsic::get_rounding:
7292 Res = DAG.getNode(ISD::GET_ROUNDING, sdl, {MVT::i32, MVT::Other}, getRoot());
7293 setValue(V: &I, NewN: Res);
7294 DAG.setRoot(Res.getValue(R: 1));
7295 return;
7296
7297 case Intrinsic::expect:
7298 // Just replace __builtin_expect(exp, c) with EXP.
7299 setValue(V: &I, NewN: getValue(V: I.getArgOperand(i: 0)));
7300 return;
7301
7302 case Intrinsic::ubsantrap:
7303 case Intrinsic::debugtrap:
7304 case Intrinsic::trap: {
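    // If the call carries a "trap-func-name" attribute, lower the trap to a
    // call to that function instead of emitting a trap instruction; for
    // ubsantrap the check kind is passed as the sole argument.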
7305 StringRef TrapFuncName =
7306 I.getAttributes().getFnAttr(Kind: "trap-func-name").getValueAsString();
7307 if (TrapFuncName.empty()) {
7308 switch (Intrinsic) {
7309 case Intrinsic::trap:
7310 DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
7311 break;
7312 case Intrinsic::debugtrap:
7313 DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
7314 break;
7315 case Intrinsic::ubsantrap:
7316 DAG.setRoot(DAG.getNode(
7317 ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
7318 DAG.getTargetConstant(
7319 cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
7320 MVT::i32)));
7321 break;
7322 default: llvm_unreachable("unknown trap intrinsic");
7323 }
7324 return;
7325 }
7326 TargetLowering::ArgListTy Args;
7327 if (Intrinsic == Intrinsic::ubsantrap) {
7328 Args.push_back(x: TargetLoweringBase::ArgListEntry());
7329 Args[0].Val = I.getArgOperand(i: 0);
7330 Args[0].Node = getValue(V: Args[0].Val);
7331 Args[0].Ty = Args[0].Val->getType();
7332 }
7333
7334 TargetLowering::CallLoweringInfo CLI(DAG);
7335 CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
7336 CC: CallingConv::C, ResultType: I.getType(),
7337 Target: DAG.getExternalSymbol(Sym: TrapFuncName.data(),
7338 VT: TLI.getPointerTy(DL: DAG.getDataLayout())),
7339 ArgsList: std::move(Args));
7340
7341 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
7342 DAG.setRoot(Result.second);
7343 return;
7344 }
7345
7346 case Intrinsic::allow_runtime_check:
7347 case Intrinsic::allow_ubsan_check:
7348 setValue(V: &I, NewN: getValue(V: ConstantInt::getTrue(Ty: I.getType())));
7349 return;
7350
7351 case Intrinsic::uadd_with_overflow:
7352 case Intrinsic::sadd_with_overflow:
7353 case Intrinsic::usub_with_overflow:
7354 case Intrinsic::ssub_with_overflow:
7355 case Intrinsic::umul_with_overflow:
7356 case Intrinsic::smul_with_overflow: {
7357 ISD::NodeType Op;
7358 switch (Intrinsic) {
7359 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
7360 case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
7361 case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
7362 case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
7363 case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
7364 case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
7365 case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
7366 }
7367 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7368 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7369
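    // These intrinsics return a {value, overflow} pair, e.g. (sketch)
    //   %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
    // so they lower to a single two-result node; for vector operands the
    // overflow result is a vector of i1 with the same element count.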
7370 EVT ResultVT = Op1.getValueType();
7371 EVT OverflowVT = MVT::i1;
7372 if (ResultVT.isVector())
7373 OverflowVT = EVT::getVectorVT(
7374 Context&: *Context, VT: OverflowVT, EC: ResultVT.getVectorElementCount());
7375
7376 SDVTList VTs = DAG.getVTList(VT1: ResultVT, VT2: OverflowVT);
7377 setValue(V: &I, NewN: DAG.getNode(Opcode: Op, DL: sdl, VTList: VTs, N1: Op1, N2: Op2));
7378 return;
7379 }
7380 case Intrinsic::prefetch: {
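    // Operands of @llvm.prefetch: address, rw (0 = read, 1 = write),
    // locality (0 - 3), and cache type (0 = instruction, 1 = data).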
7381 SDValue Ops[5];
7382 unsigned rw = cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getZExtValue();
7383    auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
7384 Ops[0] = DAG.getRoot();
7385 Ops[1] = getValue(V: I.getArgOperand(i: 0));
7386 Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
7387 MVT::i32);
7388 Ops[3] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(2)), sdl,
7389 MVT::i32);
7390 Ops[4] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(3)), sdl,
7391 MVT::i32);
7392 SDValue Result = DAG.getMemIntrinsicNode(
7393 ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
7394 EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
7395 /* align */ std::nullopt, Flags);
7396
7397 // Chain the prefetch in parallel with any pending loads, to stay out of
7398 // the way of later optimizations.
7399 PendingLoads.push_back(Elt: Result);
7400 Result = getRoot();
7401 DAG.setRoot(Result);
7402 return;
7403 }
7404 case Intrinsic::lifetime_start:
7405 case Intrinsic::lifetime_end: {
7406 bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
7407    // Stack coloring is not enabled at -O0, so discard the region information.
7408 if (TM.getOptLevel() == CodeGenOptLevel::None)
7409 return;
7410
7411 const int64_t ObjectSize =
7412 cast<ConstantInt>(Val: I.getArgOperand(i: 0))->getSExtValue();
7413 Value *const ObjectPtr = I.getArgOperand(i: 1);
7414 SmallVector<const Value *, 4> Allocas;
7415 getUnderlyingObjects(V: ObjectPtr, Objects&: Allocas);
7416
7417 for (const Value *Alloca : Allocas) {
7418 const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Val: Alloca);
7419
7420 // Could not find an Alloca.
7421 if (!LifetimeObject)
7422 continue;
7423
7424      // First check that the Alloca is static; otherwise it won't have a
7425      // valid frame index.
7426 auto SI = FuncInfo.StaticAllocaMap.find(Val: LifetimeObject);
7427 if (SI == FuncInfo.StaticAllocaMap.end())
7428 return;
7429
7430 const int FrameIndex = SI->second;
7431 int64_t Offset;
7432 if (GetPointerBaseWithConstantOffset(
7433 Ptr: ObjectPtr, Offset, DL: DAG.getDataLayout()) != LifetimeObject)
7434 Offset = -1; // Cannot determine offset from alloca to lifetime object.
7435 Res = DAG.getLifetimeNode(IsStart, dl: sdl, Chain: getRoot(), FrameIndex, Size: ObjectSize,
7436 Offset);
7437 DAG.setRoot(Res);
7438 }
7439 return;
7440 }
7441 case Intrinsic::pseudoprobe: {
7442 auto Guid = cast<ConstantInt>(Val: I.getArgOperand(i: 0))->getZExtValue();
7443 auto Index = cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getZExtValue();
7444 auto Attr = cast<ConstantInt>(Val: I.getArgOperand(i: 2))->getZExtValue();
7445 Res = DAG.getPseudoProbeNode(Dl: sdl, Chain: getRoot(), Guid, Index, Attr);
7446 DAG.setRoot(Res);
7447 return;
7448 }
7449 case Intrinsic::invariant_start:
7450 // Discard region information.
7451 setValue(V: &I,
7452 NewN: DAG.getUNDEF(VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType())));
7453 return;
7454 case Intrinsic::invariant_end:
7455 // Discard region information.
7456 return;
7457 case Intrinsic::clear_cache:
7458    // FunctionName may be null.
7459 if (const char *FunctionName = TLI.getClearCacheBuiltinName())
7460 lowerCallToExternalSymbol(I, FunctionName);
7461 return;
7462 case Intrinsic::donothing:
7463 case Intrinsic::seh_try_begin:
7464 case Intrinsic::seh_scope_begin:
7465 case Intrinsic::seh_try_end:
7466 case Intrinsic::seh_scope_end:
7467 // ignore
7468 return;
7469 case Intrinsic::experimental_stackmap:
7470 visitStackmap(I);
7471 return;
7472 case Intrinsic::experimental_patchpoint_void:
7473 case Intrinsic::experimental_patchpoint:
7474 visitPatchpoint(CB: I);
7475 return;
7476 case Intrinsic::experimental_gc_statepoint:
7477 LowerStatepoint(I: cast<GCStatepointInst>(Val: I));
7478 return;
7479 case Intrinsic::experimental_gc_result:
7480 visitGCResult(I: cast<GCResultInst>(Val: I));
7481 return;
7482 case Intrinsic::experimental_gc_relocate:
7483 visitGCRelocate(Relocate: cast<GCRelocateInst>(Val: I));
7484 return;
7485 case Intrinsic::instrprof_cover:
7486 llvm_unreachable("instrprof failed to lower a cover");
7487 case Intrinsic::instrprof_increment:
7488 llvm_unreachable("instrprof failed to lower an increment");
7489 case Intrinsic::instrprof_timestamp:
7490 llvm_unreachable("instrprof failed to lower a timestamp");
7491 case Intrinsic::instrprof_value_profile:
7492 llvm_unreachable("instrprof failed to lower a value profiling call");
7493 case Intrinsic::instrprof_mcdc_parameters:
7494 llvm_unreachable("instrprof failed to lower mcdc parameters");
7495 case Intrinsic::instrprof_mcdc_tvbitmap_update:
7496 llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update");
7497 case Intrinsic::instrprof_mcdc_condbitmap_update:
7498 llvm_unreachable("instrprof failed to lower an mcdc condbitmap update");
7499 case Intrinsic::localescape: {
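    // @llvm.localescape records the frame indices of the listed static allocas
    // so that @llvm.localrecover can recover their addresses from a child
    // function (used for SEH and similar outlined handlers).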
7500 MachineFunction &MF = DAG.getMachineFunction();
7501 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
7502
7503 // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
7504 // is the same on all targets.
7505 for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
7506 Value *Arg = I.getArgOperand(i: Idx)->stripPointerCasts();
7507 if (isa<ConstantPointerNull>(Val: Arg))
7508 continue; // Skip null pointers. They represent a hole in index space.
7509 AllocaInst *Slot = cast<AllocaInst>(Val: Arg);
7510 assert(FuncInfo.StaticAllocaMap.count(Slot) &&
7511 "can only escape static allocas");
7512 int FI = FuncInfo.StaticAllocaMap[Slot];
7513 MCSymbol *FrameAllocSym =
7514 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
7515 FuncName: GlobalValue::dropLLVMManglingEscape(Name: MF.getName()), Idx);
7516 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD: dl,
7517 MCID: TII->get(Opcode: TargetOpcode::LOCAL_ESCAPE))
7518 .addSym(Sym: FrameAllocSym)
7519 .addFrameIndex(Idx: FI);
7520 }
7521
7522 return;
7523 }
7524
7525 case Intrinsic::localrecover: {
7526 // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
7527 MachineFunction &MF = DAG.getMachineFunction();
7528
7529 // Get the symbol that defines the frame offset.
7530 auto *Fn = cast<Function>(Val: I.getArgOperand(i: 0)->stripPointerCasts());
7531 auto *Idx = cast<ConstantInt>(Val: I.getArgOperand(i: 2));
7532 unsigned IdxVal =
7533 unsigned(Idx->getLimitedValue(Limit: std::numeric_limits<int>::max()));
7534 MCSymbol *FrameAllocSym =
7535 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
7536 FuncName: GlobalValue::dropLLVMManglingEscape(Name: Fn->getName()), Idx: IdxVal);
7537
7538 Value *FP = I.getArgOperand(i: 1);
7539 SDValue FPVal = getValue(V: FP);
7540 EVT PtrVT = FPVal.getValueType();
7541
7542    // Create an MCSymbol for the label to avoid any target lowering
7543 // that would make this PC relative.
7544 SDValue OffsetSym = DAG.getMCSymbol(Sym: FrameAllocSym, VT: PtrVT);
7545 SDValue OffsetVal =
7546 DAG.getNode(Opcode: ISD::LOCAL_RECOVER, DL: sdl, VT: PtrVT, Operand: OffsetSym);
7547
7548 // Add the offset to the FP.
7549 SDValue Add = DAG.getMemBasePlusOffset(Base: FPVal, Offset: OffsetVal, DL: sdl);
7550 setValue(V: &I, NewN: Add);
7551
7552 return;
7553 }
7554
7555 case Intrinsic::eh_exceptionpointer:
7556 case Intrinsic::eh_exceptioncode: {
7557 // Get the exception pointer vreg, copy from it, and resize it to fit.
7558 const auto *CPI = cast<CatchPadInst>(Val: I.getArgOperand(i: 0));
7559 MVT PtrVT = TLI.getPointerTy(DL: DAG.getDataLayout());
7560 const TargetRegisterClass *PtrRC = TLI.getRegClassFor(VT: PtrVT);
7561 unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, RC: PtrRC);
7562 SDValue N = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: sdl, Reg: VReg, VT: PtrVT);
7563 if (Intrinsic == Intrinsic::eh_exceptioncode)
7564 N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
7565 setValue(V: &I, NewN: N);
7566 return;
7567 }
7568 case Intrinsic::xray_customevent: {
7569 // Here we want to make sure that the intrinsic behaves as if it has a
7570 // specific calling convention.
7571 const auto &Triple = DAG.getTarget().getTargetTriple();
7572 if (!Triple.isAArch64(PointerWidth: 64) && Triple.getArch() != Triple::x86_64)
7573 return;
7574
7575 SmallVector<SDValue, 8> Ops;
7576
7577    // We always want the arguments in registers.
7578 SDValue LogEntryVal = getValue(V: I.getArgOperand(i: 0));
7579 SDValue StrSizeVal = getValue(V: I.getArgOperand(i: 1));
7580 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7581 SDValue Chain = getRoot();
7582 Ops.push_back(Elt: LogEntryVal);
7583 Ops.push_back(Elt: StrSizeVal);
7584 Ops.push_back(Elt: Chain);
7585
7586    // We need to enforce the calling convention for the callsite so that
7587    // argument ordering is handled correctly, and so that register allocation
7588    // can see that some registers may be clobbered and that live values must
7589    // be preserved across calls to the intrinsic.
7590 MachineSDNode *MN = DAG.getMachineNode(Opcode: TargetOpcode::PATCHABLE_EVENT_CALL,
7591 dl: sdl, VTs: NodeTys, Ops);
7592 SDValue patchableNode = SDValue(MN, 0);
7593 DAG.setRoot(patchableNode);
7594 setValue(V: &I, NewN: patchableNode);
7595 return;
7596 }
7597 case Intrinsic::xray_typedevent: {
7598 // Here we want to make sure that the intrinsic behaves as if it has a
7599 // specific calling convention.
7600 const auto &Triple = DAG.getTarget().getTargetTriple();
7601 if (!Triple.isAArch64(PointerWidth: 64) && Triple.getArch() != Triple::x86_64)
7602 return;
7603
7604 SmallVector<SDValue, 8> Ops;
7605
7606    // We always want the arguments in registers.
7607 // It's unclear to me how manipulating the selection DAG here forces callers
7608 // to provide arguments in registers instead of on the stack.
7609 SDValue LogTypeId = getValue(V: I.getArgOperand(i: 0));
7610 SDValue LogEntryVal = getValue(V: I.getArgOperand(i: 1));
7611 SDValue StrSizeVal = getValue(V: I.getArgOperand(i: 2));
7612 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7613 SDValue Chain = getRoot();
7614 Ops.push_back(Elt: LogTypeId);
7615 Ops.push_back(Elt: LogEntryVal);
7616 Ops.push_back(Elt: StrSizeVal);
7617 Ops.push_back(Elt: Chain);
7618
7619    // We need to enforce the calling convention for the callsite so that
7620    // argument ordering is handled correctly, and so that register allocation
7621    // can see that some registers may be clobbered and that live values must
7622    // be preserved across calls to the intrinsic.
7623 MachineSDNode *MN = DAG.getMachineNode(
7624 Opcode: TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, dl: sdl, VTs: NodeTys, Ops);
7625 SDValue patchableNode = SDValue(MN, 0);
7626 DAG.setRoot(patchableNode);
7627 setValue(V: &I, NewN: patchableNode);
7628 return;
7629 }
7630 case Intrinsic::experimental_deoptimize:
7631 LowerDeoptimizeCall(CI: &I);
7632 return;
7633 case Intrinsic::experimental_stepvector:
7634 visitStepVector(I);
7635 return;
7636 case Intrinsic::vector_reduce_fadd:
7637 case Intrinsic::vector_reduce_fmul:
7638 case Intrinsic::vector_reduce_add:
7639 case Intrinsic::vector_reduce_mul:
7640 case Intrinsic::vector_reduce_and:
7641 case Intrinsic::vector_reduce_or:
7642 case Intrinsic::vector_reduce_xor:
7643 case Intrinsic::vector_reduce_smax:
7644 case Intrinsic::vector_reduce_smin:
7645 case Intrinsic::vector_reduce_umax:
7646 case Intrinsic::vector_reduce_umin:
7647 case Intrinsic::vector_reduce_fmax:
7648 case Intrinsic::vector_reduce_fmin:
7649 case Intrinsic::vector_reduce_fmaximum:
7650 case Intrinsic::vector_reduce_fminimum:
7651 visitVectorReduce(I, Intrinsic);
7652 return;
7653
7654 case Intrinsic::icall_branch_funnel: {
7655 SmallVector<SDValue, 16> Ops;
7656 Ops.push_back(Elt: getValue(V: I.getArgOperand(i: 0)));
7657
7658 int64_t Offset;
7659 auto *Base = dyn_cast<GlobalObject>(Val: GetPointerBaseWithConstantOffset(
7660 Ptr: I.getArgOperand(i: 1), Offset, DL: DAG.getDataLayout()));
7661 if (!Base)
7662 report_fatal_error(
7663 reason: "llvm.icall.branch.funnel operand must be a GlobalValue");
7664 Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0));
7665
7666 struct BranchFunnelTarget {
7667 int64_t Offset;
7668 SDValue Target;
7669 };
7670 SmallVector<BranchFunnelTarget, 8> Targets;
7671
7672 for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
7673 auto *ElemBase = dyn_cast<GlobalObject>(Val: GetPointerBaseWithConstantOffset(
7674 Ptr: I.getArgOperand(i: Op), Offset, DL: DAG.getDataLayout()));
7675 if (ElemBase != Base)
7676 report_fatal_error(reason: "all llvm.icall.branch.funnel operands must refer "
7677 "to the same GlobalValue");
7678
7679 SDValue Val = getValue(V: I.getArgOperand(i: Op + 1));
7680 auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
7681 if (!GA)
7682 report_fatal_error(
7683 reason: "llvm.icall.branch.funnel operand must be a GlobalValue");
7684 Targets.push_back(Elt: {.Offset: Offset, .Target: DAG.getTargetGlobalAddress(
7685 GV: GA->getGlobal(), DL: sdl, VT: Val.getValueType(),
7686 offset: GA->getOffset())});
7687 }
7688 llvm::sort(C&: Targets,
7689 Comp: [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
7690 return T1.Offset < T2.Offset;
7691 });
7692
7693 for (auto &T : Targets) {
7694 Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32));
7695 Ops.push_back(Elt: T.Target);
7696 }
7697
7698 Ops.push_back(Elt: DAG.getRoot()); // Chain
7699 SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl,
7700 MVT::Other, Ops),
7701 0);
7702 DAG.setRoot(N);
7703 setValue(V: &I, NewN: N);
7704 HasTailCall = true;
7705 return;
7706 }
7707
7708 case Intrinsic::wasm_landingpad_index:
7709    // The information this intrinsic contained has been transferred to the
7710    // MachineFunction in SelectionDAGISel::PrepareEHLandingPad, so it can
7711    // safely be deleted now.
7712 return;
7713
7714 case Intrinsic::aarch64_settag:
7715 case Intrinsic::aarch64_settag_zero: {
7716 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7717 bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
7718 SDValue Val = TSI.EmitTargetCodeForSetTag(
7719 DAG, dl: sdl, Chain: getRoot(), Addr: getValue(V: I.getArgOperand(i: 0)),
7720 Size: getValue(V: I.getArgOperand(i: 1)), DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
7721 ZeroData: ZeroMemory);
7722 DAG.setRoot(Val);
7723 setValue(V: &I, NewN: Val);
7724 return;
7725 }
7726 case Intrinsic::amdgcn_cs_chain: {
7727 assert(I.arg_size() == 5 && "Additional args not supported yet");
7728 assert(cast<ConstantInt>(I.getOperand(4))->isZero() &&
7729 "Non-zero flags not supported yet");
7730
7731 // At this point we don't care if it's amdgpu_cs_chain or
7732 // amdgpu_cs_chain_preserve.
7733 CallingConv::ID CC = CallingConv::AMDGPU_CS_Chain;
7734
7735 Type *RetTy = I.getType();
7736 assert(RetTy->isVoidTy() && "Should not return");
7737
7738 SDValue Callee = getValue(V: I.getOperand(i_nocapture: 0));
7739
7740 // We only have 2 actual args: one for the SGPRs and one for the VGPRs.
7741    // We'll also tack on the value of the EXEC mask at the end.
7742 TargetLowering::ArgListTy Args;
7743 Args.reserve(n: 3);
7744
7745 for (unsigned Idx : {2, 3, 1}) {
7746 TargetLowering::ArgListEntry Arg;
7747 Arg.Node = getValue(V: I.getOperand(i_nocapture: Idx));
7748 Arg.Ty = I.getOperand(i_nocapture: Idx)->getType();
7749 Arg.setAttributes(Call: &I, ArgIdx: Idx);
7750 Args.push_back(x: Arg);
7751 }
7752
7753 assert(Args[0].IsInReg && "SGPR args should be marked inreg");
7754 assert(!Args[1].IsInReg && "VGPR args should not be marked inreg");
7755 Args[2].IsInReg = true; // EXEC should be inreg
7756
7757 TargetLowering::CallLoweringInfo CLI(DAG);
7758 CLI.setDebugLoc(getCurSDLoc())
7759 .setChain(getRoot())
7760 .setCallee(CC, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
7761 .setNoReturn(true)
7762 .setTailCall(true)
7763 .setConvergent(I.isConvergent());
7764 CLI.CB = &I;
7765 std::pair<SDValue, SDValue> Result =
7766 lowerInvokable(CLI, /*EHPadBB*/ nullptr);
7767 (void)Result;
7768 assert(!Result.first.getNode() && !Result.second.getNode() &&
7769 "Should've lowered as tail call");
7770
7771 HasTailCall = true;
7772 return;
7773 }
7774 case Intrinsic::ptrmask: {
7775 SDValue Ptr = getValue(V: I.getOperand(i_nocapture: 0));
7776 SDValue Mask = getValue(V: I.getOperand(i_nocapture: 1));
7777
7778 EVT PtrVT = Ptr.getValueType();
7779 assert(PtrVT == Mask.getValueType() &&
7780 "Pointers with different index type are not supported by SDAG");
7781 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::AND, DL: sdl, VT: PtrVT, N1: Ptr, N2: Mask));
7782 return;
7783 }
7784 case Intrinsic::threadlocal_address: {
7785 setValue(V: &I, NewN: getValue(V: I.getOperand(i_nocapture: 0)));
7786 return;
7787 }
7788 case Intrinsic::get_active_lane_mask: {
7789 EVT CCVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7790 SDValue Index = getValue(V: I.getOperand(i_nocapture: 0));
7791 EVT ElementVT = Index.getValueType();
7792
7793 if (!TLI.shouldExpandGetActiveLaneMask(VT: CCVT, OpVT: ElementVT)) {
7794 visitTargetIntrinsic(I, Intrinsic);
7795 return;
7796 }
7797
7798 SDValue TripCount = getValue(V: I.getOperand(i_nocapture: 1));
7799 EVT VecTy = EVT::getVectorVT(Context&: *DAG.getContext(), VT: ElementVT,
7800 EC: CCVT.getVectorElementCount());
7801
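    // The generic expansion computes, for each lane i,
    //   mask[i] = (uadd.sat(Index, i) < TripCount)
    // using a saturating add so the comparison cannot wrap around.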
7802 SDValue VectorIndex = DAG.getSplat(VT: VecTy, DL: sdl, Op: Index);
7803 SDValue VectorTripCount = DAG.getSplat(VT: VecTy, DL: sdl, Op: TripCount);
7804 SDValue VectorStep = DAG.getStepVector(DL: sdl, ResVT: VecTy);
7805 SDValue VectorInduction = DAG.getNode(
7806 Opcode: ISD::UADDSAT, DL: sdl, VT: VecTy, N1: VectorIndex, N2: VectorStep);
7807 SDValue SetCC = DAG.getSetCC(DL: sdl, VT: CCVT, LHS: VectorInduction,
7808 RHS: VectorTripCount, Cond: ISD::CondCode::SETULT);
7809 setValue(V: &I, NewN: SetCC);
7810 return;
7811 }
7812 case Intrinsic::experimental_get_vector_length: {
7813 assert(cast<ConstantInt>(I.getOperand(1))->getSExtValue() > 0 &&
7814 "Expected positive VF");
7815 unsigned VF = cast<ConstantInt>(Val: I.getOperand(i_nocapture: 1))->getZExtValue();
7816 bool IsScalable = cast<ConstantInt>(Val: I.getOperand(i_nocapture: 2))->isOne();
7817
7818 SDValue Count = getValue(V: I.getOperand(i_nocapture: 0));
7819 EVT CountVT = Count.getValueType();
7820
7821 if (!TLI.shouldExpandGetVectorLength(CountVT, VF, IsScalable)) {
7822 visitTargetIntrinsic(I, Intrinsic);
7823 return;
7824 }
7825
7826    // Expand to a umin between the trip count and the maximum number of
7827    // elements the type can hold.
7828 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7829
7830 // Extend the trip count to at least the result VT.
7831 if (CountVT.bitsLT(VT)) {
7832 Count = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: sdl, VT, Operand: Count);
7833 CountVT = VT;
7834 }
7835
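    // The resulting length is umin(Count, VF) for a fixed VF, or
    // umin(Count, VF * vscale) when the type is scalable.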
7836 SDValue MaxEVL = DAG.getElementCount(DL: sdl, VT: CountVT,
7837 EC: ElementCount::get(MinVal: VF, Scalable: IsScalable));
7838
7839 SDValue UMin = DAG.getNode(Opcode: ISD::UMIN, DL: sdl, VT: CountVT, N1: Count, N2: MaxEVL);
7840 // Clip to the result type if needed.
7841 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: sdl, VT, Operand: UMin);
7842
7843 setValue(V: &I, NewN: Trunc);
7844 return;
7845 }
7846 case Intrinsic::experimental_cttz_elts: {
7847 auto DL = getCurSDLoc();
7848 SDValue Op = getValue(V: I.getOperand(i_nocapture: 0));
7849 EVT OpVT = Op.getValueType();
7850
7851 if (!TLI.shouldExpandCttzElements(VT: OpVT)) {
7852 visitTargetIntrinsic(I, Intrinsic);
7853 return;
7854 }
7855
7856 if (OpVT.getScalarType() != MVT::i1) {
7857      // Compare the input vector elements to zero; the resulting i1 mask is
7858 SDValue AllZero = DAG.getConstant(Val: 0, DL, VT: OpVT);
7859 OpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
7860 OpVT.getVectorElementCount());
7861 Op = DAG.getSetCC(DL, VT: OpVT, LHS: Op, RHS: AllZero, Cond: ISD::SETNE);
7862 }
7863
7864 // Find the smallest "sensible" element type to use for the expansion.
7865 ConstantRange CR(
7866 APInt(64, OpVT.getVectorElementCount().getKnownMinValue()));
7867 if (OpVT.isScalableVT())
7868 CR = CR.umul_sat(Other: getVScaleRange(F: I.getCaller(), BitWidth: 64));
7869
7870 // If the zero-is-poison flag is set, we can assume the upper limit
7871 // of the result is VF-1.
7872 if (!cast<ConstantSDNode>(Val: getValue(V: I.getOperand(i_nocapture: 1)))->isZero())
7873 CR = CR.subtract(CI: APInt(64, 1));
7874
7875 unsigned EltWidth = I.getType()->getScalarSizeInBits();
7876 EltWidth = std::min(a: EltWidth, b: (unsigned)CR.getActiveBits());
7877 EltWidth = std::max(a: llvm::bit_ceil(Value: EltWidth), b: (unsigned)8);
7878
7879 MVT NewEltTy = MVT::getIntegerVT(BitWidth: EltWidth);
7880
7881 // Create the new vector type & get the vector length
7882 EVT NewVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NewEltTy,
7883 EC: OpVT.getVectorElementCount());
7884
7885 SDValue VL =
7886 DAG.getElementCount(DL, VT: NewEltTy, EC: OpVT.getVectorElementCount());
7887
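    // Build the vector [VL, VL-1, ..., 1], mask it with the sign-extended
    // input (all-ones lanes where the element is set), and take the unsigned
    // maximum; VL minus that maximum is the index of the first set element,
    // or VL if no element is set.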
7888 SDValue StepVec = DAG.getStepVector(DL, ResVT: NewVT);
7889 SDValue SplatVL = DAG.getSplat(VT: NewVT, DL, Op: VL);
7890 SDValue StepVL = DAG.getNode(Opcode: ISD::SUB, DL, VT: NewVT, N1: SplatVL, N2: StepVec);
7891 SDValue Ext = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewVT, Operand: Op);
7892 SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT: NewVT, N1: StepVL, N2: Ext);
7893 SDValue Max = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL, VT: NewEltTy, Operand: And);
7894 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: NewEltTy, N1: VL, N2: Max);
7895
7896 EVT RetTy = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7897 SDValue Ret = DAG.getZExtOrTrunc(Op: Sub, DL, VT: RetTy);
7898
7899 setValue(V: &I, NewN: Ret);
7900 return;
7901 }
7902 case Intrinsic::vector_insert: {
7903 SDValue Vec = getValue(V: I.getOperand(i_nocapture: 0));
7904 SDValue SubVec = getValue(V: I.getOperand(i_nocapture: 1));
7905 SDValue Index = getValue(V: I.getOperand(i_nocapture: 2));
7906
7907 // The intrinsic's index type is i64, but the SDNode requires an index type
7908 // suitable for the target. Convert the index as required.
7909 MVT VectorIdxTy = TLI.getVectorIdxTy(DL: DAG.getDataLayout());
7910 if (Index.getValueType() != VectorIdxTy)
7911 Index = DAG.getVectorIdxConstant(Val: Index->getAsZExtVal(), DL: sdl);
7912
7913 EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7914 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: sdl, VT: ResultVT, N1: Vec, N2: SubVec,
7915 N3: Index));
7916 return;
7917 }
7918 case Intrinsic::vector_extract: {
7919 SDValue Vec = getValue(V: I.getOperand(i_nocapture: 0));
7920 SDValue Index = getValue(V: I.getOperand(i_nocapture: 1));
7921 EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7922
7923 // The intrinsic's index type is i64, but the SDNode requires an index type
7924 // suitable for the target. Convert the index as required.
7925 MVT VectorIdxTy = TLI.getVectorIdxTy(DL: DAG.getDataLayout());
7926 if (Index.getValueType() != VectorIdxTy)
7927 Index = DAG.getVectorIdxConstant(Val: Index->getAsZExtVal(), DL: sdl);
7928
7929 setValue(V: &I,
7930 NewN: DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: sdl, VT: ResultVT, N1: Vec, N2: Index));
7931 return;
7932 }
7933 case Intrinsic::experimental_vector_reverse:
7934 visitVectorReverse(I);
7935 return;
7936 case Intrinsic::experimental_vector_splice:
7937 visitVectorSplice(I);
7938 return;
7939 case Intrinsic::callbr_landingpad:
7940 visitCallBrLandingPad(I);
7941 return;
7942 case Intrinsic::experimental_vector_interleave2:
7943 visitVectorInterleave(I);
7944 return;
7945 case Intrinsic::experimental_vector_deinterleave2:
7946 visitVectorDeinterleave(I);
7947 return;
7948 case Intrinsic::experimental_convergence_anchor:
7949 case Intrinsic::experimental_convergence_entry:
7950 case Intrinsic::experimental_convergence_loop:
7951 visitConvergenceControl(I, Intrinsic);
7952 }
7953}
7954
7955void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
7956 const ConstrainedFPIntrinsic &FPI) {
7957 SDLoc sdl = getCurSDLoc();
7958
7959 // We do not need to serialize constrained FP intrinsics against
7960 // each other or against (nonvolatile) loads, so they can be
7961 // chained like loads.
7962 SDValue Chain = DAG.getRoot();
7963 SmallVector<SDValue, 4> Opers;
7964 Opers.push_back(Elt: Chain);
7965 if (FPI.isUnaryOp()) {
7966 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 0)));
7967 } else if (FPI.isTernaryOp()) {
7968 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 0)));
7969 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 1)));
7970 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 2)));
7971 } else {
7972 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 0)));
7973 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 1)));
7974 }
7975
7976 auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
7977 assert(Result.getNode()->getNumValues() == 2);
7978
7979 // Push node to the appropriate list so that future instructions can be
7980 // chained up correctly.
7981 SDValue OutChain = Result.getValue(R: 1);
7982 switch (EB) {
7983 case fp::ExceptionBehavior::ebIgnore:
7984 // The only reason why ebIgnore nodes still need to be chained is that
7985 // they might depend on the current rounding mode, and therefore must
7986      // not be moved across instructions that may change that mode.
7987 [[fallthrough]];
7988 case fp::ExceptionBehavior::ebMayTrap:
7989 // These must not be moved across calls or instructions that may change
7990 // floating-point exception masks.
7991 PendingConstrainedFP.push_back(Elt: OutChain);
7992 break;
7993 case fp::ExceptionBehavior::ebStrict:
7994 // These must not be moved across calls or instructions that may change
7995 // floating-point exception masks or read floating-point exception flags.
7996 // In addition, they cannot be optimized out even if unused.
7997 PendingConstrainedFPStrict.push_back(Elt: OutChain);
7998 break;
7999 }
8000 };
8001
8002 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8003 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: FPI.getType());
8004 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
8005 fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
8006
8007 SDNodeFlags Flags;
8008 if (EB == fp::ExceptionBehavior::ebIgnore)
8009 Flags.setNoFPExcept(true);
8010
8011 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &FPI))
8012 Flags.copyFMF(FPMO: *FPOp);
8013
8014 unsigned Opcode;
8015 switch (FPI.getIntrinsicID()) {
8016 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
8017#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
8018 case Intrinsic::INTRINSIC: \
8019 Opcode = ISD::STRICT_##DAGN; \
8020 break;
8021#include "llvm/IR/ConstrainedOps.def"
8022 case Intrinsic::experimental_constrained_fmuladd: {
8023 Opcode = ISD::STRICT_FMA;
8024 // Break fmuladd into fmul and fadd.
8025 if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
8026 !TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), VT)) {
8027 Opers.pop_back();
8028 SDValue Mul = DAG.getNode(Opcode: ISD::STRICT_FMUL, DL: sdl, VTList: VTs, Ops: Opers, Flags);
8029 pushOutChain(Mul, EB);
8030 Opcode = ISD::STRICT_FADD;
8031 Opers.clear();
8032 Opers.push_back(Elt: Mul.getValue(R: 1));
8033 Opers.push_back(Elt: Mul.getValue(R: 0));
8034 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 2)));
8035 }
8036 break;
8037 }
8038 }
8039
8040 // A few strict DAG nodes carry additional operands that are not
8041 // set up by the default code above.
8042 switch (Opcode) {
8043 default: break;
8044 case ISD::STRICT_FP_ROUND:
8045 Opers.push_back(
8046 Elt: DAG.getTargetConstant(Val: 0, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
8047 break;
8048 case ISD::STRICT_FSETCC:
8049 case ISD::STRICT_FSETCCS: {
8050 auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(Val: &FPI);
8051 ISD::CondCode Condition = getFCmpCondCode(Pred: FPCmp->getPredicate());
8052 if (TM.Options.NoNaNsFPMath)
8053 Condition = getFCmpCodeWithoutNaN(CC: Condition);
8054 Opers.push_back(Elt: DAG.getCondCode(Cond: Condition));
8055 break;
8056 }
8057 }
8058
8059 SDValue Result = DAG.getNode(Opcode, DL: sdl, VTList: VTs, Ops: Opers, Flags);
8060 pushOutChain(Result, EB);
8061
8062 SDValue FPResult = Result.getValue(R: 0);
8063 setValue(V: &FPI, NewN: FPResult);
8064}
8065
8066static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
8067 std::optional<unsigned> ResOPC;
8068 switch (VPIntrin.getIntrinsicID()) {
8069 case Intrinsic::vp_ctlz: {
8070 bool IsZeroUndef = cast<ConstantInt>(Val: VPIntrin.getArgOperand(i: 1))->isOne();
8071 ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ;
8072 break;
8073 }
8074 case Intrinsic::vp_cttz: {
8075 bool IsZeroUndef = cast<ConstantInt>(Val: VPIntrin.getArgOperand(i: 1))->isOne();
8076 ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ;
8077 break;
8078 }
8079#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
8080 case Intrinsic::VPID: \
8081 ResOPC = ISD::VPSD; \
8082 break;
8083#include "llvm/IR/VPIntrinsics.def"
8084 }
8085
8086 if (!ResOPC)
8087 llvm_unreachable(
8088 "Inconsistency: no SDNode available for this VPIntrinsic!");
8089
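  // A sequential (ordered) FP reduction may be lowered to the unordered form
  // when the reassoc fast-math flag permits it.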
8090 if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
8091 *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
8092 if (VPIntrin.getFastMathFlags().allowReassoc())
8093 return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
8094 : ISD::VP_REDUCE_FMUL;
8095 }
8096
8097 return *ResOPC;
8098}
8099
8100void SelectionDAGBuilder::visitVPLoad(
8101 const VPIntrinsic &VPIntrin, EVT VT,
8102 const SmallVectorImpl<SDValue> &OpValues) {
8103 SDLoc DL = getCurSDLoc();
8104 Value *PtrOperand = VPIntrin.getArgOperand(i: 0);
8105 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
8106 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
8107 const MDNode *Ranges = getRangeMetadata(I: VPIntrin);
8108 SDValue LD;
8109 // Do not serialize variable-length loads of constant memory with
8110 // anything.
8111 if (!Alignment)
8112 Alignment = DAG.getEVTAlign(MemoryVT: VT);
8113 MemoryLocation ML = MemoryLocation::getAfter(Ptr: PtrOperand, AATags: AAInfo);
8114 bool AddToChain = !AA || !AA->pointsToConstantMemory(Loc: ML);
8115 SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
8116 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
8117 PtrInfo: MachinePointerInfo(PtrOperand), F: MachineMemOperand::MOLoad,
8118 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo, Ranges);
8119 LD = DAG.getLoadVP(VT, dl: DL, Chain: InChain, Ptr: OpValues[0], Mask: OpValues[1], EVL: OpValues[2],
8120 MMO, IsExpanding: false /*IsExpanding */);
8121 if (AddToChain)
8122 PendingLoads.push_back(Elt: LD.getValue(R: 1));
8123 setValue(V: &VPIntrin, NewN: LD);
8124}
8125
8126void SelectionDAGBuilder::visitVPGather(
8127 const VPIntrinsic &VPIntrin, EVT VT,
8128 const SmallVectorImpl<SDValue> &OpValues) {
8129 SDLoc DL = getCurSDLoc();
8130 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8131 Value *PtrOperand = VPIntrin.getArgOperand(i: 0);
8132 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
8133 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
8134 const MDNode *Ranges = getRangeMetadata(I: VPIntrin);
8135 SDValue LD;
8136 if (!Alignment)
8137 Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType());
8138 unsigned AS =
8139 PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
8140 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
8141 PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOLoad,
8142 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo, Ranges);
8143 SDValue Base, Index, Scale;
8144 ISD::MemIndexType IndexType;
8145 bool UniformBase = getUniformBase(Ptr: PtrOperand, Base, Index, IndexType, Scale,
8146 SDB: this, CurBB: VPIntrin.getParent(),
8147 ElemSize: VT.getScalarStoreSize());
8148 if (!UniformBase) {
8149 Base = DAG.getConstant(Val: 0, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
8150 Index = getValue(V: PtrOperand);
8151 IndexType = ISD::SIGNED_SCALED;
8152 Scale = DAG.getTargetConstant(Val: 1, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
8153 }
8154 EVT IdxVT = Index.getValueType();
8155 EVT EltTy = IdxVT.getVectorElementType();
8156 if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) {
8157 EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy);
8158 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewIdxVT, Operand: Index);
8159 }
8160 LD = DAG.getGatherVP(
8161 DAG.getVTList(VT, MVT::Other), VT, DL,
8162 {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
8163 IndexType);
8164 PendingLoads.push_back(Elt: LD.getValue(R: 1));
8165 setValue(V: &VPIntrin, NewN: LD);
8166}
8167
8168void SelectionDAGBuilder::visitVPStore(
8169 const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
8170 SDLoc DL = getCurSDLoc();
8171 Value *PtrOperand = VPIntrin.getArgOperand(i: 1);
8172 EVT VT = OpValues[0].getValueType();
8173 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
8174 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
8175 SDValue ST;
8176 if (!Alignment)
8177 Alignment = DAG.getEVTAlign(MemoryVT: VT);
8178 SDValue Ptr = OpValues[1];
8179 SDValue Offset = DAG.getUNDEF(VT: Ptr.getValueType());
8180 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
8181 PtrInfo: MachinePointerInfo(PtrOperand), F: MachineMemOperand::MOStore,
8182 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo);
8183 ST = DAG.getStoreVP(Chain: getMemoryRoot(), dl: DL, Val: OpValues[0], Ptr, Offset,
8184 Mask: OpValues[2], EVL: OpValues[3], MemVT: VT, MMO, AM: ISD::UNINDEXED,
8185 /* IsTruncating */ false, /*IsCompressing*/ false);
8186 DAG.setRoot(ST);
8187 setValue(V: &VPIntrin, NewN: ST);
8188}
8189
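// llvm.vp.scatter is handled like vp.gather above, but on the store side:
// the stored value is OpValues[0], and the vector of pointers is split into
// base/index/scale when possible, falling back to a zero base with the
// pointer vector as the index.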
8190void SelectionDAGBuilder::visitVPScatter(
8191 const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
8192 SDLoc DL = getCurSDLoc();
8193 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8194 Value *PtrOperand = VPIntrin.getArgOperand(i: 1);
8195 EVT VT = OpValues[0].getValueType();
8196 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
8197 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
8198 SDValue ST;
8199 if (!Alignment)
8200 Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType());
8201 unsigned AS =
8202 PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
8203 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
8204 PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOStore,
8205 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo);
8206 SDValue Base, Index, Scale;
8207 ISD::MemIndexType IndexType;
8208 bool UniformBase = getUniformBase(Ptr: PtrOperand, Base, Index, IndexType, Scale,
8209 SDB: this, CurBB: VPIntrin.getParent(),
8210 ElemSize: VT.getScalarStoreSize());
8211 if (!UniformBase) {
8212 Base = DAG.getConstant(Val: 0, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
8213 Index = getValue(V: PtrOperand);
8214 IndexType = ISD::SIGNED_SCALED;
8215 Scale =
8216 DAG.getTargetConstant(Val: 1, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
8217 }
8218 EVT IdxVT = Index.getValueType();
8219 EVT EltTy = IdxVT.getVectorElementType();
8220 if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) {
8221 EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy);
8222 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewIdxVT, Operand: Index);
8223 }
8224 ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
8225 {getMemoryRoot(), OpValues[0], Base, Index, Scale,
8226 OpValues[2], OpValues[3]},
8227 MMO, IndexType);
8228 DAG.setRoot(ST);
8229 setValue(V: &VPIntrin, NewN: ST);
8230}
8231
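// The experimental strided intrinsics take a scalar start pointer and a
// scalar byte stride instead of a vector of pointers; roughly, for the load
// form (names are illustrative):
//   %v = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(
//            ptr %base, i64 %stride, <4 x i1> %m, i32 %evl)
// so OpValues carries pointer, stride, mask and EVL, in that order.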
8232void SelectionDAGBuilder::visitVPStridedLoad(
8233 const VPIntrinsic &VPIntrin, EVT VT,
8234 const SmallVectorImpl<SDValue> &OpValues) {
8235 SDLoc DL = getCurSDLoc();
8236 Value *PtrOperand = VPIntrin.getArgOperand(i: 0);
8237 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
8238 if (!Alignment)
8239 Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType());
8240 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
8241 const MDNode *Ranges = getRangeMetadata(I: VPIntrin);
8242 MemoryLocation ML = MemoryLocation::getAfter(Ptr: PtrOperand, AATags: AAInfo);
8243 bool AddToChain = !AA || !AA->pointsToConstantMemory(Loc: ML);
8244 SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
8245 unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
8246 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
8247 PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOLoad,
8248 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo, Ranges);
8249
8250 SDValue LD = DAG.getStridedLoadVP(VT, DL, Chain: InChain, Ptr: OpValues[0], Stride: OpValues[1],
8251 Mask: OpValues[2], EVL: OpValues[3], MMO,
8252 IsExpanding: false /*IsExpanding*/);
8253
8254 if (AddToChain)
8255 PendingLoads.push_back(Elt: LD.getValue(R: 1));
8256 setValue(V: &VPIntrin, NewN: LD);
8257}
8258
8259void SelectionDAGBuilder::visitVPStridedStore(
8260 const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
8261 SDLoc DL = getCurSDLoc();
8262 Value *PtrOperand = VPIntrin.getArgOperand(i: 1);
8263 EVT VT = OpValues[0].getValueType();
8264 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
8265 if (!Alignment)
8266 Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType());
8267 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
8268 unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
8269 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
8270 PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOStore,
8271 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo);
8272
8273 SDValue ST = DAG.getStridedStoreVP(
8274 Chain: getMemoryRoot(), DL, Val: OpValues[0], Ptr: OpValues[1],
8275 Offset: DAG.getUNDEF(VT: OpValues[1].getValueType()), Stride: OpValues[2], Mask: OpValues[3],
8276 EVL: OpValues[4], MemVT: VT, MMO, AM: ISD::UNINDEXED, /*IsTruncating*/ false,
8277 /*IsCompressing*/ false);
8278
8279 DAG.setRoot(ST);
8280 setValue(V: &VPIntrin, NewN: ST);
8281}
8282
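// vp.icmp/vp.fcmp carry their operands as (lhs, rhs, condition-code metadata,
// mask, evl), which is why operand #2 is skipped below and the mask and EVL
// are taken from operands 3 and 4.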
8283void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
8284 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8285 SDLoc DL = getCurSDLoc();
8286
8287 ISD::CondCode Condition;
8288 CmpInst::Predicate CondCode = VPIntrin.getPredicate();
8289 bool IsFP = VPIntrin.getOperand(i_nocapture: 0)->getType()->isFPOrFPVectorTy();
8290 if (IsFP) {
8291 // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan)
8292 // flags, but calls that don't return floating-point types can't be
8293 // FPMathOperators, like vp.fcmp. This affects constrained fcmp too.
8294 Condition = getFCmpCondCode(Pred: CondCode);
8295 if (TM.Options.NoNaNsFPMath)
8296 Condition = getFCmpCodeWithoutNaN(CC: Condition);
8297 } else {
8298 Condition = getICmpCondCode(Pred: CondCode);
8299 }
8300
8301 SDValue Op1 = getValue(V: VPIntrin.getOperand(i_nocapture: 0));
8302 SDValue Op2 = getValue(V: VPIntrin.getOperand(i_nocapture: 1));
8303 // #2 is the condition code
8304 SDValue MaskOp = getValue(V: VPIntrin.getOperand(i_nocapture: 3));
8305 SDValue EVL = getValue(V: VPIntrin.getOperand(i_nocapture: 4));
8306 MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
8307 assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
8308 "Unexpected target EVL type");
8309 EVL = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: EVLParamVT, Operand: EVL);
8310
8311 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
8312 Ty: VPIntrin.getType());
8313 setValue(V: &VPIntrin,
8314 NewN: DAG.getSetCCVP(DL, VT: DestVT, LHS: Op1, RHS: Op2, Cond: Condition, Mask: MaskOp, EVL));
8315}
8316
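// Most VP intrinsics map 1:1 onto a VP_* ISD opcode via getISDForVPIntrinsic();
// the generic path below just gathers the operands, zero-extends the EVL
// argument to the target's EVL type, and builds that node. Only a handful of
// cases (loads/stores, gathers/scatters, fmuladd, is.fpclass, pointer casts,
// and abs/ctlz/cttz) need the special handling in the switch.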
8317void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
8318 const VPIntrinsic &VPIntrin) {
8319 SDLoc DL = getCurSDLoc();
8320 unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
8321
8322 auto IID = VPIntrin.getIntrinsicID();
8323
8324 if (const auto *CmpI = dyn_cast<VPCmpIntrinsic>(Val: &VPIntrin))
8325 return visitVPCmp(VPIntrin: *CmpI);
8326
8327 SmallVector<EVT, 4> ValueVTs;
8328 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8329 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: VPIntrin.getType(), ValueVTs);
8330 SDVTList VTs = DAG.getVTList(VTs: ValueVTs);
8331
8332 auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IntrinsicID: IID);
8333
8334 MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
8335 assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
8336 "Unexpected target EVL type");
8337
8338 // Request operands.
8339 SmallVector<SDValue, 7> OpValues;
8340 for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
8341 auto Op = getValue(V: VPIntrin.getArgOperand(i: I));
8342 if (I == EVLParamPos)
8343 Op = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: EVLParamVT, Operand: Op);
8344 OpValues.push_back(Elt: Op);
8345 }
8346
8347 switch (Opcode) {
8348 default: {
8349 SDNodeFlags SDFlags;
8350 if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &VPIntrin))
8351 SDFlags.copyFMF(FPMO: *FPMO);
8352 SDValue Result = DAG.getNode(Opcode, DL, VTList: VTs, Ops: OpValues, Flags: SDFlags);
8353 setValue(V: &VPIntrin, NewN: Result);
8354 break;
8355 }
8356 case ISD::VP_LOAD:
8357 visitVPLoad(VPIntrin, VT: ValueVTs[0], OpValues);
8358 break;
8359 case ISD::VP_GATHER:
8360 visitVPGather(VPIntrin, VT: ValueVTs[0], OpValues);
8361 break;
8362 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8363 visitVPStridedLoad(VPIntrin, VT: ValueVTs[0], OpValues);
8364 break;
8365 case ISD::VP_STORE:
8366 visitVPStore(VPIntrin, OpValues);
8367 break;
8368 case ISD::VP_SCATTER:
8369 visitVPScatter(VPIntrin, OpValues);
8370 break;
8371 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8372 visitVPStridedStore(VPIntrin, OpValues);
8373 break;
8374 case ISD::VP_FMULADD: {
8375 assert(OpValues.size() == 5 && "Unexpected number of operands");
8376 SDNodeFlags SDFlags;
8377 if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &VPIntrin))
8378 SDFlags.copyFMF(FPMO: *FPMO);
8379 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
8380 TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), ValueVTs[0])) {
8381 setValue(V: &VPIntrin, NewN: DAG.getNode(Opcode: ISD::VP_FMA, DL, VTList: VTs, Ops: OpValues, Flags: SDFlags));
8382 } else {
8383 SDValue Mul = DAG.getNode(
8384 Opcode: ISD::VP_FMUL, DL, VTList: VTs,
8385 Ops: {OpValues[0], OpValues[1], OpValues[3], OpValues[4]}, Flags: SDFlags);
8386 SDValue Add =
8387 DAG.getNode(Opcode: ISD::VP_FADD, DL, VTList: VTs,
8388 Ops: {Mul, OpValues[2], OpValues[3], OpValues[4]}, Flags: SDFlags);
8389 setValue(V: &VPIntrin, NewN: Add);
8390 }
8391 break;
8392 }
8393 case ISD::VP_IS_FPCLASS: {
8394 const DataLayout DLayout = DAG.getDataLayout();
8395 EVT DestVT = TLI.getValueType(DL: DLayout, Ty: VPIntrin.getType());
8396 auto Constant = OpValues[1]->getAsZExtVal();
8397 SDValue Check = DAG.getTargetConstant(Constant, DL, MVT::i32);
8398 SDValue V = DAG.getNode(Opcode: ISD::VP_IS_FPCLASS, DL, VT: DestVT,
8399 Ops: {OpValues[0], Check, OpValues[2], OpValues[3]});
8400 setValue(V: &VPIntrin, NewN: V);
8401 return;
8402 }
8403 case ISD::VP_INTTOPTR: {
8404 SDValue N = OpValues[0];
8405 EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: VPIntrin.getType());
8406 EVT PtrMemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: VPIntrin.getType());
8407 N = DAG.getVPPtrExtOrTrunc(DL: getCurSDLoc(), VT: DestVT, Op: N, Mask: OpValues[1],
8408 EVL: OpValues[2]);
8409 N = DAG.getVPZExtOrTrunc(DL: getCurSDLoc(), VT: PtrMemVT, Op: N, Mask: OpValues[1],
8410 EVL: OpValues[2]);
8411 setValue(V: &VPIntrin, NewN: N);
8412 break;
8413 }
8414 case ISD::VP_PTRTOINT: {
8415 SDValue N = OpValues[0];
8416 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
8417 Ty: VPIntrin.getType());
8418 EVT PtrMemVT = TLI.getMemValueType(DL: DAG.getDataLayout(),
8419 Ty: VPIntrin.getOperand(i_nocapture: 0)->getType());
8420 N = DAG.getVPPtrExtOrTrunc(DL: getCurSDLoc(), VT: PtrMemVT, Op: N, Mask: OpValues[1],
8421 EVL: OpValues[2]);
8422 N = DAG.getVPZExtOrTrunc(DL: getCurSDLoc(), VT: DestVT, Op: N, Mask: OpValues[1],
8423 EVL: OpValues[2]);
8424 setValue(V: &VPIntrin, NewN: N);
8425 break;
8426 }
8427 case ISD::VP_ABS:
8428 case ISD::VP_CTLZ:
8429 case ISD::VP_CTLZ_ZERO_UNDEF:
8430 case ISD::VP_CTTZ:
8431 case ISD::VP_CTTZ_ZERO_UNDEF: {
8432 SDValue Result =
8433 DAG.getNode(Opcode, DL, VTList: VTs, Ops: {OpValues[0], OpValues[2], OpValues[3]});
8434 setValue(V: &VPIntrin, NewN: Result);
8435 break;
8436 }
8437 }
8438}
8439
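// lowerStartEH/lowerEndEH bracket a potentially-throwing call with EH_LABEL
// nodes. The (BeginLabel, EndLabel) pair describes the call's try-range and is
// later used to build the call-site table (LSDA) so the unwinder can map a
// faulting PC back to the correct landing pad.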
8440SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
8441 const BasicBlock *EHPadBB,
8442 MCSymbol *&BeginLabel) {
8443 MachineFunction &MF = DAG.getMachineFunction();
8444 MachineModuleInfo &MMI = MF.getMMI();
8445
8446 // Insert a label before the invoke call to mark the try range. This can be
8447 // used to detect deletion of the invoke via the MachineModuleInfo.
8448 BeginLabel = MMI.getContext().createTempSymbol();
8449
8450 // For SjLj, keep track of which landing pads go with which invokes
8451 // so as to maintain the ordering of pads in the LSDA.
8452 unsigned CallSiteIndex = MMI.getCurrentCallSite();
8453 if (CallSiteIndex) {
8454 MF.setCallSiteBeginLabel(BeginLabel, Site: CallSiteIndex);
8455 LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(Elt: CallSiteIndex);
8456
8457 // Now that the call site is handled, stop tracking it.
8458 MMI.setCurrentCallSite(0);
8459 }
8460
8461 return DAG.getEHLabel(dl: getCurSDLoc(), Root: Chain, Label: BeginLabel);
8462}
8463
8464SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
8465 const BasicBlock *EHPadBB,
8466 MCSymbol *BeginLabel) {
8467 assert(BeginLabel && "BeginLabel should've been set");
8468
8469 MachineFunction &MF = DAG.getMachineFunction();
8470 MachineModuleInfo &MMI = MF.getMMI();
8471
8472 // Insert a label at the end of the invoke call to mark the try range. This
8473 // can be used to detect deletion of the invoke via the MachineModuleInfo.
8474 MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
8475 Chain = DAG.getEHLabel(dl: getCurSDLoc(), Root: Chain, Label: EndLabel);
8476
8477 // Inform MachineModuleInfo of range.
8478 auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn());
8479 // There is a platform (e.g. wasm) that uses funclet style IR but does not
8480 // actually use outlined funclets and their LSDA info style.
8481 if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
8482 assert(II && "II should've been set");
8483 WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
8484 EHInfo->addIPToStateRange(II, InvokeBegin: BeginLabel, InvokeEnd: EndLabel);
8485 } else if (!isScopedEHPersonality(Pers)) {
8486 assert(EHPadBB);
8487 MF.addInvoke(LandingPad: FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
8488 }
8489
8490 return Chain;
8491}
8492
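// lowerInvokable wraps TLI.LowerCallTo() with the EH bookkeeping above: when
// the call site has an EH pad (i.e. it came from an invoke), the call is
// bracketed with begin/end EH labels. A tail call is indicated by LowerCallTo
// returning a null chain, in which case the DAG root has already been updated
// and any pending exports are dropped.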
8493std::pair<SDValue, SDValue>
8494SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
8495 const BasicBlock *EHPadBB) {
8496 MCSymbol *BeginLabel = nullptr;
8497
8498 if (EHPadBB) {
8499 // Both PendingLoads and PendingExports must be flushed here;
8500 // this call might not return.
8501 (void)getRoot();
8502 DAG.setRoot(lowerStartEH(Chain: getControlRoot(), EHPadBB, BeginLabel));
8503 CLI.setChain(getRoot());
8504 }
8505
8506 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8507 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
8508
8509 assert((CLI.IsTailCall || Result.second.getNode()) &&
8510 "Non-null chain expected with non-tail call!");
8511 assert((Result.second.getNode() || !Result.first.getNode()) &&
8512 "Null value expected with tail call!");
8513
8514 if (!Result.second.getNode()) {
8515 // As a special case, a null chain means that a tail call has been emitted
8516 // and the DAG root is already updated.
8517 HasTailCall = true;
8518
8519 // Since there's no actual continuation from this block, nothing can be
8520 // relying on us setting vregs for them.
8521 PendingExports.clear();
8522 } else {
8523 DAG.setRoot(Result.second);
8524 }
8525
8526 if (EHPadBB) {
8527 DAG.setRoot(lowerEndEH(Chain: getRoot(), II: cast_or_null<InvokeInst>(Val: CLI.CB), EHPadBB,
8528 BeginLabel));
8529 }
8530
8531 return Result;
8532}
8533
8534void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
8535 bool isTailCall,
8536 bool isMustTailCall,
8537 const BasicBlock *EHPadBB) {
8538 auto &DL = DAG.getDataLayout();
8539 FunctionType *FTy = CB.getFunctionType();
8540 Type *RetTy = CB.getType();
8541
8542 TargetLowering::ArgListTy Args;
8543 Args.reserve(n: CB.arg_size());
8544
8545 const Value *SwiftErrorVal = nullptr;
8546 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8547
8548 if (isTailCall) {
8549 // Avoid emitting tail calls in functions with the disable-tail-calls
8550 // attribute.
8551 auto *Caller = CB.getParent()->getParent();
8552 if (Caller->getFnAttribute(Kind: "disable-tail-calls").getValueAsString() ==
8553 "true" && !isMustTailCall)
8554 isTailCall = false;
8555
8556 // We can't tail call inside a function with a swifterror argument. Lowering
8557 // does not support this yet. The value would have to be moved into the
8558 // swifterror register before the call.
8559 if (TLI.supportSwiftError() &&
8560 Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
8561 isTailCall = false;
8562 }
8563
8564 for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
8565 TargetLowering::ArgListEntry Entry;
8566 const Value *V = *I;
8567
8568 // Skip empty types
8569 if (V->getType()->isEmptyTy())
8570 continue;
8571
8572 SDValue ArgNode = getValue(V);
8573 Entry.Node = ArgNode; Entry.Ty = V->getType();
8574
8575 Entry.setAttributes(Call: &CB, ArgIdx: I - CB.arg_begin());
8576
8577 // Use swifterror virtual register as input to the call.
8578 if (Entry.IsSwiftError && TLI.supportSwiftError()) {
8579 SwiftErrorVal = V;
8580 // We find the virtual register for the actual swifterror argument.
8581 // Instead of using the Value, we use the virtual register instead.
8582 Entry.Node =
8583 DAG.getRegister(Reg: SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
8584 VT: EVT(TLI.getPointerTy(DL)));
8585 }
8586
8587 Args.push_back(x: Entry);
8588
8589 // If we have an explicit sret argument that is an Instruction (i.e., it
8590 // might point to function-local memory), we can't meaningfully tail-call.
8591 if (Entry.IsSRet && isa<Instruction>(Val: V))
8592 isTailCall = false;
8593 }
8594
8595 // If call site has a cfguardtarget operand bundle, create and add an
8596 // additional ArgListEntry.
8597 if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_cfguardtarget)) {
8598 TargetLowering::ArgListEntry Entry;
8599 Value *V = Bundle->Inputs[0];
8600 SDValue ArgNode = getValue(V);
8601 Entry.Node = ArgNode;
8602 Entry.Ty = V->getType();
8603 Entry.IsCFGuardTarget = true;
8604 Args.push_back(x: Entry);
8605 }
8606
8607 // Check if target-independent constraints permit a tail call here.
8608 // Target-dependent constraints are checked within TLI->LowerCallTo.
8609 if (isTailCall && !isInTailCallPosition(Call: CB, TM: DAG.getTarget()))
8610 isTailCall = false;
8611
8612 // Disable tail calls if there is a swifterror argument. Targets have not
8613 // been updated to support tail calls.
8614 if (TLI.supportSwiftError() && SwiftErrorVal)
8615 isTailCall = false;
8616
8617 ConstantInt *CFIType = nullptr;
8618 if (CB.isIndirectCall()) {
8619 if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_kcfi)) {
8620 if (!TLI.supportKCFIBundles())
8621 report_fatal_error(
8622 reason: "Target doesn't support calls with kcfi operand bundles.");
8623 CFIType = cast<ConstantInt>(Val: Bundle->Inputs[0]);
8624 assert(CFIType->getType()->isIntegerTy(32) && "Invalid CFI type");
8625 }
8626 }
8627
8628 SDValue ConvControlToken;
8629 if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_convergencectrl)) {
8630 auto *Token = Bundle->Inputs[0].get();
8631 ConvControlToken = getValue(V: Token);
8632 }
8633
8634 TargetLowering::CallLoweringInfo CLI(DAG);
8635 CLI.setDebugLoc(getCurSDLoc())
8636 .setChain(getRoot())
8637 .setCallee(ResultType: RetTy, FTy, Target: Callee, ArgsList: std::move(Args), Call: CB)
8638 .setTailCall(isTailCall)
8639 .setConvergent(CB.isConvergent())
8640 .setIsPreallocated(
8641 CB.countOperandBundlesOfType(ID: LLVMContext::OB_preallocated) != 0)
8642 .setCFIType(CFIType)
8643 .setConvergenceControlToken(ConvControlToken);
8644 std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
8645
8646 if (Result.first.getNode()) {
8647 Result.first = lowerRangeToAssertZExt(DAG, I: CB, Op: Result.first);
8648 setValue(V: &CB, NewN: Result.first);
8649 }
8650
8651 // The last element of CLI.InVals has the SDValue for swifterror return.
8652 // Here we copy it to a virtual register and update SwiftErrorMap for
8653 // book-keeping.
8654 if (SwiftErrorVal && TLI.supportSwiftError()) {
8655 // Get the last element of InVals.
8656 SDValue Src = CLI.InVals.back();
8657 Register VReg =
8658 SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
8659 SDValue CopyNode = CLI.DAG.getCopyToReg(Chain: Result.second, dl: CLI.DL, Reg: VReg, N: Src);
8660 DAG.setRoot(CopyNode);
8661 }
8662}
8663
8664static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
8665 SelectionDAGBuilder &Builder) {
8666 // Check to see if this load can be trivially constant folded, e.g. if the
8667 // input is from a string literal.
8668 if (const Constant *LoadInput = dyn_cast<Constant>(Val: PtrVal)) {
8669 // Cast pointer to the type we really want to load.
8670 Type *LoadTy =
8671 Type::getIntNTy(C&: PtrVal->getContext(), N: LoadVT.getScalarSizeInBits());
8672 if (LoadVT.isVector())
8673 LoadTy = FixedVectorType::get(ElementType: LoadTy, NumElts: LoadVT.getVectorNumElements());
8674
8675 LoadInput = ConstantExpr::getBitCast(C: const_cast<Constant *>(LoadInput),
8676 Ty: PointerType::getUnqual(ElementType: LoadTy));
8677
8678 if (const Constant *LoadCst =
8679 ConstantFoldLoadFromConstPtr(C: const_cast<Constant *>(LoadInput),
8680 Ty: LoadTy, DL: Builder.DAG.getDataLayout()))
8681 return Builder.getValue(V: LoadCst);
8682 }
8683
8684 // Otherwise, we have to emit the load. If the pointer is to unfoldable but
8685 // still constant memory, the input chain can be the entry node.
8686 SDValue Root;
8687 bool ConstantMemory = false;
8688
8689 // Do not serialize (non-volatile) loads of constant memory with anything.
8690 if (Builder.AA && Builder.AA->pointsToConstantMemory(P: PtrVal)) {
8691 Root = Builder.DAG.getEntryNode();
8692 ConstantMemory = true;
8693 } else {
8694 // Do not serialize non-volatile loads against each other.
8695 Root = Builder.DAG.getRoot();
8696 }
8697
8698 SDValue Ptr = Builder.getValue(V: PtrVal);
8699 SDValue LoadVal =
8700 Builder.DAG.getLoad(VT: LoadVT, dl: Builder.getCurSDLoc(), Chain: Root, Ptr,
8701 PtrInfo: MachinePointerInfo(PtrVal), Alignment: Align(1));
8702
8703 if (!ConstantMemory)
8704 Builder.PendingLoads.push_back(Elt: LoadVal.getValue(R: 1));
8705 return LoadVal;
8706}
8707
8708/// Record the value for an instruction that produces an integer result,
8709/// converting the type where necessary.
8710void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
8711 SDValue Value,
8712 bool IsSigned) {
8713 EVT VT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
8714 Ty: I.getType(), AllowUnknown: true);
8715 Value = DAG.getExtOrTrunc(IsSigned, Op: Value, DL: getCurSDLoc(), VT);
8716 setValue(V: &I, NewN: Value);
8717}
8718
8719/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
8720/// true and lower it. Otherwise return false, and it will be lowered like a
8721/// normal call.
8722/// The caller already checked that \p I calls the appropriate LibFunc with a
8723/// correct prototype.
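/// For example, a size-4 compare used only for an equality test, sketched as:
///   %r = call i32 @memcmp(ptr %a, ptr %b, i64 4)
///   %c = icmp eq i32 %r, 0
/// can be lowered to two i32 loads and a SETNE when the target has a fast i32
/// equality compare and allows the (possibly misaligned) loads.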
8724bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
8725 const Value *LHS = I.getArgOperand(i: 0), *RHS = I.getArgOperand(i: 1);
8726 const Value *Size = I.getArgOperand(i: 2);
8727 const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Val: getValue(V: Size));
8728 if (CSize && CSize->getZExtValue() == 0) {
8729 EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
8730 Ty: I.getType(), AllowUnknown: true);
8731 setValue(V: &I, NewN: DAG.getConstant(Val: 0, DL: getCurSDLoc(), VT: CallVT));
8732 return true;
8733 }
8734
8735 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8736 std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
8737 DAG, dl: getCurSDLoc(), Chain: DAG.getRoot(), Op1: getValue(V: LHS), Op2: getValue(V: RHS),
8738 Op3: getValue(V: Size), Op1PtrInfo: MachinePointerInfo(LHS), Op2PtrInfo: MachinePointerInfo(RHS));
8739 if (Res.first.getNode()) {
8740 processIntegerCallValue(I, Value: Res.first, IsSigned: true);
8741 PendingLoads.push_back(Elt: Res.second);
8742 return true;
8743 }
8744
8745 // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
8746 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
8747 if (!CSize || !isOnlyUsedInZeroEqualityComparison(CxtI: &I))
8748 return false;
8749
8750 // If the target has a fast compare for the given size, it will return a
8751 // preferred load type for that size. Require that the load VT is legal and
8752 // that the target supports unaligned loads of that type. Otherwise, return
8753 // INVALID.
8754 auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
8755 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8756 MVT LVT = TLI.hasFastEqualityCompare(NumBits);
8757 if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
8758 // TODO: Handle 5 byte compare as 4-byte + 1 byte.
8759 // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
8760 // TODO: Check alignment of src and dest ptrs.
8761 unsigned DstAS = LHS->getType()->getPointerAddressSpace();
8762 unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
8763 if (!TLI.isTypeLegal(VT: LVT) ||
8764 !TLI.allowsMisalignedMemoryAccesses(LVT, AddrSpace: SrcAS) ||
8765 !TLI.allowsMisalignedMemoryAccesses(LVT, AddrSpace: DstAS))
8766 LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
8767 }
8768
8769 return LVT;
8770 };
8771
8772 // This turns into unaligned loads. We only do this if the target natively
8773 // supports the MVT we'll be loading or if it is small enough (<= 4) that
8774 // we'll only produce a small number of byte loads.
8775 MVT LoadVT;
8776 unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
8777 switch (NumBitsToCompare) {
8778 default:
8779 return false;
8780 case 16:
8781 LoadVT = MVT::i16;
8782 break;
8783 case 32:
8784 LoadVT = MVT::i32;
8785 break;
8786 case 64:
8787 case 128:
8788 case 256:
8789 LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
8790 break;
8791 }
8792
8793 if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
8794 return false;
8795
8796 SDValue LoadL = getMemCmpLoad(PtrVal: LHS, LoadVT, Builder&: *this);
8797 SDValue LoadR = getMemCmpLoad(PtrVal: RHS, LoadVT, Builder&: *this);
8798
8799 // Bitcast to a wide integer type if the loads are vectors.
8800 if (LoadVT.isVector()) {
8801 EVT CmpVT = EVT::getIntegerVT(Context&: LHS->getContext(), BitWidth: LoadVT.getSizeInBits());
8802 LoadL = DAG.getBitcast(VT: CmpVT, V: LoadL);
8803 LoadR = DAG.getBitcast(VT: CmpVT, V: LoadR);
8804 }
8805
8806 SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
8807 processIntegerCallValue(I, Value: Cmp, IsSigned: false);
8808 return true;
8809}
8810
8811/// See if we can lower a memchr call into an optimized form. If so, return
8812/// true and lower it. Otherwise return false, and it will be lowered like a
8813/// normal call.
8814/// The caller already checked that \p I calls the appropriate LibFunc with a
8815/// correct prototype.
8816bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
8817 const Value *Src = I.getArgOperand(i: 0);
8818 const Value *Char = I.getArgOperand(i: 1);
8819 const Value *Length = I.getArgOperand(i: 2);
8820
8821 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8822 std::pair<SDValue, SDValue> Res =
8823 TSI.EmitTargetCodeForMemchr(DAG, dl: getCurSDLoc(), Chain: DAG.getRoot(),
8824 Src: getValue(V: Src), Char: getValue(V: Char), Length: getValue(V: Length),
8825 SrcPtrInfo: MachinePointerInfo(Src));
8826 if (Res.first.getNode()) {
8827 setValue(V: &I, NewN: Res.first);
8828 PendingLoads.push_back(Elt: Res.second);
8829 return true;
8830 }
8831
8832 return false;
8833}
8834
8835/// See if we can lower a mempcpy call into an optimized form. If so, return
8836/// true and lower it. Otherwise return false, and it will be lowered like a
8837/// normal call.
8838/// The caller already checked that \p I calls the appropriate LibFunc with a
8839/// correct prototype.
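/// Unlike memcpy, mempcpy returns a pointer just past the last byte written,
/// so after emitting the memcpy node we also compute Dst + Size as the call's
/// result (which is why it cannot be emitted as a tail call here).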
8840bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
8841 SDValue Dst = getValue(V: I.getArgOperand(i: 0));
8842 SDValue Src = getValue(V: I.getArgOperand(i: 1));
8843 SDValue Size = getValue(V: I.getArgOperand(i: 2));
8844
8845 Align DstAlign = DAG.InferPtrAlign(Ptr: Dst).valueOrOne();
8846 Align SrcAlign = DAG.InferPtrAlign(Ptr: Src).valueOrOne();
8847 // DAG::getMemcpy needs Alignment to be defined.
8848 Align Alignment = std::min(a: DstAlign, b: SrcAlign);
8849
8850 SDLoc sdl = getCurSDLoc();
8851
8852 // In the mempcpy context we need to pass in a false value for isTailCall
8853 // because the return pointer needs to be adjusted by the size of
8854 // the copied memory.
8855 SDValue Root = getMemoryRoot();
8856 SDValue MC = DAG.getMemcpy(Chain: Root, dl: sdl, Dst, Src, Size, Alignment, isVol: false, AlwaysInline: false,
8857 /*isTailCall=*/false,
8858 DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
8859 SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)),
8860 AAInfo: I.getAAMetadata());
8861 assert(MC.getNode() != nullptr &&
8862 "** memcpy should not be lowered as TailCall in mempcpy context **");
8863 DAG.setRoot(MC);
8864
8865 // Check if Size needs to be truncated or extended.
8866 Size = DAG.getSExtOrTrunc(Op: Size, DL: sdl, VT: Dst.getValueType());
8867
8868 // Adjust return pointer to point just past the last dst byte.
8869 SDValue DstPlusSize = DAG.getNode(Opcode: ISD::ADD, DL: sdl, VT: Dst.getValueType(),
8870 N1: Dst, N2: Size);
8871 setValue(V: &I, NewN: DstPlusSize);
8872 return true;
8873}
8874
8875/// See if we can lower a strcpy call into an optimized form. If so, return
8876/// true and lower it, otherwise return false and it will be lowered like a
8877/// normal call.
8878/// The caller already checked that \p I calls the appropriate LibFunc with a
8879/// correct prototype.
8880bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
8881 const Value *Arg0 = I.getArgOperand(i: 0), *Arg1 = I.getArgOperand(i: 1);
8882
8883 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8884 std::pair<SDValue, SDValue> Res =
8885 TSI.EmitTargetCodeForStrcpy(DAG, DL: getCurSDLoc(), Chain: getRoot(),
8886 Dest: getValue(V: Arg0), Src: getValue(V: Arg1),
8887 DestPtrInfo: MachinePointerInfo(Arg0),
8888 SrcPtrInfo: MachinePointerInfo(Arg1), isStpcpy);
8889 if (Res.first.getNode()) {
8890 setValue(V: &I, NewN: Res.first);
8891 DAG.setRoot(Res.second);
8892 return true;
8893 }
8894
8895 return false;
8896}
8897
8898/// See if we can lower a strcmp call into an optimized form. If so, return
8899/// true and lower it, otherwise return false and it will be lowered like a
8900/// normal call.
8901/// The caller already checked that \p I calls the appropriate LibFunc with a
8902/// correct prototype.
8903bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
8904 const Value *Arg0 = I.getArgOperand(i: 0), *Arg1 = I.getArgOperand(i: 1);
8905
8906 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8907 std::pair<SDValue, SDValue> Res =
8908 TSI.EmitTargetCodeForStrcmp(DAG, dl: getCurSDLoc(), Chain: DAG.getRoot(),
8909 Op1: getValue(V: Arg0), Op2: getValue(V: Arg1),
8910 Op1PtrInfo: MachinePointerInfo(Arg0),
8911 Op2PtrInfo: MachinePointerInfo(Arg1));
8912 if (Res.first.getNode()) {
8913 processIntegerCallValue(I, Value: Res.first, IsSigned: true);
8914 PendingLoads.push_back(Elt: Res.second);
8915 return true;
8916 }
8917
8918 return false;
8919}
8920
8921/// See if we can lower a strlen call into an optimized form. If so, return
8922/// true and lower it, otherwise return false and it will be lowered like a
8923/// normal call.
8924/// The caller already checked that \p I calls the appropriate LibFunc with a
8925/// correct prototype.
8926bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
8927 const Value *Arg0 = I.getArgOperand(i: 0);
8928
8929 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8930 std::pair<SDValue, SDValue> Res =
8931 TSI.EmitTargetCodeForStrlen(DAG, DL: getCurSDLoc(), Chain: DAG.getRoot(),
8932 Src: getValue(V: Arg0), SrcPtrInfo: MachinePointerInfo(Arg0));
8933 if (Res.first.getNode()) {
8934 processIntegerCallValue(I, Value: Res.first, IsSigned: false);
8935 PendingLoads.push_back(Elt: Res.second);
8936 return true;
8937 }
8938
8939 return false;
8940}
8941
8942/// See if we can lower a strnlen call into an optimized form. If so, return
8943/// true and lower it, otherwise return false and it will be lowered like a
8944/// normal call.
8945/// The caller already checked that \p I calls the appropriate LibFunc with a
8946/// correct prototype.
8947bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
8948 const Value *Arg0 = I.getArgOperand(i: 0), *Arg1 = I.getArgOperand(i: 1);
8949
8950 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8951 std::pair<SDValue, SDValue> Res =
8952 TSI.EmitTargetCodeForStrnlen(DAG, DL: getCurSDLoc(), Chain: DAG.getRoot(),
8953 Src: getValue(V: Arg0), MaxLength: getValue(V: Arg1),
8954 SrcPtrInfo: MachinePointerInfo(Arg0));
8955 if (Res.first.getNode()) {
8956 processIntegerCallValue(I, Value: Res.first, IsSigned: false);
8957 PendingLoads.push_back(Elt: Res.second);
8958 return true;
8959 }
8960
8961 return false;
8962}
8963
8964/// See if we can lower a unary floating-point operation into an SDNode with
8965/// the specified Opcode. If so, return true and lower it, otherwise return
8966/// false and it will be lowered like a normal call.
8967/// The caller already checked that \p I calls the appropriate LibFunc with a
8968/// correct prototype.
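/// For example, a call to fabs() that only reads memory (and so cannot set
/// errno) can be lowered directly to an ISD::FABS node on the value type of
/// its argument.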
8969bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
8970 unsigned Opcode) {
8971 // We already checked this call's prototype; verify it doesn't modify errno.
8972 if (!I.onlyReadsMemory())
8973 return false;
8974
8975 SDNodeFlags Flags;
8976 Flags.copyFMF(FPMO: cast<FPMathOperator>(Val: I));
8977
8978 SDValue Tmp = getValue(V: I.getArgOperand(i: 0));
8979 setValue(V: &I,
8980 NewN: DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Tmp.getValueType(), Operand: Tmp, Flags));
8981 return true;
8982}
8983
8984/// See if we can lower a binary floating-point operation into an SDNode with
8985/// the specified Opcode. If so, return true and lower it. Otherwise return
8986/// false, and it will be lowered like a normal call.
8987/// The caller already checked that \p I calls the appropriate LibFunc with a
8988/// correct prototype.
8989bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
8990 unsigned Opcode) {
8991 // We already checked this call's prototype; verify it doesn't modify errno.
8992 if (!I.onlyReadsMemory())
8993 return false;
8994
8995 SDNodeFlags Flags;
8996 Flags.copyFMF(FPMO: cast<FPMathOperator>(Val: I));
8997
8998 SDValue Tmp0 = getValue(V: I.getArgOperand(i: 0));
8999 SDValue Tmp1 = getValue(V: I.getArgOperand(i: 1));
9000 EVT VT = Tmp0.getValueType();
9001 setValue(V: &I, NewN: DAG.getNode(Opcode, DL: getCurSDLoc(), VT, N1: Tmp0, N2: Tmp1, Flags));
9002 return true;
9003}
9004
9005void SelectionDAGBuilder::visitCall(const CallInst &I) {
9006 // Handle inline assembly differently.
9007 if (I.isInlineAsm()) {
9008 visitInlineAsm(Call: I);
9009 return;
9010 }
9011
9012 diagnoseDontCall(CI: I);
9013
9014 if (Function *F = I.getCalledFunction()) {
9015 if (F->isDeclaration()) {
9016 // Is this an LLVM intrinsic or a target-specific intrinsic?
9017 unsigned IID = F->getIntrinsicID();
9018 if (!IID)
9019 if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
9020 IID = II->getIntrinsicID(F);
9021
9022 if (IID) {
9023 visitIntrinsicCall(I, Intrinsic: IID);
9024 return;
9025 }
9026 }
9027
9028 // Check for well-known libc/libm calls. If the function is internal, it
9029 // can't be a library call. Don't do the check if marked as nobuiltin for
9030 // some reason or the call site requires strict floating point semantics.
9031 LibFunc Func;
9032 if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
9033 F->hasName() && LibInfo->getLibFunc(FDecl: *F, F&: Func) &&
9034 LibInfo->hasOptimizedCodeGen(F: Func)) {
9035 switch (Func) {
9036 default: break;
9037 case LibFunc_bcmp:
9038 if (visitMemCmpBCmpCall(I))
9039 return;
9040 break;
9041 case LibFunc_copysign:
9042 case LibFunc_copysignf:
9043 case LibFunc_copysignl:
9044 // We already checked this call's prototype; verify it doesn't modify
9045 // errno.
9046 if (I.onlyReadsMemory()) {
9047 SDValue LHS = getValue(V: I.getArgOperand(i: 0));
9048 SDValue RHS = getValue(V: I.getArgOperand(i: 1));
9049 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FCOPYSIGN, DL: getCurSDLoc(),
9050 VT: LHS.getValueType(), N1: LHS, N2: RHS));
9051 return;
9052 }
9053 break;
9054 case LibFunc_fabs:
9055 case LibFunc_fabsf:
9056 case LibFunc_fabsl:
9057 if (visitUnaryFloatCall(I, Opcode: ISD::FABS))
9058 return;
9059 break;
9060 case LibFunc_fmin:
9061 case LibFunc_fminf:
9062 case LibFunc_fminl:
9063 if (visitBinaryFloatCall(I, Opcode: ISD::FMINNUM))
9064 return;
9065 break;
9066 case LibFunc_fmax:
9067 case LibFunc_fmaxf:
9068 case LibFunc_fmaxl:
9069 if (visitBinaryFloatCall(I, Opcode: ISD::FMAXNUM))
9070 return;
9071 break;
9072 case LibFunc_sin:
9073 case LibFunc_sinf:
9074 case LibFunc_sinl:
9075 if (visitUnaryFloatCall(I, Opcode: ISD::FSIN))
9076 return;
9077 break;
9078 case LibFunc_cos:
9079 case LibFunc_cosf:
9080 case LibFunc_cosl:
9081 if (visitUnaryFloatCall(I, Opcode: ISD::FCOS))
9082 return;
9083 break;
9084 case LibFunc_sqrt:
9085 case LibFunc_sqrtf:
9086 case LibFunc_sqrtl:
9087 case LibFunc_sqrt_finite:
9088 case LibFunc_sqrtf_finite:
9089 case LibFunc_sqrtl_finite:
9090 if (visitUnaryFloatCall(I, Opcode: ISD::FSQRT))
9091 return;
9092 break;
9093 case LibFunc_floor:
9094 case LibFunc_floorf:
9095 case LibFunc_floorl:
9096 if (visitUnaryFloatCall(I, Opcode: ISD::FFLOOR))
9097 return;
9098 break;
9099 case LibFunc_nearbyint:
9100 case LibFunc_nearbyintf:
9101 case LibFunc_nearbyintl:
9102 if (visitUnaryFloatCall(I, Opcode: ISD::FNEARBYINT))
9103 return;
9104 break;
9105 case LibFunc_ceil:
9106 case LibFunc_ceilf:
9107 case LibFunc_ceill:
9108 if (visitUnaryFloatCall(I, Opcode: ISD::FCEIL))
9109 return;
9110 break;
9111 case LibFunc_rint:
9112 case LibFunc_rintf:
9113 case LibFunc_rintl:
9114 if (visitUnaryFloatCall(I, Opcode: ISD::FRINT))
9115 return;
9116 break;
9117 case LibFunc_round:
9118 case LibFunc_roundf:
9119 case LibFunc_roundl:
9120 if (visitUnaryFloatCall(I, Opcode: ISD::FROUND))
9121 return;
9122 break;
9123 case LibFunc_trunc:
9124 case LibFunc_truncf:
9125 case LibFunc_truncl:
9126 if (visitUnaryFloatCall(I, Opcode: ISD::FTRUNC))
9127 return;
9128 break;
9129 case LibFunc_log2:
9130 case LibFunc_log2f:
9131 case LibFunc_log2l:
9132 if (visitUnaryFloatCall(I, Opcode: ISD::FLOG2))
9133 return;
9134 break;
9135 case LibFunc_exp2:
9136 case LibFunc_exp2f:
9137 case LibFunc_exp2l:
9138 if (visitUnaryFloatCall(I, Opcode: ISD::FEXP2))
9139 return;
9140 break;
9141 case LibFunc_exp10:
9142 case LibFunc_exp10f:
9143 case LibFunc_exp10l:
9144 if (visitUnaryFloatCall(I, Opcode: ISD::FEXP10))
9145 return;
9146 break;
9147 case LibFunc_ldexp:
9148 case LibFunc_ldexpf:
9149 case LibFunc_ldexpl:
9150 if (visitBinaryFloatCall(I, Opcode: ISD::FLDEXP))
9151 return;
9152 break;
9153 case LibFunc_memcmp:
9154 if (visitMemCmpBCmpCall(I))
9155 return;
9156 break;
9157 case LibFunc_mempcpy:
9158 if (visitMemPCpyCall(I))
9159 return;
9160 break;
9161 case LibFunc_memchr:
9162 if (visitMemChrCall(I))
9163 return;
9164 break;
9165 case LibFunc_strcpy:
9166 if (visitStrCpyCall(I, isStpcpy: false))
9167 return;
9168 break;
9169 case LibFunc_stpcpy:
9170 if (visitStrCpyCall(I, isStpcpy: true))
9171 return;
9172 break;
9173 case LibFunc_strcmp:
9174 if (visitStrCmpCall(I))
9175 return;
9176 break;
9177 case LibFunc_strlen:
9178 if (visitStrLenCall(I))
9179 return;
9180 break;
9181 case LibFunc_strnlen:
9182 if (visitStrNLenCall(I))
9183 return;
9184 break;
9185 }
9186 }
9187 }
9188
9189 // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
9190 // have to do anything here to lower funclet bundles.
9191 // CFGuardTarget bundles are lowered in LowerCallTo.
9192 assert(!I.hasOperandBundlesOtherThan(
9193 {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
9194 LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
9195 LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi,
9196 LLVMContext::OB_convergencectrl}) &&
9197 "Cannot lower calls with arbitrary operand bundles!");
9198
9199 SDValue Callee = getValue(V: I.getCalledOperand());
9200
9201 if (I.countOperandBundlesOfType(ID: LLVMContext::OB_deopt))
9202 LowerCallSiteWithDeoptBundle(Call: &I, Callee, EHPadBB: nullptr);
9203 else
9204 // Check if we can potentially perform a tail call. More detailed checking
9205 // is done within LowerCallTo, after more information about the call is
9206 // known.
9207 LowerCallTo(CB: I, Callee, isTailCall: I.isTailCall(), isMustTailCall: I.isMustTailCall());
9208}
9209
9210namespace {
9211
9212/// AsmOperandInfo - This contains information for each constraint that we are
9213/// lowering.
9214class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
9215public:
9216 /// CallOperand - If this is the result output operand or a clobber
9217 /// this is null, otherwise it is the incoming operand to the CallInst.
9218 /// This gets modified as the asm is processed.
9219 SDValue CallOperand;
9220
9221 /// AssignedRegs - If this is a register or register class operand, this
9222 /// contains the set of registers corresponding to the operand.
9223 RegsForValue AssignedRegs;
9224
9225 explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
9226 : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
9227 }
9228
9229 /// Whether or not this operand accesses memory
9230 bool hasMemory(const TargetLowering &TLI) const {
9231 // Indirect operand accesses access memory.
9232 if (isIndirect)
9233 return true;
9234
9235 for (const auto &Code : Codes)
9236 if (TLI.getConstraintType(Constraint: Code) == TargetLowering::C_Memory)
9237 return true;
9238
9239 return false;
9240 }
9241};
9242
9243
9244} // end anonymous namespace
9245
9246/// Make sure that the output operand \p OpInfo and its corresponding input
9247/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
9248/// out).
9249static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
9250 SDISelAsmOperandInfo &MatchingOpInfo,
9251 SelectionDAG &DAG) {
9252 if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
9253 return;
9254
9255 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
9256 const auto &TLI = DAG.getTargetLoweringInfo();
9257
9258 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
9259 TLI.getRegForInlineAsmConstraint(TRI, Constraint: OpInfo.ConstraintCode,
9260 VT: OpInfo.ConstraintVT);
9261 std::pair<unsigned, const TargetRegisterClass *> InputRC =
9262 TLI.getRegForInlineAsmConstraint(TRI, Constraint: MatchingOpInfo.ConstraintCode,
9263 VT: MatchingOpInfo.ConstraintVT);
9264 if ((OpInfo.ConstraintVT.isInteger() !=
9265 MatchingOpInfo.ConstraintVT.isInteger()) ||
9266 (MatchRC.second != InputRC.second)) {
9267 // FIXME: error out in a more elegant fashion
9268 report_fatal_error(reason: "Unsupported asm: input constraint"
9269 " with a matching output constraint of"
9270 " incompatible type!");
9271 }
9272 MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
9273}
9274
9275/// Get a direct memory input to behave well as an indirect operand.
9276/// This may introduce stores, hence the need for a \p Chain.
9277/// \return The (possibly updated) chain.
9278static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
9279 SDISelAsmOperandInfo &OpInfo,
9280 SelectionDAG &DAG) {
9281 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9282
9283 // If we don't have an indirect input, put it in the constpool if we can,
9284 // otherwise spill it to a stack slot.
9285 // TODO: This isn't quite right. We need to handle these according to
9286 // the addressing mode that the constraint wants. Also, this may take
9287 // an additional register for the computation and we don't want that
9288 // either.
9289
9290 // If the operand is a float, integer, or vector constant, spill to a
9291 // constant pool entry to get its address.
9292 const Value *OpVal = OpInfo.CallOperandVal;
9293 if (isa<ConstantFP>(Val: OpVal) || isa<ConstantInt>(Val: OpVal) ||
9294 isa<ConstantVector>(Val: OpVal) || isa<ConstantDataVector>(Val: OpVal)) {
9295 OpInfo.CallOperand = DAG.getConstantPool(
9296 C: cast<Constant>(Val: OpVal), VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
9297 return Chain;
9298 }
9299
9300 // Otherwise, create a stack slot and emit a store to it before the asm.
9301 Type *Ty = OpVal->getType();
9302 auto &DL = DAG.getDataLayout();
9303 uint64_t TySize = DL.getTypeAllocSize(Ty);
9304 MachineFunction &MF = DAG.getMachineFunction();
9305 int SSFI = MF.getFrameInfo().CreateStackObject(
9306 Size: TySize, Alignment: DL.getPrefTypeAlign(Ty), isSpillSlot: false);
9307 SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: TLI.getFrameIndexTy(DL));
9308 Chain = DAG.getTruncStore(Chain, dl: Location, Val: OpInfo.CallOperand, Ptr: StackSlot,
9309 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: SSFI),
9310 SVT: TLI.getMemValueType(DL, Ty));
9311 OpInfo.CallOperand = StackSlot;
9312
9313 return Chain;
9314}
9315
9316/// GetRegistersForValue - Assign registers (virtual or physical) for the
9317/// specified operand. We prefer to assign virtual registers, to allow the
9318/// register allocator to handle the assignment process. However, if the asm
9319/// uses features that we can't model on machineinstrs, we have SDISel do the
9320/// allocation. This produces generally horrible, but correct, code.
9321///
9322/// OpInfo describes the operand
9323/// RefOpInfo describes the matching operand if any, the operand otherwise
9324static std::optional<unsigned>
9325getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
9326 SDISelAsmOperandInfo &OpInfo,
9327 SDISelAsmOperandInfo &RefOpInfo) {
9328 LLVMContext &Context = *DAG.getContext();
9329 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9330
9331 MachineFunction &MF = DAG.getMachineFunction();
9332 SmallVector<unsigned, 4> Regs;
9333 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
9334
9335 // No work to do for memory/address operands.
9336 if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
9337 OpInfo.ConstraintType == TargetLowering::C_Address)
9338 return std::nullopt;
9339
9340 // If this is a constraint for a single physreg, or a constraint for a
9341 // register class, find it.
9342 unsigned AssignedReg;
9343 const TargetRegisterClass *RC;
9344 std::tie(args&: AssignedReg, args&: RC) = TLI.getRegForInlineAsmConstraint(
9345 TRI: &TRI, Constraint: RefOpInfo.ConstraintCode, VT: RefOpInfo.ConstraintVT);
9346 // RC is unset only on failure. Return immediately.
9347 if (!RC)
9348 return std::nullopt;
9349
9350 // Get the actual register value type. This is important, because the user
9351 // may have asked for (e.g.) the AX register in i32 type. We need to
9352 // remember that AX is actually i16 to get the right extension.
9353 const MVT RegVT = *TRI.legalclasstypes_begin(RC: *RC);
9354
9355 if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
9356 // If this is an FP operand in an integer register (or vice versa), or more
9357 // generally if the operand value disagrees with the register class we plan
9358 // to stick it in, fix the operand type.
9359 //
9360 // If this is an input value, the bitcast to the new type is done now.
9361 // Bitcast for output value is done at the end of visitInlineAsm().
9362 if ((OpInfo.Type == InlineAsm::isOutput ||
9363 OpInfo.Type == InlineAsm::isInput) &&
9364 !TRI.isTypeLegalForClass(RC: *RC, T: OpInfo.ConstraintVT)) {
9365 // Try to convert to the first EVT that the reg class contains. If the
9366 // types are identical size, use a bitcast to convert (e.g. two differing
9367 // vector types). Note: output bitcast is done at the end of
9368 // visitInlineAsm().
9369 if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
9370 // Exclude indirect inputs while they are unsupported because the code
9371 // to perform the load is missing and thus OpInfo.CallOperand still
9372 // refers to the input address rather than the pointed-to value.
9373 if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
9374 OpInfo.CallOperand =
9375 DAG.getNode(Opcode: ISD::BITCAST, DL, VT: RegVT, Operand: OpInfo.CallOperand);
9376 OpInfo.ConstraintVT = RegVT;
9377 // If the operand is an FP value and we want it in integer registers,
9378 // use the corresponding integer type. This turns an f64 value into
9379 // i64, which can be passed with two i32 values on a 32-bit machine.
9380 } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
9381 MVT VT = MVT::getIntegerVT(BitWidth: OpInfo.ConstraintVT.getSizeInBits());
9382 if (OpInfo.Type == InlineAsm::isInput)
9383 OpInfo.CallOperand =
9384 DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: OpInfo.CallOperand);
9385 OpInfo.ConstraintVT = VT;
9386 }
9387 }
9388 }
9389
9390 // No need to allocate a matching input constraint since the constraint it's
9391 // matching to has already been allocated.
9392 if (OpInfo.isMatchingInputConstraint())
9393 return std::nullopt;
9394
9395 EVT ValueVT = OpInfo.ConstraintVT;
9396 if (OpInfo.ConstraintVT == MVT::Other)
9397 ValueVT = RegVT;
9398
9399 // Initialize NumRegs.
9400 unsigned NumRegs = 1;
9401 if (OpInfo.ConstraintVT != MVT::Other)
9402 NumRegs = TLI.getNumRegisters(Context, VT: OpInfo.ConstraintVT, RegisterVT: RegVT);
9403
9404 // If this is a constraint for a specific physical register, like {r17},
9405 // assign it now.
9406
9407 // If this is associated with a specific register, initialize the iterator to
9408 // the correct place. If virtual, make sure we have enough registers.
9409
9410 // Initialize iterator if necessary
9411 TargetRegisterClass::iterator I = RC->begin();
9412 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9413
9414 // Do not check for single registers.
9415 if (AssignedReg) {
9416 I = std::find(first: I, last: RC->end(), val: AssignedReg);
9417 if (I == RC->end()) {
9418 // RC does not contain the selected register, which indicates a
9419 // mismatch between the register and the required type/bitwidth.
9420 return {AssignedReg};
9421 }
9422 }
9423
9424 for (; NumRegs; --NumRegs, ++I) {
9425 assert(I != RC->end() && "Ran out of registers to allocate!");
9426 Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RegClass: RC);
9427 Regs.push_back(Elt: R);
9428 }
9429
9430 OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
9431 return std::nullopt;
9432}
9433
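// Note on the INLINEASM node operand layout (as built further below in
// visitInlineAsm): after the chain, the asm string, the !srcloc metadata and
// the extra-info immediate, operands are encoded as groups of
// [flag word, operand values...]; the flag word says whether the group is a
// register def, an early-clobber def, a memory operand, etc., and how many
// register operands follow it. That is what lets the helper below skip whole
// groups with getNumOperandRegisters() + 1.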
9434static unsigned
9435findMatchingInlineAsmOperand(unsigned OperandNo,
9436 const std::vector<SDValue> &AsmNodeOperands) {
9437 // Scan until we find the definition we already emitted of this operand.
9438 unsigned CurOp = InlineAsm::Op_FirstOperand;
9439 for (; OperandNo; --OperandNo) {
9440 // Advance to the next operand.
9441 unsigned OpFlag = AsmNodeOperands[CurOp]->getAsZExtVal();
9442 const InlineAsm::Flag F(OpFlag);
9443 assert(
9444 (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isMemKind()) &&
9445 "Skipped past definitions?");
9446 CurOp += F.getNumOperandRegisters() + 1;
9447 }
9448 return CurOp;
9449}
9450
9451namespace {
9452
9453class ExtraFlags {
9454 unsigned Flags = 0;
9455
9456public:
9457 explicit ExtraFlags(const CallBase &Call) {
9458 const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
9459 if (IA->hasSideEffects())
9460 Flags |= InlineAsm::Extra_HasSideEffects;
9461 if (IA->isAlignStack())
9462 Flags |= InlineAsm::Extra_IsAlignStack;
9463 if (Call.isConvergent())
9464 Flags |= InlineAsm::Extra_IsConvergent;
9465 Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
9466 }
9467
9468 void update(const TargetLowering::AsmOperandInfo &OpInfo) {
9469 // Ideally, we would only check against memory constraints. However, the
9470 // meaning of an Other constraint can be target-specific and we can't easily
9471 // reason about it. Therefore, be conservative and set MayLoad/MayStore
9472 // for Other constraints as well.
9473 if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
9474 OpInfo.ConstraintType == TargetLowering::C_Other) {
9475 if (OpInfo.Type == InlineAsm::isInput)
9476 Flags |= InlineAsm::Extra_MayLoad;
9477 else if (OpInfo.Type == InlineAsm::isOutput)
9478 Flags |= InlineAsm::Extra_MayStore;
9479 else if (OpInfo.Type == InlineAsm::isClobber)
9480 Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
9481 }
9482 }
9483
9484 unsigned get() const { return Flags; }
9485};
9486
9487} // end anonymous namespace
9488
9489static bool isFunction(SDValue Op) {
9490 if (Op && Op.getOpcode() == ISD::GlobalAddress) {
9491 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
9492 auto Fn = dyn_cast_or_null<Function>(Val: GA->getGlobal());
9493
9494 // A normal "call dllimport func" instruction (non-inlineasm) forces
9495 // indirect access through its call opcode, and the asm is usually printed
9496 // with an indirection marker (e.g. "*") based on that opcode. Inline asm
9497 // cannot do that today (in fact, this is similar to the "Data Access"
9498 // case), so we ignore dllimport functions here.
9499 if (Fn && !Fn->hasDLLImportStorageClass())
9500 return true;
9501 }
9502 }
9503 return false;
9504}
9505
9506/// visitInlineAsm - Handle a call to an InlineAsm object.
9507void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
9508 const BasicBlock *EHPadBB) {
9509 const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
9510
9511 /// ConstraintOperands - Information about all of the constraints.
9512 SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;
9513
9514 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9515 TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
9516 DL: DAG.getDataLayout(), TRI: DAG.getSubtarget().getRegisterInfo(), Call);
9517
9518 // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
9519 // AsmDialect, MayLoad, MayStore).
9520 bool HasSideEffect = IA->hasSideEffects();
9521 ExtraFlags ExtraInfo(Call);
9522
9523 for (auto &T : TargetConstraints) {
9524 ConstraintOperands.push_back(Elt: SDISelAsmOperandInfo(T));
9525 SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
9526
9527 if (OpInfo.CallOperandVal)
9528 OpInfo.CallOperand = getValue(V: OpInfo.CallOperandVal);
9529
9530 if (!HasSideEffect)
9531 HasSideEffect = OpInfo.hasMemory(TLI);
9532
9533 // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
9534 // FIXME: Could we compute this on OpInfo rather than T?
9535
9536 // Compute the constraint code and ConstraintType to use.
9537 TLI.ComputeConstraintToUse(OpInfo&: T, Op: SDValue());
9538
9539 if (T.ConstraintType == TargetLowering::C_Immediate &&
9540 OpInfo.CallOperand && !isa<ConstantSDNode>(Val: OpInfo.CallOperand))
9541 // We've delayed emitting this diagnostic, as we do for the "n" constraint,
9542 // because inlining could cause an integer constant to show up.
9543 return emitInlineAsmError(Call, Message: "constraint '" + Twine(T.ConstraintCode) +
9544 "' expects an integer constant "
9545 "expression");
9546
9547 ExtraInfo.update(OpInfo: T);
9548 }
9549
9550 // We won't need to flush pending loads if this asm doesn't touch
9551 // memory and is nonvolatile.
9552 SDValue Glue, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
9553
9554 bool EmitEHLabels = isa<InvokeInst>(Val: Call);
9555 if (EmitEHLabels) {
9556 assert(EHPadBB && "InvokeInst must have an EHPadBB");
9557 }
9558 bool IsCallBr = isa<CallBrInst>(Val: Call);
9559
9560 if (IsCallBr || EmitEHLabels) {
9561 // If this is a callbr or invoke we need to flush pending exports since
9562 // inlineasm_br and invoke are terminators.
9563 // We need to do this before nodes are glued to the inlineasm_br node.
9564 Chain = getControlRoot();
9565 }
9566
9567 MCSymbol *BeginLabel = nullptr;
9568 if (EmitEHLabels) {
9569 Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
9570 }
9571
9572 int OpNo = -1;
9573 SmallVector<StringRef> AsmStrs;
9574 IA->collectAsmStrs(AsmStrs);
9575
9576 // Second pass over the constraints: compute which constraint option to use.
9577 for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
9578 if (OpInfo.hasArg() || OpInfo.Type == InlineAsm::isOutput)
9579 OpNo++;
9580
9581 // If this is an output operand with a matching input operand, look up the
9582 // matching input. If their types mismatch, e.g. one is an integer, the
9583 // other is floating point, or their sizes are different, flag it as an
9584 // error.
9585 if (OpInfo.hasMatchingInput()) {
9586 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
9587 patchMatchingInput(OpInfo, MatchingOpInfo&: Input, DAG);
9588 }
9589
9590 // Compute the constraint code and ConstraintType to use.
9591 TLI.ComputeConstraintToUse(OpInfo, Op: OpInfo.CallOperand, DAG: &DAG);
9592
9593 if ((OpInfo.ConstraintType == TargetLowering::C_Memory &&
9594 OpInfo.Type == InlineAsm::isClobber) ||
9595 OpInfo.ConstraintType == TargetLowering::C_Address)
9596 continue;
9597
    // In the Linux PIC model, there are 4 cases of value/label addressing:
    //
    // 1: Function call or label jmp inside the module.
    // 2: Data access (such as a global or static variable) inside the module.
    // 3: Function call or label jmp outside the module.
    // 4: Data access (such as a global variable) outside the module.
    //
    // Because the current LLVM inline asm design does not "recognize" the asm
    // text itself, it is hard to treat memory addressing differently for the
    // same value/address when it is used in different instructions. For
    // example, in the PIC model a function call may go through the PLT or be
    // directly PC-relative, while a lea/mov of a function address may go
    // through the GOT.
    //
    // Here we try to "recognize" function calls for cases 1 and 3 in inline
    // asm and adjust their constraint accordingly.
    //
    // TODO: Inline asm does not encourage jumping to labels outside the
    // module, so we do not handle jumps to function labels for now, but this
    // should be enhanced (especially in the PIC model) if meaningful
    // requirements arise.
9617 if (OpInfo.isIndirect && isFunction(Op: OpInfo.CallOperand) &&
9618 TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) &&
9619 TM.getCodeModel() != CodeModel::Large) {
9620 OpInfo.isIndirect = false;
9621 OpInfo.ConstraintType = TargetLowering::C_Address;
9622 }
9623
9624 // If this is a memory input, and if the operand is not indirect, do what we
9625 // need to provide an address for the memory input.
9626 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
9627 !OpInfo.isIndirect) {
9628 assert((OpInfo.isMultipleAlternative ||
9629 (OpInfo.Type == InlineAsm::isInput)) &&
9630 "Can only indirectify direct input operands!");
9631
9632 // Memory operands really want the address of the value.
9633 Chain = getAddressForMemoryInput(Chain, Location: getCurSDLoc(), OpInfo, DAG);
9634
9635 // There is no longer a Value* corresponding to this operand.
9636 OpInfo.CallOperandVal = nullptr;
9637
9638 // It is now an indirect operand.
9639 OpInfo.isIndirect = true;
9640 }
9641
9642 }
9643
9644 // AsmNodeOperands - The operands for the ISD::INLINEASM node.
9645 std::vector<SDValue> AsmNodeOperands;
9646 AsmNodeOperands.push_back(x: SDValue()); // reserve space for input chain
9647 AsmNodeOperands.push_back(x: DAG.getTargetExternalSymbol(
9648 Sym: IA->getAsmString().c_str(), VT: TLI.getProgramPointerTy(DL: DAG.getDataLayout())));
9649
9650 // If we have a !srcloc metadata node associated with it, we want to attach
9651 // this to the ultimately generated inline asm machineinstr. To do this, we
9652 // pass in the third operand as this (potentially null) inline asm MDNode.
9653 const MDNode *SrcLoc = Call.getMetadata(Kind: "srcloc");
9654 AsmNodeOperands.push_back(x: DAG.getMDNode(MD: SrcLoc));
9655
9656 // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
9657 // bits as operand 3.
9658 AsmNodeOperands.push_back(x: DAG.getTargetConstant(
9659 Val: ExtraInfo.get(), DL: getCurSDLoc(), VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
9660
  // Third pass: Loop over operands to prepare DAG-level operands. As part of
  // this, assign virtual and physical registers for inputs and outputs.
9663 for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
9664 // Assign Registers.
9665 SDISelAsmOperandInfo &RefOpInfo =
9666 OpInfo.isMatchingInputConstraint()
9667 ? ConstraintOperands[OpInfo.getMatchedOperand()]
9668 : OpInfo;
9669 const auto RegError =
9670 getRegistersForValue(DAG, DL: getCurSDLoc(), OpInfo, RefOpInfo);
9671 if (RegError) {
9672 const MachineFunction &MF = DAG.getMachineFunction();
9673 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
9674 const char *RegName = TRI.getName(RegNo: *RegError);
9675 emitInlineAsmError(Call, Message: "register '" + Twine(RegName) +
9676 "' allocated for constraint '" +
9677 Twine(OpInfo.ConstraintCode) +
9678 "' does not match required type");
9679 return;
9680 }
9681
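    // Helper that rejects writes to registers the target treats as read-only
    // for inline asm, emitting a "write to reserved register" error.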
9682 auto DetectWriteToReservedRegister = [&]() {
9683 const MachineFunction &MF = DAG.getMachineFunction();
9684 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
9685 for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
9686 if (Register::isPhysicalRegister(Reg) &&
9687 TRI.isInlineAsmReadOnlyReg(MF, PhysReg: Reg)) {
9688 const char *RegName = TRI.getName(RegNo: Reg);
9689 emitInlineAsmError(Call, Message: "write to reserved register '" +
9690 Twine(RegName) + "'");
9691 return true;
9692 }
9693 }
9694 return false;
9695 };
9696 assert((OpInfo.ConstraintType != TargetLowering::C_Address ||
9697 (OpInfo.Type == InlineAsm::isInput &&
9698 !OpInfo.isMatchingInputConstraint())) &&
9699 "Only address as input operand is allowed.");
9700
9701 switch (OpInfo.Type) {
9702 case InlineAsm::isOutput:
9703 if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
9704 const InlineAsm::ConstraintCode ConstraintID =
9705 TLI.getInlineAsmMemConstraint(ConstraintCode: OpInfo.ConstraintCode);
9706 assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
9707 "Failed to convert memory constraint code to constraint id.");
9708
9709 // Add information to the INLINEASM node to know about this output.
9710 InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1);
9711 OpFlags.setMemConstraint(ConstraintID);
9712 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
9713 MVT::i32));
9714 AsmNodeOperands.push_back(x: OpInfo.CallOperand);
9715 } else {
9716 // Otherwise, this outputs to a register (directly for C_Register /
9717 // C_RegisterClass, and a target-defined fashion for
9718 // C_Immediate/C_Other). Find a register that we can use.
9719 if (OpInfo.AssignedRegs.Regs.empty()) {
9720 emitInlineAsmError(
9721 Call, Message: "couldn't allocate output register for constraint '" +
9722 Twine(OpInfo.ConstraintCode) + "'");
9723 return;
9724 }
9725
9726 if (DetectWriteToReservedRegister())
9727 return;
9728
9729 // Add information to the INLINEASM node to know that this register is
9730 // set.
9731 OpInfo.AssignedRegs.AddInlineAsmOperands(
9732 Code: OpInfo.isEarlyClobber ? InlineAsm::Kind::RegDefEarlyClobber
9733 : InlineAsm::Kind::RegDef,
9734 HasMatching: false, MatchingIdx: 0, dl: getCurSDLoc(), DAG, Ops&: AsmNodeOperands);
9735 }
9736 break;
9737
9738 case InlineAsm::isInput:
9739 case InlineAsm::isLabel: {
9740 SDValue InOperandVal = OpInfo.CallOperand;
9741
9742 if (OpInfo.isMatchingInputConstraint()) {
9743 // If this is required to match an output register we have already set,
9744 // just use its register.
9745 auto CurOp = findMatchingInlineAsmOperand(OperandNo: OpInfo.getMatchedOperand(),
9746 AsmNodeOperands);
9747 InlineAsm::Flag Flag(AsmNodeOperands[CurOp]->getAsZExtVal());
9748 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
9749 if (OpInfo.isIndirect) {
9750 // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
9751 emitInlineAsmError(Call, Message: "inline asm not supported yet: "
9752 "don't know how to handle tied "
9753 "indirect register inputs");
9754 return;
9755 }
9756
9757 SmallVector<unsigned, 4> Regs;
9758 MachineFunction &MF = DAG.getMachineFunction();
9759 MachineRegisterInfo &MRI = MF.getRegInfo();
9760 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
9761 auto *R = cast<RegisterSDNode>(Val&: AsmNodeOperands[CurOp+1]);
9762 Register TiedReg = R->getReg();
9763 MVT RegVT = R->getSimpleValueType(ResNo: 0);
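          // Pick a register class compatible with the tied register: its
          // existing class for a virtual register, the class for RegVT if the
          // type is known, or the minimal class containing the physical
          // register otherwise.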
9764 const TargetRegisterClass *RC =
9765 TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
9766 : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
9767 : TRI.getMinimalPhysRegClass(TiedReg);
9768 for (unsigned i = 0, e = Flag.getNumOperandRegisters(); i != e; ++i)
9769 Regs.push_back(Elt: MRI.createVirtualRegister(RegClass: RC));
9770
9771 RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
9772
9773 SDLoc dl = getCurSDLoc();
          // Use the produced MatchedRegs object to copy the input value into
          // the registers tied to the matched output operand.
9775 MatchedRegs.getCopyToRegs(Val: InOperandVal, DAG, dl, Chain, Glue: &Glue, V: &Call);
9776 MatchedRegs.AddInlineAsmOperands(Code: InlineAsm::Kind::RegUse, HasMatching: true,
9777 MatchingIdx: OpInfo.getMatchedOperand(), dl, DAG,
9778 Ops&: AsmNodeOperands);
9779 break;
9780 }
9781
9782 assert(Flag.isMemKind() && "Unknown matching constraint!");
9783 assert(Flag.getNumOperandRegisters() == 1 &&
9784 "Unexpected number of operands");
9785 // Add information to the INLINEASM node to know about this input.
9786 // See InlineAsm.h isUseOperandTiedToDef.
9787 Flag.clearMemConstraint();
9788 Flag.setMatchingOp(OpInfo.getMatchedOperand());
9789 AsmNodeOperands.push_back(x: DAG.getTargetConstant(
9790 Val: Flag, DL: getCurSDLoc(), VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
9791 AsmNodeOperands.push_back(x: AsmNodeOperands[CurOp+1]);
9792 break;
9793 }
9794
9795 // Treat indirect 'X' constraint as memory.
9796 if (OpInfo.ConstraintType == TargetLowering::C_Other &&
9797 OpInfo.isIndirect)
9798 OpInfo.ConstraintType = TargetLowering::C_Memory;
9799
9800 if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
9801 OpInfo.ConstraintType == TargetLowering::C_Other) {
9802 std::vector<SDValue> Ops;
9803 TLI.LowerAsmOperandForConstraint(Op: InOperandVal, Constraint: OpInfo.ConstraintCode,
9804 Ops, DAG);
9805 if (Ops.empty()) {
9806 if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
9807 if (isa<ConstantSDNode>(Val: InOperandVal)) {
9808 emitInlineAsmError(Call, Message: "value out of range for constraint '" +
9809 Twine(OpInfo.ConstraintCode) + "'");
9810 return;
9811 }
9812
9813 emitInlineAsmError(Call,
9814 Message: "invalid operand for inline asm constraint '" +
9815 Twine(OpInfo.ConstraintCode) + "'");
9816 return;
9817 }
9818
9819 // Add information to the INLINEASM node to know about this input.
9820 InlineAsm::Flag ResOpType(InlineAsm::Kind::Imm, Ops.size());
9821 AsmNodeOperands.push_back(x: DAG.getTargetConstant(
9822 Val: ResOpType, DL: getCurSDLoc(), VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
9823 llvm::append_range(C&: AsmNodeOperands, R&: Ops);
9824 break;
9825 }
9826
9827 if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
9828 assert((OpInfo.isIndirect ||
9829 OpInfo.ConstraintType != TargetLowering::C_Memory) &&
9830 "Operand must be indirect to be a mem!");
9831 assert(InOperandVal.getValueType() ==
9832 TLI.getPointerTy(DAG.getDataLayout()) &&
9833 "Memory operands expect pointer values");
9834
9835 const InlineAsm::ConstraintCode ConstraintID =
9836 TLI.getInlineAsmMemConstraint(ConstraintCode: OpInfo.ConstraintCode);
9837 assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
9838 "Failed to convert memory constraint code to constraint id.");
9839
9840 // Add information to the INLINEASM node to know about this input.
9841 InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1);
9842 ResOpType.setMemConstraint(ConstraintID);
9843 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
9844 getCurSDLoc(),
9845 MVT::i32));
9846 AsmNodeOperands.push_back(x: InOperandVal);
9847 break;
9848 }
9849
9850 if (OpInfo.ConstraintType == TargetLowering::C_Address) {
9851 const InlineAsm::ConstraintCode ConstraintID =
9852 TLI.getInlineAsmMemConstraint(ConstraintCode: OpInfo.ConstraintCode);
9853 assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
9854 "Failed to convert memory constraint code to constraint id.");
9855
9856 InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1);
9857
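        // Functions are emitted with the Func operand kind and a direct
        // TargetGlobalAddress rather than as a plain memory operand.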
9858 SDValue AsmOp = InOperandVal;
9859 if (isFunction(Op: InOperandVal)) {
9860 auto *GA = cast<GlobalAddressSDNode>(Val&: InOperandVal);
9861 ResOpType = InlineAsm::Flag(InlineAsm::Kind::Func, 1);
9862 AsmOp = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: getCurSDLoc(),
9863 VT: InOperandVal.getValueType(),
9864 offset: GA->getOffset());
9865 }
9866
9867 // Add information to the INLINEASM node to know about this input.
9868 ResOpType.setMemConstraint(ConstraintID);
9869
9870 AsmNodeOperands.push_back(
9871 DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32));
9872
9873 AsmNodeOperands.push_back(x: AsmOp);
9874 break;
9875 }
9876
9877 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
9878 OpInfo.ConstraintType == TargetLowering::C_Register) &&
9879 "Unknown constraint type!");
9880
9881 // TODO: Support this.
9882 if (OpInfo.isIndirect) {
9883 emitInlineAsmError(
9884 Call, Message: "Don't know how to handle indirect register inputs yet "
9885 "for constraint '" +
9886 Twine(OpInfo.ConstraintCode) + "'");
9887 return;
9888 }
9889
9890 // Copy the input into the appropriate registers.
9891 if (OpInfo.AssignedRegs.Regs.empty()) {
9892 emitInlineAsmError(Call,
9893 Message: "couldn't allocate input reg for constraint '" +
9894 Twine(OpInfo.ConstraintCode) + "'");
9895 return;
9896 }
9897
9898 if (DetectWriteToReservedRegister())
9899 return;
9900
9901 SDLoc dl = getCurSDLoc();
9902
9903 OpInfo.AssignedRegs.getCopyToRegs(Val: InOperandVal, DAG, dl, Chain, Glue: &Glue,
9904 V: &Call);
9905
9906 OpInfo.AssignedRegs.AddInlineAsmOperands(Code: InlineAsm::Kind::RegUse, HasMatching: false,
9907 MatchingIdx: 0, dl, DAG, Ops&: AsmNodeOperands);
9908 break;
9909 }
9910 case InlineAsm::isClobber:
9911 // Add the clobbered value to the operand list, so that the register
9912 // allocator is aware that the physreg got clobbered.
9913 if (!OpInfo.AssignedRegs.Regs.empty())
9914 OpInfo.AssignedRegs.AddInlineAsmOperands(Code: InlineAsm::Kind::Clobber,
9915 HasMatching: false, MatchingIdx: 0, dl: getCurSDLoc(), DAG,
9916 Ops&: AsmNodeOperands);
9917 break;
9918 }
9919 }
9920
9921 // Finish up input operands. Set the input chain and add the flag last.
9922 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
9923 if (Glue.getNode()) AsmNodeOperands.push_back(x: Glue);
9924
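  // Emit the inline asm node itself. Callbr lowers to INLINEASM_BR so that
  // its terminator semantics are preserved; everything else uses INLINEASM.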
9925 unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
9926 Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
9927 DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
9928 Glue = Chain.getValue(R: 1);
9929
9930 // Do additional work to generate outputs.
9931
9932 SmallVector<EVT, 1> ResultVTs;
9933 SmallVector<SDValue, 1> ResultValues;
9934 SmallVector<SDValue, 8> OutChains;
9935
9936 llvm::Type *CallResultType = Call.getType();
9937 ArrayRef<Type *> ResultTypes;
9938 if (StructType *StructResult = dyn_cast<StructType>(Val: CallResultType))
9939 ResultTypes = StructResult->elements();
9940 else if (!CallResultType->isVoidTy())
9941 ResultTypes = ArrayRef(CallResultType);
9942
9943 auto CurResultType = ResultTypes.begin();
9944 auto handleRegAssign = [&](SDValue V) {
9945 assert(CurResultType != ResultTypes.end() && "Unexpected value");
9946 assert((*CurResultType)->isSized() && "Unexpected unsized type");
9947 EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: *CurResultType);
9948 ++CurResultType;
    // If the type of the inline asm call site return value differs from the
    // type of the asm output but has the same size, bitcast it. One example of
    // this is vectors with a different width / number of elements. This can
9952 // happen for register classes that can contain multiple different value
9953 // types. The preg or vreg allocated may not have the same VT as was
9954 // expected.
9955 //
9956 // This can also happen for a return value that disagrees with the register
9957 // class it is put in, eg. a double in a general-purpose register on a
9958 // 32-bit machine.
9959 if (ResultVT != V.getValueType() &&
9960 ResultVT.getSizeInBits() == V.getValueSizeInBits())
9961 V = DAG.getNode(Opcode: ISD::BITCAST, DL: getCurSDLoc(), VT: ResultVT, Operand: V);
9962 else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
9963 V.getValueType().isInteger()) {
9964 // If a result value was tied to an input value, the computed result
9965 // may have a wider width than the expected result. Extract the
9966 // relevant portion.
9967 V = DAG.getNode(Opcode: ISD::TRUNCATE, DL: getCurSDLoc(), VT: ResultVT, Operand: V);
9968 }
9969 assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
9970 ResultVTs.push_back(Elt: ResultVT);
9971 ResultValues.push_back(Elt: V);
9972 };
9973
9974 // Deal with output operands.
9975 for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
9976 if (OpInfo.Type == InlineAsm::isOutput) {
9977 SDValue Val;
9978 // Skip trivial output operands.
9979 if (OpInfo.AssignedRegs.Regs.empty())
9980 continue;
9981
9982 switch (OpInfo.ConstraintType) {
9983 case TargetLowering::C_Register:
9984 case TargetLowering::C_RegisterClass:
9985 Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, dl: getCurSDLoc(),
9986 Chain, Glue: &Glue, V: &Call);
9987 break;
9988 case TargetLowering::C_Immediate:
9989 case TargetLowering::C_Other:
9990 Val = TLI.LowerAsmOutputForConstraint(Chain, Glue, DL: getCurSDLoc(),
9991 OpInfo, DAG);
9992 break;
9993 case TargetLowering::C_Memory:
9994 break; // Already handled.
9995 case TargetLowering::C_Address:
9996 break; // Silence warning.
9997 case TargetLowering::C_Unknown:
9998 assert(false && "Unexpected unknown constraint");
9999 }
10000
      // Indirect outputs manifest as stores. Record the output chains.
10002 if (OpInfo.isIndirect) {
10003 const Value *Ptr = OpInfo.CallOperandVal;
10004 assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
10005 SDValue Store = DAG.getStore(Chain, dl: getCurSDLoc(), Val, Ptr: getValue(V: Ptr),
10006 PtrInfo: MachinePointerInfo(Ptr));
10007 OutChains.push_back(Elt: Store);
10008 } else {
10009 // generate CopyFromRegs to associated registers.
10010 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
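        // getCopyFromRegs may return several values merged into a single
        // MERGE_VALUES node; record each component result separately.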
10011 if (Val.getOpcode() == ISD::MERGE_VALUES) {
10012 for (const SDValue &V : Val->op_values())
10013 handleRegAssign(V);
10014 } else
10015 handleRegAssign(Val);
10016 }
10017 }
10018 }
10019
10020 // Set results.
10021 if (!ResultValues.empty()) {
10022 assert(CurResultType == ResultTypes.end() &&
10023 "Mismatch in number of ResultTypes");
10024 assert(ResultValues.size() == ResultTypes.size() &&
10025 "Mismatch in number of output operands in asm result");
10026
10027 SDValue V = DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
10028 VTList: DAG.getVTList(VTs: ResultVTs), Ops: ResultValues);
10029 setValue(V: &Call, NewN: V);
10030 }
10031
10032 // Collect store chains.
10033 if (!OutChains.empty())
10034 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
10035
10036 if (EmitEHLabels) {
10037 Chain = lowerEndEH(Chain, II: cast<InvokeInst>(Val: &Call), EHPadBB, BeginLabel);
10038 }
10039
  // Only update the root if the inline assembly has a memory effect.
10041 if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr ||
10042 EmitEHLabels)
10043 DAG.setRoot(Chain);
10044}
10045
10046void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
10047 const Twine &Message) {
10048 LLVMContext &Ctx = *DAG.getContext();
10049 Ctx.emitError(I: &Call, ErrorStr: Message);
10050
10051 // Make sure we leave the DAG in a valid state
10052 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10053 SmallVector<EVT, 1> ValueVTs;
10054 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: Call.getType(), ValueVTs);
10055
10056 if (ValueVTs.empty())
10057 return;
10058
10059 SmallVector<SDValue, 1> Ops;
10060 for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
10061 Ops.push_back(Elt: DAG.getUNDEF(VT: ValueVTs[i]));
10062
10063 setValue(V: &Call, NewN: DAG.getMergeValues(Ops, dl: getCurSDLoc()));
10064}
10065
10066void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
10067 DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
10068 MVT::Other, getRoot(),
10069 getValue(I.getArgOperand(0)),
10070 DAG.getSrcValue(I.getArgOperand(0))));
10071}
10072
10073void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
10074 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10075 const DataLayout &DL = DAG.getDataLayout();
10076 SDValue V = DAG.getVAArg(
10077 VT: TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getType()), dl: getCurSDLoc(),
10078 Chain: getRoot(), Ptr: getValue(V: I.getOperand(i_nocapture: 0)), SV: DAG.getSrcValue(v: I.getOperand(i_nocapture: 0)),
10079 Align: DL.getABITypeAlign(Ty: I.getType()).value());
10080 DAG.setRoot(V.getValue(R: 1));
10081
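  // VAARG loads using the in-memory type; if the IR result is a pointer,
  // extend or truncate to the DAG's pointer value type, which may differ.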
10082 if (I.getType()->isPointerTy())
10083 V = DAG.getPtrExtOrTrunc(
10084 Op: V, DL: getCurSDLoc(), VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()));
10085 setValue(V: &I, NewN: V);
10086}
10087
10088void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
10089 DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
10090 MVT::Other, getRoot(),
10091 getValue(I.getArgOperand(0)),
10092 DAG.getSrcValue(I.getArgOperand(0))));
10093}
10094
10095void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
10096 DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
10097 MVT::Other, getRoot(),
10098 getValue(I.getArgOperand(0)),
10099 getValue(I.getArgOperand(1)),
10100 DAG.getSrcValue(I.getArgOperand(0)),
10101 DAG.getSrcValue(I.getArgOperand(1))));
10102}
10103
10104SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
10105 const Instruction &I,
10106 SDValue Op) {
10107 const MDNode *Range = getRangeMetadata(I);
10108 if (!Range)
10109 return Op;
10110
10111 ConstantRange CR = getConstantRangeFromMetadata(RangeMD: *Range);
10112 if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
10113 return Op;
10114
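  // AssertZext can only encode ranges of the form [0, 2^N); give up unless
  // the range starts at zero.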
10115 APInt Lo = CR.getUnsignedMin();
10116 if (!Lo.isMinValue())
10117 return Op;
10118
10119 APInt Hi = CR.getUnsignedMax();
10120 unsigned Bits = std::max(a: Hi.getActiveBits(),
10121 b: static_cast<unsigned>(IntegerType::MIN_INT_BITS));
10122
10123 EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Bits);
10124
10125 SDLoc SL = getCurSDLoc();
10126
10127 SDValue ZExt = DAG.getNode(Opcode: ISD::AssertZext, DL: SL, VT: Op.getValueType(), N1: Op,
10128 N2: DAG.getValueType(SmallVT));
10129 unsigned NumVals = Op.getNode()->getNumValues();
10130 if (NumVals == 1)
10131 return ZExt;
10132
10133 SmallVector<SDValue, 4> Ops;
10134
10135 Ops.push_back(Elt: ZExt);
10136 for (unsigned I = 1; I != NumVals; ++I)
10137 Ops.push_back(Elt: Op.getValue(R: I));
10138
10139 return DAG.getMergeValues(Ops, dl: SL);
10140}
10141
/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
10143/// the call being lowered.
10144///
10145/// This is a helper for lowering intrinsics that follow a target calling
10146/// convention or require stack pointer adjustment. Only a subset of the
10147/// intrinsic's operands need to participate in the calling convention.
10148void SelectionDAGBuilder::populateCallLoweringInfo(
10149 TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
10150 unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
10151 AttributeSet RetAttrs, bool IsPatchPoint) {
10152 TargetLowering::ArgListTy Args;
10153 Args.reserve(n: NumArgs);
10154
10155 // Populate the argument list.
10156 // Attributes for args start at offset 1, after the return attribute.
10157 for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
10158 ArgI != ArgE; ++ArgI) {
10159 const Value *V = Call->getOperand(i_nocapture: ArgI);
10160
10161 assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
10162
10163 TargetLowering::ArgListEntry Entry;
10164 Entry.Node = getValue(V);
10165 Entry.Ty = V->getType();
10166 Entry.setAttributes(Call, ArgIdx: ArgI);
10167 Args.push_back(x: Entry);
10168 }
10169
10170 CLI.setDebugLoc(getCurSDLoc())
10171 .setChain(getRoot())
10172 .setCallee(CC: Call->getCallingConv(), ResultType: ReturnTy, Target: Callee, ArgsList: std::move(Args),
10173 ResultAttrs: RetAttrs)
10174 .setDiscardResult(Call->use_empty())
10175 .setIsPatchPoint(IsPatchPoint)
10176 .setIsPreallocated(
10177 Call->countOperandBundlesOfType(ID: LLVMContext::OB_preallocated) != 0);
10178}
10179
10180/// Add a stack map intrinsic call's live variable operands to a stackmap
10181/// or patchpoint target node's operand list.
10182///
10183/// Constants are converted to TargetConstants purely as an optimization to
10184/// avoid constant materialization and register allocation.
10185///
10186/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
/// generate address computation nodes, and so FinalizeISel can convert the
10188/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
10189/// address materialization and register allocation, but may also be required
10190/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
10191/// alloca in the entry block, then the runtime may assume that the alloca's
10192/// StackMap location can be read immediately after compilation and that the
10193/// location is valid at any point during execution (this is similar to the
10194/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
10195/// only available in a register, then the runtime would need to trap when
10196/// execution reaches the StackMap in order to read the alloca's location.
10197static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
10198 const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
10199 SelectionDAGBuilder &Builder) {
10200 SelectionDAG &DAG = Builder.DAG;
10201 for (unsigned I = StartIdx; I < Call.arg_size(); I++) {
10202 SDValue Op = Builder.getValue(V: Call.getArgOperand(i: I));
10203
10204 // Things on the stack are pointer-typed, meaning that they are already
10205 // legal and can be emitted directly to target nodes.
10206 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op)) {
10207 Ops.push_back(Elt: DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: Op.getValueType()));
10208 } else {
10209 // Otherwise emit a target independent node to be legalised.
10210 Ops.push_back(Elt: Builder.getValue(V: Call.getArgOperand(i: I)));
10211 }
10212 }
10213}
10214
10215/// Lower llvm.experimental.stackmap.
10216void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
10217 // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
10218 // [live variables...])
10219
10220 assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
10221
10222 SDValue Chain, InGlue, Callee;
10223 SmallVector<SDValue, 32> Ops;
10224
10225 SDLoc DL = getCurSDLoc();
10226 Callee = getValue(V: CI.getCalledOperand());
10227
10228 // The stackmap intrinsic only records the live variables (the arguments
  // passed to it) and emits NOPs (if requested). Unlike the patchpoint
10230 // intrinsic, this won't be lowered to a function call. This means we don't
10231 // have to worry about calling conventions and target specific lowering code.
10232 // Instead we perform the call lowering right here.
10233 //
10234 // chain, flag = CALLSEQ_START(chain, 0, 0)
10235 // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
10236 // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
10237 //
10238 Chain = DAG.getCALLSEQ_START(Chain: getRoot(), InSize: 0, OutSize: 0, DL);
10239 InGlue = Chain.getValue(R: 1);
10240
10241 // Add the STACKMAP operands, starting with DAG house-keeping.
10242 Ops.push_back(Elt: Chain);
10243 Ops.push_back(Elt: InGlue);
10244
10245 // Add the <id>, <numShadowBytes> operands.
10246 //
10247 // These do not require legalisation, and can be emitted directly to target
10248 // constant nodes.
10249 SDValue ID = getValue(V: CI.getArgOperand(i: 0));
10250 assert(ID.getValueType() == MVT::i64);
10251 SDValue IDConst =
10252 DAG.getTargetConstant(Val: ID->getAsZExtVal(), DL, VT: ID.getValueType());
10253 Ops.push_back(Elt: IDConst);
10254
10255 SDValue Shad = getValue(V: CI.getArgOperand(i: 1));
10256 assert(Shad.getValueType() == MVT::i32);
10257 SDValue ShadConst =
10258 DAG.getTargetConstant(Val: Shad->getAsZExtVal(), DL, VT: Shad.getValueType());
10259 Ops.push_back(Elt: ShadConst);
10260
10261 // Add the live variables.
10262 addStackMapLiveVars(Call: CI, StartIdx: 2, DL, Ops, Builder&: *this);
10263
10264 // Create the STACKMAP node.
10265 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10266 Chain = DAG.getNode(Opcode: ISD::STACKMAP, DL, VTList: NodeTys, Ops);
10267 InGlue = Chain.getValue(R: 1);
10268
10269 Chain = DAG.getCALLSEQ_END(Chain, Size1: 0, Size2: 0, Glue: InGlue, DL);
10270
10271 // Stackmaps don't generate values, so nothing goes into the NodeMap.
10272
10273 // Set the root to the target-lowered call chain.
10274 DAG.setRoot(Chain);
10275
10276 // Inform the Frame Information that we have a stackmap in this function.
10277 FuncInfo.MF->getFrameInfo().setHasStackMap();
10278}
10279
10280/// Lower llvm.experimental.patchpoint directly to its target opcode.
10281void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
10282 const BasicBlock *EHPadBB) {
10283 // <ty> @llvm.experimental.patchpoint.<ty>(i64 <id>,
10284 // i32 <numBytes>,
10285 // i8* <target>,
10286 // i32 <numArgs>,
10287 // [Args...],
10288 // [live variables...])
10289
10290 CallingConv::ID CC = CB.getCallingConv();
10291 bool IsAnyRegCC = CC == CallingConv::AnyReg;
10292 bool HasDef = !CB.getType()->isVoidTy();
10293 SDLoc dl = getCurSDLoc();
10294 SDValue Callee = getValue(V: CB.getArgOperand(i: PatchPointOpers::TargetPos));
10295
10296 // Handle immediate and symbolic callees.
10297 if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Val&: Callee))
10298 Callee = DAG.getIntPtrConstant(Val: ConstCallee->getZExtValue(), DL: dl,
10299 /*isTarget=*/true);
10300 else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Val&: Callee))
10301 Callee = DAG.getTargetGlobalAddress(GV: SymbolicCallee->getGlobal(),
10302 DL: SDLoc(SymbolicCallee),
10303 VT: SymbolicCallee->getValueType(ResNo: 0));
10304
10305 // Get the real number of arguments participating in the call <numArgs>
10306 SDValue NArgVal = getValue(V: CB.getArgOperand(i: PatchPointOpers::NArgPos));
10307 unsigned NumArgs = NArgVal->getAsZExtVal();
10308
10309 // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
10310 // Intrinsics include all meta-operands up to but not including CC.
10311 unsigned NumMetaOpers = PatchPointOpers::CCPos;
10312 assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
10313 "Not enough arguments provided to the patchpoint intrinsic");
10314
10315 // For AnyRegCC the arguments are lowered later on manually.
10316 unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
10317 Type *ReturnTy =
10318 IsAnyRegCC ? Type::getVoidTy(C&: *DAG.getContext()) : CB.getType();
10319
10320 TargetLowering::CallLoweringInfo CLI(DAG);
10321 populateCallLoweringInfo(CLI, Call: &CB, ArgIdx: NumMetaOpers, NumArgs: NumCallArgs, Callee,
10322 ReturnTy, RetAttrs: CB.getAttributes().getRetAttrs(), IsPatchPoint: true);
10323 std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
10324
10325 SDNode *CallEnd = Result.second.getNode();
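  // If the call produces a value, the chain may be reached through a
  // CopyFromReg; step past it so CallEnd points at the CALLSEQ_END node.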
10326 if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
10327 CallEnd = CallEnd->getOperand(Num: 0).getNode();
10328
10329 /// Get a call instruction from the call sequence chain.
10330 /// Tail calls are not allowed.
10331 assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
10332 "Expected a callseq node.");
10333 SDNode *Call = CallEnd->getOperand(Num: 0).getNode();
10334 bool HasGlue = Call->getGluedNode();
10335
10336 // Replace the target specific call node with the patchable intrinsic.
10337 SmallVector<SDValue, 8> Ops;
10338
10339 // Push the chain.
10340 Ops.push_back(Elt: *(Call->op_begin()));
10341
10342 // Optionally, push the glue (if any).
10343 if (HasGlue)
10344 Ops.push_back(Elt: *(Call->op_end() - 1));
10345
10346 // Push the register mask info.
10347 if (HasGlue)
10348 Ops.push_back(Elt: *(Call->op_end() - 2));
10349 else
10350 Ops.push_back(Elt: *(Call->op_end() - 1));
10351
10352 // Add the <id> and <numBytes> constants.
10353 SDValue IDVal = getValue(V: CB.getArgOperand(i: PatchPointOpers::IDPos));
10354 Ops.push_back(DAG.getTargetConstant(IDVal->getAsZExtVal(), dl, MVT::i64));
10355 SDValue NBytesVal = getValue(V: CB.getArgOperand(i: PatchPointOpers::NBytesPos));
10356 Ops.push_back(DAG.getTargetConstant(NBytesVal->getAsZExtVal(), dl, MVT::i32));
10357
10358 // Add the callee.
10359 Ops.push_back(Elt: Callee);
10360
10361 // Adjust <numArgs> to account for any arguments that have been passed on the
10362 // stack instead.
10363 // Call Node: Chain, Target, {Args}, RegMask, [Glue]
10364 unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
10365 NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
10366 Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
10367
10368 // Add the calling convention
10369 Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
10370
10371 // Add the arguments we omitted previously. The register allocator should
10372 // place these in any free register.
10373 if (IsAnyRegCC)
10374 for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
10375 Ops.push_back(Elt: getValue(V: CB.getArgOperand(i)));
10376
10377 // Push the arguments from the call instruction.
10378 SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
10379 Ops.append(in_start: Call->op_begin() + 2, in_end: e);
10380
10381 // Push live variables for the stack map.
10382 addStackMapLiveVars(Call: CB, StartIdx: NumMetaOpers + NumArgs, DL: dl, Ops, Builder&: *this);
10383
10384 SDVTList NodeTys;
10385 if (IsAnyRegCC && HasDef) {
10386 // Create the return types based on the intrinsic definition
10387 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10388 SmallVector<EVT, 3> ValueVTs;
10389 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: CB.getType(), ValueVTs);
10390 assert(ValueVTs.size() == 1 && "Expected only one return value type.");
10391
10392 // There is always a chain and a glue type at the end
10393 ValueVTs.push_back(MVT::Other);
10394 ValueVTs.push_back(MVT::Glue);
10395 NodeTys = DAG.getVTList(VTs: ValueVTs);
10396 } else
10397 NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10398
10399 // Replace the target specific call node with a PATCHPOINT node.
10400 SDValue PPV = DAG.getNode(Opcode: ISD::PATCHPOINT, DL: dl, VTList: NodeTys, Ops);
10401
10402 // Update the NodeMap.
10403 if (HasDef) {
10404 if (IsAnyRegCC)
10405 setValue(V: &CB, NewN: SDValue(PPV.getNode(), 0));
10406 else
10407 setValue(V: &CB, NewN: Result.first);
10408 }
10409
10410 // Fixup the consumers of the intrinsic. The chain and glue may be used in the
10411 // call sequence. Furthermore the location of the chain and glue can change
10412 // when the AnyReg calling convention is used and the intrinsic returns a
10413 // value.
10414 if (IsAnyRegCC && HasDef) {
10415 SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
10416 SDValue To[] = {PPV.getValue(R: 1), PPV.getValue(R: 2)};
10417 DAG.ReplaceAllUsesOfValuesWith(From, To, Num: 2);
10418 } else
10419 DAG.ReplaceAllUsesWith(From: Call, To: PPV.getNode());
10420 DAG.DeleteNode(N: Call);
10421
10422 // Inform the Frame Information that we have a patchpoint in this function.
10423 FuncInfo.MF->getFrameInfo().setHasPatchPoint();
10424}
10425
10426void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
10427 unsigned Intrinsic) {
10428 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10429 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
10430 SDValue Op2;
10431 if (I.arg_size() > 1)
10432 Op2 = getValue(V: I.getArgOperand(i: 1));
10433 SDLoc dl = getCurSDLoc();
10434 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
10435 SDValue Res;
10436 SDNodeFlags SDFlags;
10437 if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &I))
10438 SDFlags.copyFMF(FPMO: *FPMO);
10439
10440 switch (Intrinsic) {
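  // The fadd/fmul reductions take an explicit start value. When reassociation
  // is allowed we can use the relaxed VECREDUCE_* node and combine the start
  // value afterwards; otherwise we must use the strictly ordered
  // VECREDUCE_SEQ_* form.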
10441 case Intrinsic::vector_reduce_fadd:
10442 if (SDFlags.hasAllowReassociation())
10443 Res = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT, N1: Op1,
10444 N2: DAG.getNode(Opcode: ISD::VECREDUCE_FADD, DL: dl, VT, Operand: Op2, Flags: SDFlags),
10445 Flags: SDFlags);
10446 else
10447 Res = DAG.getNode(Opcode: ISD::VECREDUCE_SEQ_FADD, DL: dl, VT, N1: Op1, N2: Op2, Flags: SDFlags);
10448 break;
10449 case Intrinsic::vector_reduce_fmul:
10450 if (SDFlags.hasAllowReassociation())
10451 Res = DAG.getNode(Opcode: ISD::FMUL, DL: dl, VT, N1: Op1,
10452 N2: DAG.getNode(Opcode: ISD::VECREDUCE_FMUL, DL: dl, VT, Operand: Op2, Flags: SDFlags),
10453 Flags: SDFlags);
10454 else
10455 Res = DAG.getNode(Opcode: ISD::VECREDUCE_SEQ_FMUL, DL: dl, VT, N1: Op1, N2: Op2, Flags: SDFlags);
10456 break;
10457 case Intrinsic::vector_reduce_add:
10458 Res = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL: dl, VT, Operand: Op1);
10459 break;
10460 case Intrinsic::vector_reduce_mul:
10461 Res = DAG.getNode(Opcode: ISD::VECREDUCE_MUL, DL: dl, VT, Operand: Op1);
10462 break;
10463 case Intrinsic::vector_reduce_and:
10464 Res = DAG.getNode(Opcode: ISD::VECREDUCE_AND, DL: dl, VT, Operand: Op1);
10465 break;
10466 case Intrinsic::vector_reduce_or:
10467 Res = DAG.getNode(Opcode: ISD::VECREDUCE_OR, DL: dl, VT, Operand: Op1);
10468 break;
10469 case Intrinsic::vector_reduce_xor:
10470 Res = DAG.getNode(Opcode: ISD::VECREDUCE_XOR, DL: dl, VT, Operand: Op1);
10471 break;
10472 case Intrinsic::vector_reduce_smax:
10473 Res = DAG.getNode(Opcode: ISD::VECREDUCE_SMAX, DL: dl, VT, Operand: Op1);
10474 break;
10475 case Intrinsic::vector_reduce_smin:
10476 Res = DAG.getNode(Opcode: ISD::VECREDUCE_SMIN, DL: dl, VT, Operand: Op1);
10477 break;
10478 case Intrinsic::vector_reduce_umax:
10479 Res = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL: dl, VT, Operand: Op1);
10480 break;
10481 case Intrinsic::vector_reduce_umin:
10482 Res = DAG.getNode(Opcode: ISD::VECREDUCE_UMIN, DL: dl, VT, Operand: Op1);
10483 break;
10484 case Intrinsic::vector_reduce_fmax:
10485 Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMAX, DL: dl, VT, Operand: Op1, Flags: SDFlags);
10486 break;
10487 case Intrinsic::vector_reduce_fmin:
10488 Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMIN, DL: dl, VT, Operand: Op1, Flags: SDFlags);
10489 break;
10490 case Intrinsic::vector_reduce_fmaximum:
10491 Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMAXIMUM, DL: dl, VT, Operand: Op1, Flags: SDFlags);
10492 break;
10493 case Intrinsic::vector_reduce_fminimum:
10494 Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMINIMUM, DL: dl, VT, Operand: Op1, Flags: SDFlags);
10495 break;
10496 default:
10497 llvm_unreachable("Unhandled vector reduce intrinsic");
10498 }
10499 setValue(V: &I, NewN: Res);
10500}
10501
10502/// Returns an AttributeList representing the attributes applied to the return
10503/// value of the given call.
10504static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
10505 SmallVector<Attribute::AttrKind, 2> Attrs;
10506 if (CLI.RetSExt)
10507 Attrs.push_back(Attribute::SExt);
10508 if (CLI.RetZExt)
10509 Attrs.push_back(Attribute::ZExt);
10510 if (CLI.IsInReg)
10511 Attrs.push_back(Attribute::InReg);
10512
10513 return AttributeList::get(C&: CLI.RetTy->getContext(), Index: AttributeList::ReturnIndex,
10514 Kinds: Attrs);
10515}
10516
10517/// TargetLowering::LowerCallTo - This is the default LowerCallTo
10518/// implementation, which just calls LowerCall.
10519/// FIXME: When all targets are
10520/// migrated to using LowerCall, this hook should be integrated into SDISel.
10521std::pair<SDValue, SDValue>
10522TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
10523 // Handle the incoming return values from the call.
10524 CLI.Ins.clear();
10525 Type *OrigRetTy = CLI.RetTy;
10526 SmallVector<EVT, 4> RetTys;
10527 SmallVector<TypeSize, 4> Offsets;
10528 auto &DL = CLI.DAG.getDataLayout();
10529 ComputeValueVTs(TLI: *this, DL, Ty: CLI.RetTy, ValueVTs&: RetTys, Offsets: &Offsets);
10530
10531 if (CLI.IsPostTypeLegalization) {
10532 // If we are lowering a libcall after legalization, split the return type.
10533 SmallVector<EVT, 4> OldRetTys;
10534 SmallVector<TypeSize, 4> OldOffsets;
10535 RetTys.swap(RHS&: OldRetTys);
10536 Offsets.swap(RHS&: OldOffsets);
10537
10538 for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
10539 EVT RetVT = OldRetTys[i];
10540 uint64_t Offset = OldOffsets[i];
10541 MVT RegisterVT = getRegisterType(Context&: CLI.RetTy->getContext(), VT: RetVT);
10542 unsigned NumRegs = getNumRegisters(Context&: CLI.RetTy->getContext(), VT: RetVT);
10543 unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
10544 RetTys.append(NumInputs: NumRegs, Elt: RegisterVT);
10545 for (unsigned j = 0; j != NumRegs; ++j)
10546 Offsets.push_back(Elt: TypeSize::getFixed(ExactSize: Offset + j * RegisterVTByteSZ));
10547 }
10548 }
10549
10550 SmallVector<ISD::OutputArg, 4> Outs;
10551 GetReturnInfo(CC: CLI.CallConv, ReturnType: CLI.RetTy, attr: getReturnAttrs(CLI), Outs, TLI: *this, DL);
10552
10553 bool CanLowerReturn =
10554 this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
10555 CLI.IsVarArg, Outs, CLI.RetTy->getContext());
10556
10557 SDValue DemoteStackSlot;
10558 int DemoteStackIdx = -100;
10559 if (!CanLowerReturn) {
10560 // FIXME: equivalent assert?
10561 // assert(!CS.hasInAllocaArgument() &&
10562 // "sret demotion is incompatible with inalloca");
10563 uint64_t TySize = DL.getTypeAllocSize(Ty: CLI.RetTy);
10564 Align Alignment = DL.getPrefTypeAlign(Ty: CLI.RetTy);
10565 MachineFunction &MF = CLI.DAG.getMachineFunction();
10566 DemoteStackIdx =
10567 MF.getFrameInfo().CreateStackObject(Size: TySize, Alignment, isSpillSlot: false);
10568 Type *StackSlotPtrType = PointerType::get(ElementType: CLI.RetTy,
10569 AddressSpace: DL.getAllocaAddrSpace());
10570
10571 DemoteStackSlot = CLI.DAG.getFrameIndex(FI: DemoteStackIdx, VT: getFrameIndexTy(DL));
10572 ArgListEntry Entry;
10573 Entry.Node = DemoteStackSlot;
10574 Entry.Ty = StackSlotPtrType;
10575 Entry.IsSExt = false;
10576 Entry.IsZExt = false;
10577 Entry.IsInReg = false;
10578 Entry.IsSRet = true;
10579 Entry.IsNest = false;
10580 Entry.IsByVal = false;
10581 Entry.IsByRef = false;
10582 Entry.IsReturned = false;
10583 Entry.IsSwiftSelf = false;
10584 Entry.IsSwiftAsync = false;
10585 Entry.IsSwiftError = false;
10586 Entry.IsCFGuardTarget = false;
10587 Entry.Alignment = Alignment;
10588 CLI.getArgs().insert(position: CLI.getArgs().begin(), x: Entry);
10589 CLI.NumFixedArgs += 1;
10590 CLI.getArgs()[0].IndirectType = CLI.RetTy;
10591 CLI.RetTy = Type::getVoidTy(C&: CLI.RetTy->getContext());
10592
10593 // sret demotion isn't compatible with tail-calls, since the sret argument
    // points into the caller's stack frame.
10595 CLI.IsTailCall = false;
10596 } else {
10597 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
10598 Ty: CLI.RetTy, CallConv: CLI.CallConv, isVarArg: CLI.IsVarArg, DL);
10599 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
10600 ISD::ArgFlagsTy Flags;
10601 if (NeedsRegBlock) {
10602 Flags.setInConsecutiveRegs();
10603 if (I == RetTys.size() - 1)
10604 Flags.setInConsecutiveRegsLast();
10605 }
10606 EVT VT = RetTys[I];
10607 MVT RegisterVT = getRegisterTypeForCallingConv(Context&: CLI.RetTy->getContext(),
10608 CC: CLI.CallConv, VT);
10609 unsigned NumRegs = getNumRegistersForCallingConv(Context&: CLI.RetTy->getContext(),
10610 CC: CLI.CallConv, VT);
10611 for (unsigned i = 0; i != NumRegs; ++i) {
10612 ISD::InputArg MyFlags;
10613 MyFlags.Flags = Flags;
10614 MyFlags.VT = RegisterVT;
10615 MyFlags.ArgVT = VT;
10616 MyFlags.Used = CLI.IsReturnValueUsed;
10617 if (CLI.RetTy->isPointerTy()) {
10618 MyFlags.Flags.setPointer();
10619 MyFlags.Flags.setPointerAddrSpace(
10620 cast<PointerType>(Val: CLI.RetTy)->getAddressSpace());
10621 }
10622 if (CLI.RetSExt)
10623 MyFlags.Flags.setSExt();
10624 if (CLI.RetZExt)
10625 MyFlags.Flags.setZExt();
10626 if (CLI.IsInReg)
10627 MyFlags.Flags.setInReg();
10628 CLI.Ins.push_back(Elt: MyFlags);
10629 }
10630 }
10631 }
10632
10633 // We push in swifterror return as the last element of CLI.Ins.
10634 ArgListTy &Args = CLI.getArgs();
10635 if (supportSwiftError()) {
10636 for (const ArgListEntry &Arg : Args) {
10637 if (Arg.IsSwiftError) {
10638 ISD::InputArg MyFlags;
10639 MyFlags.VT = getPointerTy(DL);
10640 MyFlags.ArgVT = EVT(getPointerTy(DL));
10641 MyFlags.Flags.setSwiftError();
10642 CLI.Ins.push_back(Elt: MyFlags);
10643 }
10644 }
10645 }
10646
10647 // Handle all of the outgoing arguments.
10648 CLI.Outs.clear();
10649 CLI.OutVals.clear();
10650 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
10651 SmallVector<EVT, 4> ValueVTs;
10652 ComputeValueVTs(TLI: *this, DL, Ty: Args[i].Ty, ValueVTs);
10653 // FIXME: Split arguments if CLI.IsPostTypeLegalization
10654 Type *FinalType = Args[i].Ty;
10655 if (Args[i].IsByVal)
10656 FinalType = Args[i].IndirectType;
10657 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
10658 Ty: FinalType, CallConv: CLI.CallConv, isVarArg: CLI.IsVarArg, DL);
10659 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
10660 ++Value) {
10661 EVT VT = ValueVTs[Value];
10662 Type *ArgTy = VT.getTypeForEVT(Context&: CLI.RetTy->getContext());
10663 SDValue Op = SDValue(Args[i].Node.getNode(),
10664 Args[i].Node.getResNo() + Value);
10665 ISD::ArgFlagsTy Flags;
10666
10667 // Certain targets (such as MIPS), may have a different ABI alignment
10668 // for a type depending on the context. Give the target a chance to
10669 // specify the alignment it wants.
10670 const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
10671 Flags.setOrigAlign(OriginalAlignment);
10672
10673 if (Args[i].Ty->isPointerTy()) {
10674 Flags.setPointer();
10675 Flags.setPointerAddrSpace(
10676 cast<PointerType>(Val: Args[i].Ty)->getAddressSpace());
10677 }
10678 if (Args[i].IsZExt)
10679 Flags.setZExt();
10680 if (Args[i].IsSExt)
10681 Flags.setSExt();
10682 if (Args[i].IsInReg) {
        // If we are using the vectorcall calling convention, a structure that
        // is passed InReg must be an HVA (homogeneous vector aggregate).
10685 if (CLI.CallConv == CallingConv::X86_VectorCall &&
10686 isa<StructType>(Val: FinalType)) {
          // The first value of a structure is marked as the start of the HVA.
10688 if (0 == Value)
10689 Flags.setHvaStart();
10690 Flags.setHva();
10691 }
10692 // Set InReg Flag
10693 Flags.setInReg();
10694 }
10695 if (Args[i].IsSRet)
10696 Flags.setSRet();
10697 if (Args[i].IsSwiftSelf)
10698 Flags.setSwiftSelf();
10699 if (Args[i].IsSwiftAsync)
10700 Flags.setSwiftAsync();
10701 if (Args[i].IsSwiftError)
10702 Flags.setSwiftError();
10703 if (Args[i].IsCFGuardTarget)
10704 Flags.setCFGuardTarget();
10705 if (Args[i].IsByVal)
10706 Flags.setByVal();
10707 if (Args[i].IsByRef)
10708 Flags.setByRef();
10709 if (Args[i].IsPreallocated) {
10710 Flags.setPreallocated();
10711 // Set the byval flag for CCAssignFn callbacks that don't know about
10712 // preallocated. This way we can know how many bytes we should've
10713 // allocated and how many bytes a callee cleanup function will pop. If
10714 // we port preallocated to more targets, we'll have to add custom
10715 // preallocated handling in the various CC lowering callbacks.
10716 Flags.setByVal();
10717 }
10718 if (Args[i].IsInAlloca) {
10719 Flags.setInAlloca();
10720 // Set the byval flag for CCAssignFn callbacks that don't know about
10721 // inalloca. This way we can know how many bytes we should've allocated
10722 // and how many bytes a callee cleanup function will pop. If we port
10723 // inalloca to more targets, we'll have to add custom inalloca handling
10724 // in the various CC lowering callbacks.
10725 Flags.setByVal();
10726 }
10727 Align MemAlign;
10728 if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
10729 unsigned FrameSize = DL.getTypeAllocSize(Ty: Args[i].IndirectType);
10730 Flags.setByValSize(FrameSize);
10731
        // Prefer the alignment specified on the argument; the byval type
        // alignment computed below is a fallback and there are cases it
        // cannot get right.
10733 if (auto MA = Args[i].Alignment)
10734 MemAlign = *MA;
10735 else
10736 MemAlign = Align(getByValTypeAlignment(Ty: Args[i].IndirectType, DL));
10737 } else if (auto MA = Args[i].Alignment) {
10738 MemAlign = *MA;
10739 } else {
10740 MemAlign = OriginalAlignment;
10741 }
10742 Flags.setMemAlign(MemAlign);
10743 if (Args[i].IsNest)
10744 Flags.setNest();
10745 if (NeedsRegBlock)
10746 Flags.setInConsecutiveRegs();
10747
10748 MVT PartVT = getRegisterTypeForCallingConv(Context&: CLI.RetTy->getContext(),
10749 CC: CLI.CallConv, VT);
10750 unsigned NumParts = getNumRegistersForCallingConv(Context&: CLI.RetTy->getContext(),
10751 CC: CLI.CallConv, VT);
10752 SmallVector<SDValue, 4> Parts(NumParts);
10753 ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
10754
10755 if (Args[i].IsSExt)
10756 ExtendKind = ISD::SIGN_EXTEND;
10757 else if (Args[i].IsZExt)
10758 ExtendKind = ISD::ZERO_EXTEND;
10759
10760 // Conservatively only handle 'returned' on non-vectors that can be lowered,
10761 // for now.
10762 if (Args[i].IsReturned && !Op.getValueType().isVector() &&
10763 CanLowerReturn) {
10764 assert((CLI.RetTy == Args[i].Ty ||
10765 (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
10766 CLI.RetTy->getPointerAddressSpace() ==
10767 Args[i].Ty->getPointerAddressSpace())) &&
10768 RetTys.size() == NumValues && "unexpected use of 'returned'");
10769 // Before passing 'returned' to the target lowering code, ensure that
10770 // either the register MVT and the actual EVT are the same size or that
10771 // the return value and argument are extended in the same way; in these
10772 // cases it's safe to pass the argument register value unchanged as the
10773 // return register value (although it's at the target's option whether
10774 // to do so)
10775 // TODO: allow code generation to take advantage of partially preserved
10776 // registers rather than clobbering the entire register when the
10777 // parameter extension method is not compatible with the return
10778 // extension method
10779 if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
10780 (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
10781 CLI.RetZExt == Args[i].IsZExt))
10782 Flags.setReturned();
10783 }
10784
10785 getCopyToParts(DAG&: CLI.DAG, DL: CLI.DL, Val: Op, Parts: &Parts[0], NumParts, PartVT, V: CLI.CB,
10786 CallConv: CLI.CallConv, ExtendKind);
10787
10788 for (unsigned j = 0; j != NumParts; ++j) {
        // If it isn't the first piece, the alignment must be 1.
10790 // For scalable vectors the scalable part is currently handled
10791 // by individual targets, so we just use the known minimum size here.
10792 ISD::OutputArg MyFlags(
10793 Flags, Parts[j].getValueType().getSimpleVT(), VT,
10794 i < CLI.NumFixedArgs, i,
10795 j * Parts[j].getValueType().getStoreSize().getKnownMinValue());
10796 if (NumParts > 1 && j == 0)
10797 MyFlags.Flags.setSplit();
10798 else if (j != 0) {
10799 MyFlags.Flags.setOrigAlign(Align(1));
10800 if (j == NumParts - 1)
10801 MyFlags.Flags.setSplitEnd();
10802 }
10803
10804 CLI.Outs.push_back(Elt: MyFlags);
10805 CLI.OutVals.push_back(Elt: Parts[j]);
10806 }
10807
10808 if (NeedsRegBlock && Value == NumValues - 1)
10809 CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
10810 }
10811 }
10812
10813 SmallVector<SDValue, 4> InVals;
10814 CLI.Chain = LowerCall(CLI, InVals);
10815
10816 // Update CLI.InVals to use outside of this function.
10817 CLI.InVals = InVals;
10818
10819 // Verify that the target's LowerCall behaved as expected.
10820 assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
10821 "LowerCall didn't return a valid chain!");
10822 assert((!CLI.IsTailCall || InVals.empty()) &&
10823 "LowerCall emitted a return value for a tail call!");
10824 assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
10825 "LowerCall didn't emit the correct number of values!");
10826
10827 // For a tail call, the return value is merely live-out and there aren't
10828 // any nodes in the DAG representing it. Return a special value to
10829 // indicate that a tail call has been emitted and no more Instructions
10830 // should be processed in the current block.
10831 if (CLI.IsTailCall) {
10832 CLI.DAG.setRoot(CLI.Chain);
10833 return std::make_pair(x: SDValue(), y: SDValue());
10834 }
10835
10836#ifndef NDEBUG
10837 for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
10838 assert(InVals[i].getNode() && "LowerCall emitted a null value!");
10839 assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
10840 "LowerCall emitted a value with the wrong type!");
10841 }
10842#endif
10843
10844 SmallVector<SDValue, 4> ReturnValues;
10845 if (!CanLowerReturn) {
10846 // The instruction result is the result of loading from the
10847 // hidden sret parameter.
10848 SmallVector<EVT, 1> PVTs;
10849 Type *PtrRetTy =
10850 PointerType::get(C&: OrigRetTy->getContext(), AddressSpace: DL.getAllocaAddrSpace());
10851
10852 ComputeValueVTs(TLI: *this, DL, Ty: PtrRetTy, ValueVTs&: PVTs);
10853 assert(PVTs.size() == 1 && "Pointers should fit in one register");
10854 EVT PtrVT = PVTs[0];
10855
10856 unsigned NumValues = RetTys.size();
10857 ReturnValues.resize(N: NumValues);
10858 SmallVector<SDValue, 4> Chains(NumValues);
10859
10860 // An aggregate return value cannot wrap around the address space, so
10861 // offsets to its parts don't wrap either.
10862 SDNodeFlags Flags;
10863 Flags.setNoUnsignedWrap(true);
10864
10865 MachineFunction &MF = CLI.DAG.getMachineFunction();
10866 Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(ObjectIdx: DemoteStackIdx);
10867 for (unsigned i = 0; i < NumValues; ++i) {
10868 SDValue Add = CLI.DAG.getNode(Opcode: ISD::ADD, DL: CLI.DL, VT: PtrVT, N1: DemoteStackSlot,
10869 N2: CLI.DAG.getConstant(Val: Offsets[i], DL: CLI.DL,
10870 VT: PtrVT), Flags);
10871 SDValue L = CLI.DAG.getLoad(
10872 VT: RetTys[i], dl: CLI.DL, Chain: CLI.Chain, Ptr: Add,
10873 PtrInfo: MachinePointerInfo::getFixedStack(MF&: CLI.DAG.getMachineFunction(),
10874 FI: DemoteStackIdx, Offset: Offsets[i]),
10875 Alignment: HiddenSRetAlign);
10876 ReturnValues[i] = L;
10877 Chains[i] = L.getValue(R: 1);
10878 }
10879
10880 CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
10881 } else {
10882 // Collect the legal value parts into potentially illegal values
10883 // that correspond to the original function's return values.
10884 std::optional<ISD::NodeType> AssertOp;
10885 if (CLI.RetSExt)
10886 AssertOp = ISD::AssertSext;
10887 else if (CLI.RetZExt)
10888 AssertOp = ISD::AssertZext;
10889 unsigned CurReg = 0;
10890 for (EVT VT : RetTys) {
10891 MVT RegisterVT = getRegisterTypeForCallingConv(Context&: CLI.RetTy->getContext(),
10892 CC: CLI.CallConv, VT);
10893 unsigned NumRegs = getNumRegistersForCallingConv(Context&: CLI.RetTy->getContext(),
10894 CC: CLI.CallConv, VT);
10895
10896 ReturnValues.push_back(Elt: getCopyFromParts(
10897 DAG&: CLI.DAG, DL: CLI.DL, Parts: &InVals[CurReg], NumParts: NumRegs, PartVT: RegisterVT, ValueVT: VT, V: nullptr,
10898 InChain: CLI.Chain, CC: CLI.CallConv, AssertOp));
10899 CurReg += NumRegs;
10900 }
10901
    // For a function returning void, there is no return value. We can't create
    // such a node, so we just return a null return value in that case; nothing
    // will actually look at the value.
10905 if (ReturnValues.empty())
10906 return std::make_pair(x: SDValue(), y&: CLI.Chain);
10907 }
10908
10909 SDValue Res = CLI.DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: CLI.DL,
10910 VTList: CLI.DAG.getVTList(VTs: RetTys), Ops: ReturnValues);
10911 return std::make_pair(x&: Res, y&: CLI.Chain);
10912}
10913
10914/// Places new result values for the node in Results (their number
10915/// and types must exactly match those of the original return values of
10916/// the node), or leaves Results empty, which indicates that the node is not
10917/// to be custom lowered after all.
10918void TargetLowering::LowerOperationWrapper(SDNode *N,
10919 SmallVectorImpl<SDValue> &Results,
10920 SelectionDAG &DAG) const {
10921 SDValue Res = LowerOperation(Op: SDValue(N, 0), DAG);
10922
10923 if (!Res.getNode())
10924 return;
10925
10926 // If the original node has one result, take the return value from
10927 // LowerOperation as is. It might not be result number 0.
10928 if (N->getNumValues() == 1) {
10929 Results.push_back(Elt: Res);
10930 return;
10931 }
10932
10933 // If the original node has multiple results, then the return node should
10934 // have the same number of results.
10935 assert((N->getNumValues() == Res->getNumValues()) &&
10936 "Lowering returned the wrong number of results!");
10937
  // Place the new result values based on N's result numbers.
10939 for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
10940 Results.push_back(Elt: Res.getValue(R: I));
10941}
10942
10943SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10944 llvm_unreachable("LowerOperation not implemented for this target!");
10945}
10946
10947void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
10948 unsigned Reg,
10949 ISD::NodeType ExtendType) {
10950 SDValue Op = getNonRegisterValue(V);
10951 assert((Op.getOpcode() != ISD::CopyFromReg ||
10952 cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
10953 "Copy from a reg to the same reg!");
10954 assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
10955
10956 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10957 // If this is an InlineAsm we have to match the registers required, not the
10958 // notional registers required by the type.
10959
10960 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
10961 std::nullopt); // This is not an ABI copy.
10962 SDValue Chain = DAG.getEntryNode();
10963
10964 if (ExtendType == ISD::ANY_EXTEND) {
10965 auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(Val: V);
10966 if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
10967 ExtendType = PreferredExtendIt->second;
10968 }
10969 RFV.getCopyToRegs(Val: Op, DAG, dl: getCurSDLoc(), Chain, Glue: nullptr, V, PreferredExtendType: ExtendType);
10970 PendingExports.push_back(Elt: Chain);
10971}
10972
10973#include "llvm/CodeGen/SelectionDAGISel.h"
10974
10975/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
10976/// entry block, return true. This includes arguments used by switches, since
10977/// the switch may expand into multiple basic blocks.
10978static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
10979 // With FastISel active, we may be splitting blocks, so force creation
10980 // of virtual registers for all non-dead arguments.
10981 if (FastISel)
10982 return A->use_empty();
10983
10984 const BasicBlock &Entry = A->getParent()->front();
10985 for (const User *U : A->users())
10986 if (cast<Instruction>(Val: U)->getParent() != &Entry || isa<SwitchInst>(Val: U))
10987 return false; // Use not in entry block.
10988
10989 return true;
10990}
10991
10992using ArgCopyElisionMapTy =
10993 DenseMap<const Argument *,
10994 std::pair<const AllocaInst *, const StoreInst *>>;
10995
10996/// Scan the entry block of the function in FuncInfo for arguments that look
10997/// like copies into a local alloca. Record any copied arguments in
10998/// ArgCopyElisionCandidates.
10999static void
11000findArgumentCopyElisionCandidates(const DataLayout &DL,
11001 FunctionLoweringInfo *FuncInfo,
11002 ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
11003 // Record the state of every static alloca used in the entry block. Argument
11004 // allocas are all used in the entry block, so we need approximately as many
11005 // entries as we have arguments.
11006 enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
11007 SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
11008 unsigned NumArgs = FuncInfo->Fn->arg_size();
11009 StaticAllocas.reserve(NumEntries: NumArgs * 2);
11010
11011 auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
11012 if (!V)
11013 return nullptr;
11014 V = V->stripPointerCasts();
11015 const auto *AI = dyn_cast<AllocaInst>(Val: V);
11016 if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(Val: AI))
11017 return nullptr;
11018 auto Iter = StaticAllocas.insert(KV: {AI, Unknown});
11019 return &Iter.first->second;
11020 };
11021
11022 // Look for stores of arguments to static allocas. Look through bitcasts and
11023 // GEPs to handle type coercions, as long as the alloca is fully initialized
11024 // by the store. Any non-store use of an alloca escapes it and any subsequent
11025 // unanalyzed store might write it.
11026 // FIXME: Handle structs initialized with multiple stores.
11027 for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
11028 // Look for stores, and handle non-store uses conservatively.
11029 const auto *SI = dyn_cast<StoreInst>(Val: &I);
11030 if (!SI) {
11031 // We will look through cast uses, so ignore them completely.
11032 if (I.isCast())
11033 continue;
11034 // Ignore debug info and pseudo op intrinsics, they don't escape or store
11035 // to allocas.
11036 if (I.isDebugOrPseudoInst())
11037 continue;
11038 // This is an unknown instruction. Assume it escapes or writes to all
11039 // static alloca operands.
11040 for (const Use &U : I.operands()) {
11041 if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
11042 *Info = StaticAllocaInfo::Clobbered;
11043 }
11044 continue;
11045 }
11046
11047 // If the stored value is a static alloca, mark it as escaped.
11048 if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
11049 *Info = StaticAllocaInfo::Clobbered;
11050
11051 // Check if the destination is a static alloca.
11052 const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
11053 StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
11054 if (!Info)
11055 continue;
11056 const AllocaInst *AI = cast<AllocaInst>(Val: Dst);
11057
11058 // Skip allocas that have been initialized or clobbered.
11059 if (*Info != StaticAllocaInfo::Unknown)
11060 continue;
11061
11062 // Check if the stored value is an argument, and that this store fully
11063 // initializes the alloca.
11064 // If the argument type has padding bits we can't directly forward a pointer
11065 // as the upper bits may contain garbage.
11066 // Don't elide copies from the same argument twice.
11067 const Value *Val = SI->getValueOperand()->stripPointerCasts();
11068 const auto *Arg = dyn_cast<Argument>(Val);
11069 if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
11070 Arg->getType()->isEmptyTy() ||
11071 DL.getTypeStoreSize(Ty: Arg->getType()) !=
11072 DL.getTypeAllocSize(Ty: AI->getAllocatedType()) ||
11073 !DL.typeSizeEqualsStoreSize(Ty: Arg->getType()) ||
11074 ArgCopyElisionCandidates.count(Val: Arg)) {
11075 *Info = StaticAllocaInfo::Clobbered;
11076 continue;
11077 }
11078
11079 LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
11080 << '\n');
11081
11082 // Mark this alloca and store for argument copy elision.
11083 *Info = StaticAllocaInfo::Elidable;
11084 ArgCopyElisionCandidates.insert(KV: {Arg, {AI, SI}});
11085
11086 // Stop scanning if we've seen all arguments. This will happen early in -O0
11087 // builds, which is useful, because -O0 builds have large entry blocks and
11088 // many allocas.
11089 if (ArgCopyElisionCandidates.size() == NumArgs)
11090 break;
11091 }
11092}
11093
11094/// Try to elide argument copies from memory into a local alloca. Succeeds if
11095/// ArgVal is a load from a suitable fixed stack object.
11096static void tryToElideArgumentCopy(
11097 FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
11098 DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
11099 SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
11100 ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
11101 ArrayRef<SDValue> ArgVals, bool &ArgHasUses) {
11102 // Check if this is a load from a fixed stack object.
11103 auto *LNode = dyn_cast<LoadSDNode>(Val: ArgVals[0]);
11104 if (!LNode)
11105 return;
11106 auto *FINode = dyn_cast<FrameIndexSDNode>(Val: LNode->getBasePtr().getNode());
11107 if (!FINode)
11108 return;
11109
11110 // Check that the fixed stack object is the right size and alignment.
11111 // Look at the alignment that the user wrote on the alloca instead of looking
11112 // at the stack object.
11113 auto ArgCopyIter = ArgCopyElisionCandidates.find(Val: &Arg);
11114 assert(ArgCopyIter != ArgCopyElisionCandidates.end());
11115 const AllocaInst *AI = ArgCopyIter->second.first;
11116 int FixedIndex = FINode->getIndex();
11117 int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
11118 int OldIndex = AllocaIndex;
11119 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
11120 if (MFI.getObjectSize(ObjectIdx: FixedIndex) != MFI.getObjectSize(ObjectIdx: OldIndex)) {
11121 LLVM_DEBUG(
11122 dbgs() << " argument copy elision failed due to bad fixed stack "
11123 "object size\n");
11124 return;
11125 }
11126 Align RequiredAlignment = AI->getAlign();
11127 if (MFI.getObjectAlign(ObjectIdx: FixedIndex) < RequiredAlignment) {
11128 LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
11129 "greater than stack argument alignment ("
11130 << DebugStr(RequiredAlignment) << " vs "
11131 << DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
11132 return;
11133 }
11134
11135 // Perform the elision. Delete the old stack object and replace its only use
11136 // in the variable info map. Mark the stack object as mutable and aliased.
11137 LLVM_DEBUG({
11138 dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
11139 << " Replacing frame index " << OldIndex << " with " << FixedIndex
11140 << '\n';
11141 });
11142 MFI.RemoveStackObject(ObjectIdx: OldIndex);
11143 MFI.setIsImmutableObjectIndex(ObjectIdx: FixedIndex, IsImmutable: false);
11144 MFI.setIsAliasedObjectIndex(ObjectIdx: FixedIndex, IsAliased: true);
11145 AllocaIndex = FixedIndex;
11146 ArgCopyElisionFrameIndexMap.insert(KV: {OldIndex, FixedIndex});
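  // Remember the chain results of the argument loads; the caller token-factors
  // them into the DAG root so the loads stay ordered relative to other memory
  // operations.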
11147 for (SDValue ArgVal : ArgVals)
11148 Chains.push_back(Elt: ArgVal.getValue(R: 1));
11149
11150 // Avoid emitting code for the store implementing the copy.
11151 const StoreInst *SI = ArgCopyIter->second.second;
11152 ElidedArgCopyInstrs.insert(Ptr: SI);
11153
  // Check for uses of the argument again so that we can avoid exporting ArgVal
  // if it isn't used by anything other than the store.
11156 for (const Value *U : Arg.users()) {
11157 if (U != SI) {
11158 ArgHasUses = true;
11159 break;
11160 }
11161 }
11162}
11163
11164void SelectionDAGISel::LowerArguments(const Function &F) {
11165 SelectionDAG &DAG = SDB->DAG;
11166 SDLoc dl = SDB->getCurSDLoc();
11167 const DataLayout &DL = DAG.getDataLayout();
11168 SmallVector<ISD::InputArg, 16> Ins;
11169
11170 // In Naked functions we aren't going to save any registers.
11171 if (F.hasFnAttribute(Attribute::Naked))
11172 return;
11173
11174 if (!FuncInfo->CanLowerReturn) {
11175 // Put in an sret pointer parameter before all the other parameters.
11176 SmallVector<EVT, 1> ValueVTs;
11177 ComputeValueVTs(TLI: *TLI, DL: DAG.getDataLayout(),
11178 Ty: PointerType::get(C&: F.getContext(),
11179 AddressSpace: DAG.getDataLayout().getAllocaAddrSpace()),
11180 ValueVTs);
11181
11182 // NOTE: Assuming that a pointer will never break down to more than one VT
11183 // or one register.
11184 ISD::ArgFlagsTy Flags;
11185 Flags.setSRet();
11186 MVT RegisterVT = TLI->getRegisterType(Context&: *DAG.getContext(), VT: ValueVTs[0]);
11187 ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
11188 ISD::InputArg::NoArgIndex, 0);
11189 Ins.push_back(Elt: RetArg);
11190 }
11191
11192 // Look for stores of arguments to static allocas. Mark such arguments with a
11193 // flag to ask the target to give us the memory location of that argument if
11194 // available.
11195 ArgCopyElisionMapTy ArgCopyElisionCandidates;
11196 findArgumentCopyElisionCandidates(DL, FuncInfo: FuncInfo.get(),
11197 ArgCopyElisionCandidates);
11198
11199 // Set up the incoming argument description vector.
11200 for (const Argument &Arg : F.args()) {
11201 unsigned ArgNo = Arg.getArgNo();
11202 SmallVector<EVT, 4> ValueVTs;
11203 ComputeValueVTs(TLI: *TLI, DL: DAG.getDataLayout(), Ty: Arg.getType(), ValueVTs);
11204 bool isArgValueUsed = !Arg.use_empty();
11205 unsigned PartBase = 0;
11206 Type *FinalType = Arg.getType();
11207 if (Arg.hasAttribute(Attribute::ByVal))
11208 FinalType = Arg.getParamByValType();
11209 bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
11210 Ty: FinalType, CallConv: F.getCallingConv(), isVarArg: F.isVarArg(), DL);
11211 for (unsigned Value = 0, NumValues = ValueVTs.size();
11212 Value != NumValues; ++Value) {
11213 EVT VT = ValueVTs[Value];
11214 Type *ArgTy = VT.getTypeForEVT(Context&: *DAG.getContext());
11215 ISD::ArgFlagsTy Flags;
11216
11217
11218 if (Arg.getType()->isPointerTy()) {
11219 Flags.setPointer();
11220 Flags.setPointerAddrSpace(
11221 cast<PointerType>(Val: Arg.getType())->getAddressSpace());
11222 }
11223 if (Arg.hasAttribute(Attribute::ZExt))
11224 Flags.setZExt();
11225 if (Arg.hasAttribute(Attribute::SExt))
11226 Flags.setSExt();
11227 if (Arg.hasAttribute(Attribute::InReg)) {
        // If we are using the vectorcall calling convention, a structure that
        // is passed InReg is surely an HVA (homogeneous vector aggregate).
11230 if (F.getCallingConv() == CallingConv::X86_VectorCall &&
11231 isa<StructType>(Val: Arg.getType())) {
          // The first value of the structure is marked as the HVA start.
11233 if (0 == Value)
11234 Flags.setHvaStart();
11235 Flags.setHva();
11236 }
11237 // Set InReg Flag
11238 Flags.setInReg();
11239 }
11240 if (Arg.hasAttribute(Attribute::StructRet))
11241 Flags.setSRet();
11242 if (Arg.hasAttribute(Attribute::SwiftSelf))
11243 Flags.setSwiftSelf();
11244 if (Arg.hasAttribute(Attribute::SwiftAsync))
11245 Flags.setSwiftAsync();
11246 if (Arg.hasAttribute(Attribute::SwiftError))
11247 Flags.setSwiftError();
11248 if (Arg.hasAttribute(Attribute::ByVal))
11249 Flags.setByVal();
11250 if (Arg.hasAttribute(Attribute::ByRef))
11251 Flags.setByRef();
11252 if (Arg.hasAttribute(Attribute::InAlloca)) {
11253 Flags.setInAlloca();
11254 // Set the byval flag for CCAssignFn callbacks that don't know about
11255 // inalloca. This way we can know how many bytes we should've allocated
11256 // and how many bytes a callee cleanup function will pop. If we port
11257 // inalloca to more targets, we'll have to add custom inalloca handling
11258 // in the various CC lowering callbacks.
11259 Flags.setByVal();
11260 }
11261 if (Arg.hasAttribute(Attribute::Preallocated)) {
11262 Flags.setPreallocated();
11263 // Set the byval flag for CCAssignFn callbacks that don't know about
11264 // preallocated. This way we can know how many bytes we should've
11265 // allocated and how many bytes a callee cleanup function will pop. If
11266 // we port preallocated to more targets, we'll have to add custom
11267 // preallocated handling in the various CC lowering callbacks.
11268 Flags.setByVal();
11269 }
11270
11271 // Certain targets (such as MIPS), may have a different ABI alignment
11272 // for a type depending on the context. Give the target a chance to
11273 // specify the alignment it wants.
11274 const Align OriginalAlignment(
11275 TLI->getABIAlignmentForCallingConv(ArgTy, DL));
11276 Flags.setOrigAlign(OriginalAlignment);
11277
11278 Align MemAlign;
11279 Type *ArgMemTy = nullptr;
11280 if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
11281 Flags.isByRef()) {
11282 if (!ArgMemTy)
11283 ArgMemTy = Arg.getPointeeInMemoryValueType();
11284
11285 uint64_t MemSize = DL.getTypeAllocSize(Ty: ArgMemTy);
11286
        // For in-memory arguments, the frontend should pass the size and
        // alignment. The backend will guess when this info is missing, but
        // there are cases it cannot get right.
11290 if (auto ParamAlign = Arg.getParamStackAlign())
11291 MemAlign = *ParamAlign;
11292 else if ((ParamAlign = Arg.getParamAlign()))
11293 MemAlign = *ParamAlign;
11294 else
11295 MemAlign = Align(TLI->getByValTypeAlignment(Ty: ArgMemTy, DL));
11296 if (Flags.isByRef())
11297 Flags.setByRefSize(MemSize);
11298 else
11299 Flags.setByValSize(MemSize);
11300 } else if (auto ParamAlign = Arg.getParamStackAlign()) {
11301 MemAlign = *ParamAlign;
11302 } else {
11303 MemAlign = OriginalAlignment;
11304 }
11305 Flags.setMemAlign(MemAlign);
11306
11307 if (Arg.hasAttribute(Attribute::Nest))
11308 Flags.setNest();
11309 if (NeedsRegBlock)
11310 Flags.setInConsecutiveRegs();
11311 if (ArgCopyElisionCandidates.count(Val: &Arg))
11312 Flags.setCopyElisionCandidate();
11313 if (Arg.hasAttribute(Attribute::Returned))
11314 Flags.setReturned();
11315
11316 MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
11317 Context&: *CurDAG->getContext(), CC: F.getCallingConv(), VT);
11318 unsigned NumRegs = TLI->getNumRegistersForCallingConv(
11319 Context&: *CurDAG->getContext(), CC: F.getCallingConv(), VT);
11320 for (unsigned i = 0; i != NumRegs; ++i) {
11321 // For scalable vectors, use the minimum size; individual targets
11322 // are responsible for handling scalable vector arguments and
11323 // return values.
11324 ISD::InputArg MyFlags(
11325 Flags, RegisterVT, VT, isArgValueUsed, ArgNo,
11326 PartBase + i * RegisterVT.getStoreSize().getKnownMinValue());
11327 if (NumRegs > 1 && i == 0)
11328 MyFlags.Flags.setSplit();
        // If it isn't the first piece, the alignment must be 1.
11330 else if (i > 0) {
11331 MyFlags.Flags.setOrigAlign(Align(1));
11332 if (i == NumRegs - 1)
11333 MyFlags.Flags.setSplitEnd();
11334 }
11335 Ins.push_back(Elt: MyFlags);
11336 }
11337 if (NeedsRegBlock && Value == NumValues - 1)
11338 Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
11339 PartBase += VT.getStoreSize().getKnownMinValue();
11340 }
11341 }
11342
11343 // Call the target to set up the argument values.
11344 SmallVector<SDValue, 8> InVals;
11345 SDValue NewRoot = TLI->LowerFormalArguments(
11346 DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
11347
11348 // Verify that the target's LowerFormalArguments behaved as expected.
11349 assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
11350 "LowerFormalArguments didn't return a valid chain!");
11351 assert(InVals.size() == Ins.size() &&
11352 "LowerFormalArguments didn't emit the correct number of values!");
11353 LLVM_DEBUG({
11354 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
11355 assert(InVals[i].getNode() &&
11356 "LowerFormalArguments emitted a null value!");
11357 assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
11358 "LowerFormalArguments emitted a value with the wrong type!");
11359 }
11360 });
11361
11362 // Update the DAG with the new chain value resulting from argument lowering.
11363 DAG.setRoot(NewRoot);
11364
11365 // Set up the argument values.
11366 unsigned i = 0;
11367 if (!FuncInfo->CanLowerReturn) {
11368 // Create a virtual register for the sret pointer, and put in a copy
11369 // from the sret argument into it.
11370 SmallVector<EVT, 1> ValueVTs;
11371 ComputeValueVTs(TLI: *TLI, DL: DAG.getDataLayout(),
11372 Ty: PointerType::get(C&: F.getContext(),
11373 AddressSpace: DAG.getDataLayout().getAllocaAddrSpace()),
11374 ValueVTs);
11375 MVT VT = ValueVTs[0].getSimpleVT();
11376 MVT RegVT = TLI->getRegisterType(Context&: *CurDAG->getContext(), VT);
11377 std::optional<ISD::NodeType> AssertOp;
11378 SDValue ArgValue =
11379 getCopyFromParts(DAG, DL: dl, Parts: &InVals[0], NumParts: 1, PartVT: RegVT, ValueVT: VT, V: nullptr, InChain: NewRoot,
11380 CC: F.getCallingConv(), AssertOp);
11381
11382 MachineFunction& MF = SDB->DAG.getMachineFunction();
11383 MachineRegisterInfo& RegInfo = MF.getRegInfo();
11384 Register SRetReg =
11385 RegInfo.createVirtualRegister(RegClass: TLI->getRegClassFor(VT: RegVT));
11386 FuncInfo->DemoteRegister = SRetReg;
11387 NewRoot =
11388 SDB->DAG.getCopyToReg(Chain: NewRoot, dl: SDB->getCurSDLoc(), Reg: SRetReg, N: ArgValue);
11389 DAG.setRoot(NewRoot);
11390
11391 // i indexes lowered arguments. Bump it past the hidden sret argument.
11392 ++i;
11393 }
11394
11395 SmallVector<SDValue, 4> Chains;
11396 DenseMap<int, int> ArgCopyElisionFrameIndexMap;
11397 for (const Argument &Arg : F.args()) {
11398 SmallVector<SDValue, 4> ArgValues;
11399 SmallVector<EVT, 4> ValueVTs;
11400 ComputeValueVTs(TLI: *TLI, DL: DAG.getDataLayout(), Ty: Arg.getType(), ValueVTs);
11401 unsigned NumValues = ValueVTs.size();
11402 if (NumValues == 0)
11403 continue;
11404
11405 bool ArgHasUses = !Arg.use_empty();
11406
11407 // Elide the copying store if the target loaded this argument from a
11408 // suitable fixed stack object.
11409 if (Ins[i].Flags.isCopyElisionCandidate()) {
11410 unsigned NumParts = 0;
11411 for (EVT VT : ValueVTs)
11412 NumParts += TLI->getNumRegistersForCallingConv(Context&: *CurDAG->getContext(),
11413 CC: F.getCallingConv(), VT);
11414
11415 tryToElideArgumentCopy(FuncInfo&: *FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
11416 ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
11417 ArgVals: ArrayRef(&InVals[i], NumParts), ArgHasUses);
11418 }
11419
    // If this argument is otherwise unused, remember its value anyway; it is
    // still needed to generate debugging information.
11422 bool isSwiftErrorArg =
11423 TLI->supportSwiftError() &&
11424 Arg.hasAttribute(Attribute::SwiftError);
11425 if (!ArgHasUses && !isSwiftErrorArg) {
11426 SDB->setUnusedArgValue(V: &Arg, NewN: InVals[i]);
11427
11428 // Also remember any frame index for use in FastISel.
11429 if (FrameIndexSDNode *FI =
11430 dyn_cast<FrameIndexSDNode>(Val: InVals[i].getNode()))
11431 FuncInfo->setArgumentFrameIndex(A: &Arg, FI: FI->getIndex());
11432 }
11433
11434 for (unsigned Val = 0; Val != NumValues; ++Val) {
11435 EVT VT = ValueVTs[Val];
11436 MVT PartVT = TLI->getRegisterTypeForCallingConv(Context&: *CurDAG->getContext(),
11437 CC: F.getCallingConv(), VT);
11438 unsigned NumParts = TLI->getNumRegistersForCallingConv(
11439 Context&: *CurDAG->getContext(), CC: F.getCallingConv(), VT);
11440
11441 // Even an apparent 'unused' swifterror argument needs to be returned. So
11442 // we do generate a copy for it that can be used on return from the
11443 // function.
11444 if (ArgHasUses || isSwiftErrorArg) {
11445 std::optional<ISD::NodeType> AssertOp;
11446 if (Arg.hasAttribute(Attribute::SExt))
11447 AssertOp = ISD::AssertSext;
11448 else if (Arg.hasAttribute(Attribute::ZExt))
11449 AssertOp = ISD::AssertZext;
11450
11451 ArgValues.push_back(Elt: getCopyFromParts(DAG, DL: dl, Parts: &InVals[i], NumParts,
11452 PartVT, ValueVT: VT, V: nullptr, InChain: NewRoot,
11453 CC: F.getCallingConv(), AssertOp));
11454 }
11455
11456 i += NumParts;
11457 }
11458
11459 // We don't need to do anything else for unused arguments.
11460 if (ArgValues.empty())
11461 continue;
11462
11463 // Note down frame index.
11464 if (FrameIndexSDNode *FI =
11465 dyn_cast<FrameIndexSDNode>(Val: ArgValues[0].getNode()))
11466 FuncInfo->setArgumentFrameIndex(A: &Arg, FI: FI->getIndex());
11467
11468 SDValue Res = DAG.getMergeValues(Ops: ArrayRef(ArgValues.data(), NumValues),
11469 dl: SDB->getCurSDLoc());
11470
11471 SDB->setValue(V: &Arg, NewN: Res);
11472 if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
      // We want to associate the argument with the frame index, among the
      // involved operands, that corresponds to the lowest address. The
      // getCopyFromParts function, called earlier, may swap the order of
11476 // the operands to BUILD_PAIR depending on endianness. The result of
11477 // that swapping is that the least significant bits of the argument will
11478 // be in the first operand of the BUILD_PAIR node, and the most
11479 // significant bits will be in the second operand.
11480 unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
11481 if (LoadSDNode *LNode =
11482 dyn_cast<LoadSDNode>(Val: Res.getOperand(i: LowAddressOp).getNode()))
11483 if (FrameIndexSDNode *FI =
11484 dyn_cast<FrameIndexSDNode>(Val: LNode->getBasePtr().getNode()))
11485 FuncInfo->setArgumentFrameIndex(A: &Arg, FI: FI->getIndex());
11486 }
11487
11488 // Analyses past this point are naive and don't expect an assertion.
11489 if (Res.getOpcode() == ISD::AssertZext)
11490 Res = Res.getOperand(i: 0);
11491
11492 // Update the SwiftErrorVRegDefMap.
11493 if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
11494 unsigned Reg = cast<RegisterSDNode>(Val: Res.getOperand(i: 1))->getReg();
11495 if (Register::isVirtualRegister(Reg))
11496 SwiftError->setCurrentVReg(MBB: FuncInfo->MBB, SwiftError->getFunctionArg(),
11497 Reg);
11498 }
11499
    // If this argument is live outside of the entry block, insert a copy from
    // wherever we got it into the vreg that other BBs will use to reference it.
11502 if (Res.getOpcode() == ISD::CopyFromReg) {
11503 // If we can, though, try to skip creating an unnecessary vreg.
11504 // FIXME: This isn't very clean... it would be nice to make this more
11505 // general.
11506 unsigned Reg = cast<RegisterSDNode>(Val: Res.getOperand(i: 1))->getReg();
11507 if (Register::isVirtualRegister(Reg)) {
11508 FuncInfo->ValueMap[&Arg] = Reg;
11509 continue;
11510 }
11511 }
11512 if (!isOnlyUsedInEntryBlock(A: &Arg, FastISel: TM.Options.EnableFastISel)) {
11513 FuncInfo->InitializeRegForValue(V: &Arg);
11514 SDB->CopyToExportRegsIfNeeded(V: &Arg);
11515 }
11516 }
11517
11518 if (!Chains.empty()) {
11519 Chains.push_back(Elt: NewRoot);
11520 NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11521 }
11522
11523 DAG.setRoot(NewRoot);
11524
11525 assert(i == InVals.size() && "Argument register count mismatch!");
11526
11527 // If any argument copy elisions occurred and we have debug info, update the
11528 // stale frame indices used in the dbg.declare variable info table.
11529 if (!ArgCopyElisionFrameIndexMap.empty()) {
11530 for (MachineFunction::VariableDbgInfo &VI :
11531 MF->getInStackSlotVariableDbgInfo()) {
11532 auto I = ArgCopyElisionFrameIndexMap.find(Val: VI.getStackSlot());
11533 if (I != ArgCopyElisionFrameIndexMap.end())
11534 VI.updateStackSlot(NewSlot: I->second);
11535 }
11536 }
11537
11538 // Finally, if the target has anything special to do, allow it to do so.
11539 emitFunctionEntryCode();
11540}
11541
11542/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
11543/// ensure constants are generated when needed. Remember the virtual registers
11544/// that need to be added to the Machine PHI nodes as input. We cannot just
11545/// directly add them, because expansion might result in multiple MBB's for one
11546/// BB. As such, the start of the BB might correspond to a different MBB than
11547/// the end.
11548void
11549SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
11550 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11551
11552 SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
11553
11554 // Check PHI nodes in successors that expect a value to be available from this
11555 // block.
11556 for (const BasicBlock *SuccBB : successors(I: LLVMBB->getTerminator())) {
11557 if (!isa<PHINode>(Val: SuccBB->begin())) continue;
11558 MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
11559
11560 // If this terminator has multiple identical successors (common for
11561 // switches), only handle each succ once.
11562 if (!SuccsHandled.insert(Ptr: SuccMBB).second)
11563 continue;
11564
11565 MachineBasicBlock::iterator MBBI = SuccMBB->begin();
11566
11567 // At this point we know that there is a 1-1 correspondence between LLVM PHI
11568 // nodes and Machine PHI nodes, but the incoming operands have not been
11569 // emitted yet.
11570 for (const PHINode &PN : SuccBB->phis()) {
      // Ignore dead PHIs.
11572 if (PN.use_empty())
11573 continue;
11574
11575 // Skip empty types
11576 if (PN.getType()->isEmptyTy())
11577 continue;
11578
11579 unsigned Reg;
11580 const Value *PHIOp = PN.getIncomingValueForBlock(BB: LLVMBB);
11581
11582 if (const auto *C = dyn_cast<Constant>(Val: PHIOp)) {
11583 unsigned &RegOut = ConstantsOut[C];
11584 if (RegOut == 0) {
11585 RegOut = FuncInfo.CreateRegs(V: C);
11586 // We need to zero/sign extend ConstantInt phi operands to match
11587 // assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo.
11588 ISD::NodeType ExtendType = ISD::ANY_EXTEND;
11589 if (auto *CI = dyn_cast<ConstantInt>(Val: C))
11590 ExtendType = TLI.signExtendConstant(C: CI) ? ISD::SIGN_EXTEND
11591 : ISD::ZERO_EXTEND;
11592 CopyValueToVirtualRegister(V: C, Reg: RegOut, ExtendType);
11593 }
11594 Reg = RegOut;
11595 } else {
11596 DenseMap<const Value *, Register>::iterator I =
11597 FuncInfo.ValueMap.find(Val: PHIOp);
11598 if (I != FuncInfo.ValueMap.end())
11599 Reg = I->second;
11600 else {
11601 assert(isa<AllocaInst>(PHIOp) &&
11602 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
11603 "Didn't codegen value into a register!??");
11604 Reg = FuncInfo.CreateRegs(V: PHIOp);
11605 CopyValueToVirtualRegister(V: PHIOp, Reg);
11606 }
11607 }
11608
      // Remember that this register needs to be added to the machine PHI node
      // as the input for this MBB.
11611 SmallVector<EVT, 4> ValueVTs;
11612 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: PN.getType(), ValueVTs);
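      // A single LLVM PHI may expand into several machine PHIs: each value
      // type can be split across multiple registers, and each register gets
      // its own machine PHI operand. MBBI walks those machine PHIs in order.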
11613 for (EVT VT : ValueVTs) {
11614 const unsigned NumRegisters = TLI.getNumRegisters(Context&: *DAG.getContext(), VT);
11615 for (unsigned i = 0; i != NumRegisters; ++i)
11616 FuncInfo.PHINodesToUpdate.push_back(
11617 x: std::make_pair(x: &*MBBI++, y: Reg + i));
11618 Reg += NumRegisters;
11619 }
11620 }
11621 }
11622
11623 ConstantsOut.clear();
11624}
11625
11626MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
11627 MachineFunction::iterator I(MBB);
11628 if (++I == FuncInfo.MF->end())
11629 return nullptr;
11630 return &*I;
11631}
11632
11633/// During lowering new call nodes can be created (such as memset, etc.).
11634/// Those will become new roots of the current DAG, but complications arise
11635/// when they are tail calls. In such cases, the call lowering will update
11636/// the root, but the builder still needs to know that a tail call has been
11637/// lowered in order to avoid generating an additional return.
11638void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
11639 // If the node is null, we do have a tail call.
11640 if (MaybeTC.getNode() != nullptr)
11641 DAG.setRoot(MaybeTC);
11642 else
11643 HasTailCall = true;
11644}
11645
11646void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
11647 MachineBasicBlock *SwitchMBB,
11648 MachineBasicBlock *DefaultMBB) {
11649 MachineFunction *CurMF = FuncInfo.MF;
11650 MachineBasicBlock *NextMBB = nullptr;
11651 MachineFunction::iterator BBI(W.MBB);
11652 if (++BBI != FuncInfo.MF->end())
11653 NextMBB = &*BBI;
11654
11655 unsigned Size = W.LastCluster - W.FirstCluster + 1;
11656
11657 BranchProbabilityInfo *BPI = FuncInfo.BPI;
11658
11659 if (Size == 2 && W.MBB == SwitchMBB) {
    // If the two cases have the same destination, and one value is the same
    // as the other except for a single bit that is set in one and unset in
    // the other, use bit manipulation to do both compares at once.
    // For example:
    // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
11664 // TODO: This could be extended to merge any 2 cases in switches with 3
11665 // cases.
11666 // TODO: Handle cases where W.CaseBB != SwitchBB.
11667 CaseCluster &Small = *W.FirstCluster;
11668 CaseCluster &Big = *W.LastCluster;
11669
11670 if (Small.Low == Small.High && Big.Low == Big.High &&
11671 Small.MBB == Big.MBB) {
11672 const APInt &SmallValue = Small.Low->getValue();
11673 const APInt &BigValue = Big.Low->getValue();
11674
11675 // Check that there is only one bit different.
11676 APInt CommonBit = BigValue ^ SmallValue;
11677 if (CommonBit.isPowerOf2()) {
11678 SDValue CondLHS = getValue(V: Cond);
11679 EVT VT = CondLHS.getValueType();
11680 SDLoc DL = getCurSDLoc();
11681
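        // OR-ing Cond with the single differing bit maps both SmallValue and
        // BigValue onto (SmallValue | BigValue), so one SETEQ covers both
        // cases (e.g. X == 4 || X == 6 becomes (X | 2) == 6).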
11682 SDValue Or = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: CondLHS,
11683 N2: DAG.getConstant(Val: CommonBit, DL, VT));
11684 SDValue Cond = DAG.getSetCC(
11685 DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
11686 ISD::SETEQ);
11687
11688 // Update successor info.
11689 // Both Small and Big will jump to Small.BB, so we sum up the
11690 // probabilities.
11691 addSuccessorWithProb(Src: SwitchMBB, Dst: Small.MBB, Prob: Small.Prob + Big.Prob);
11692 if (BPI)
11693 addSuccessorWithProb(
11694 Src: SwitchMBB, Dst: DefaultMBB,
11695 // The default destination is the first successor in IR.
11696 Prob: BPI->getEdgeProbability(Src: SwitchMBB->getBasicBlock(), IndexInSuccessors: (unsigned)0));
11697 else
11698 addSuccessorWithProb(Src: SwitchMBB, Dst: DefaultMBB);
11699
11700 // Insert the true branch.
11701 SDValue BrCond =
11702 DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
11703 DAG.getBasicBlock(Small.MBB));
11704 // Insert the false branch.
11705 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
11706 DAG.getBasicBlock(DefaultMBB));
11707
11708 DAG.setRoot(BrCond);
11709 return;
11710 }
11711 }
11712 }
11713
11714 if (TM.getOptLevel() != CodeGenOptLevel::None) {
11715 // Here, we order cases by probability so the most likely case will be
11716 // checked first. However, two clusters can have the same probability in
11717 // which case their relative ordering is non-deterministic. So we use Low
11718 // as a tie-breaker as clusters are guaranteed to never overlap.
11719 llvm::sort(Start: W.FirstCluster, End: W.LastCluster + 1,
11720 Comp: [](const CaseCluster &a, const CaseCluster &b) {
11721 return a.Prob != b.Prob ?
11722 a.Prob > b.Prob :
11723 a.Low->getValue().slt(RHS: b.Low->getValue());
11724 });
11725
11726 // Rearrange the case blocks so that the last one falls through if possible
11727 // without changing the order of probabilities.
11728 for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
11729 --I;
11730 if (I->Prob > W.LastCluster->Prob)
11731 break;
11732 if (I->Kind == CC_Range && I->MBB == NextMBB) {
11733 std::swap(a&: *I, b&: *W.LastCluster);
11734 break;
11735 }
11736 }
11737 }
11738
11739 // Compute total probability.
11740 BranchProbability DefaultProb = W.DefaultProb;
11741 BranchProbability UnhandledProbs = DefaultProb;
11742 for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
11743 UnhandledProbs += I->Prob;
11744
11745 MachineBasicBlock *CurMBB = W.MBB;
11746 for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
11747 bool FallthroughUnreachable = false;
11748 MachineBasicBlock *Fallthrough;
11749 if (I == W.LastCluster) {
11750 // For the last cluster, fall through to the default destination.
11751 Fallthrough = DefaultMBB;
11752 FallthroughUnreachable = isa<UnreachableInst>(
11753 Val: DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
11754 } else {
11755 Fallthrough = CurMF->CreateMachineBasicBlock(BB: CurMBB->getBasicBlock());
11756 CurMF->insert(MBBI: BBI, MBB: Fallthrough);
11757 // Put Cond in a virtual register to make it available from the new blocks.
11758 ExportFromCurrentBlock(V: Cond);
11759 }
11760 UnhandledProbs -= I->Prob;
11761
11762 switch (I->Kind) {
11763 case CC_JumpTable: {
11764 // FIXME: Optimize away range check based on pivot comparisons.
11765 JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
11766 SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
11767
11768 // The jump block hasn't been inserted yet; insert it here.
11769 MachineBasicBlock *JumpMBB = JT->MBB;
11770 CurMF->insert(MBBI: BBI, MBB: JumpMBB);
11771
11772 auto JumpProb = I->Prob;
11773 auto FallthroughProb = UnhandledProbs;
11774
11775 // If the default statement is a target of the jump table, we evenly
11776 // distribute the default probability to successors of CurMBB. Also
11777 // update the probability on the edge from JumpMBB to Fallthrough.
11778 for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
11779 SE = JumpMBB->succ_end();
11780 SI != SE; ++SI) {
11781 if (*SI == DefaultMBB) {
11782 JumpProb += DefaultProb / 2;
11783 FallthroughProb -= DefaultProb / 2;
11784 JumpMBB->setSuccProbability(I: SI, Prob: DefaultProb / 2);
11785 JumpMBB->normalizeSuccProbs();
11786 break;
11787 }
11788 }
11789
11790 // If the default clause is unreachable, propagate that knowledge into
11791 // JTH->FallthroughUnreachable which will use it to suppress the range
11792 // check.
11793 //
11794 // However, don't do this if we're doing branch target enforcement,
11795 // because a table branch _without_ a range check can be a tempting JOP
11796 // gadget - out-of-bounds inputs that are impossible in correct
11797 // execution become possible again if an attacker can influence the
11798 // control flow. So if an attacker doesn't already have a BTI bypass
11799 // available, we don't want them to be able to get one out of this
11800 // table branch.
11801 if (FallthroughUnreachable) {
11802 Function &CurFunc = CurMF->getFunction();
11803 bool HasBranchTargetEnforcement = false;
11804 if (CurFunc.hasFnAttribute(Kind: "branch-target-enforcement")) {
11805 HasBranchTargetEnforcement =
11806 CurFunc.getFnAttribute(Kind: "branch-target-enforcement")
11807 .getValueAsBool();
11808 } else {
11809 HasBranchTargetEnforcement =
11810 CurMF->getMMI().getModule()->getModuleFlag(
11811 Key: "branch-target-enforcement");
11812 }
11813 if (!HasBranchTargetEnforcement)
11814 JTH->FallthroughUnreachable = true;
11815 }
11816
11817 if (!JTH->FallthroughUnreachable)
11818 addSuccessorWithProb(Src: CurMBB, Dst: Fallthrough, Prob: FallthroughProb);
11819 addSuccessorWithProb(Src: CurMBB, Dst: JumpMBB, Prob: JumpProb);
11820 CurMBB->normalizeSuccProbs();
11821
11822 // The jump table header will be inserted in our current block, do the
11823 // range check, and fall through to our fallthrough block.
11824 JTH->HeaderBB = CurMBB;
11825 JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
11826
11827 // If we're in the right place, emit the jump table header right now.
11828 if (CurMBB == SwitchMBB) {
11829 visitJumpTableHeader(JT&: *JT, JTH&: *JTH, SwitchBB: SwitchMBB);
11830 JTH->Emitted = true;
11831 }
11832 break;
11833 }
11834 case CC_BitTests: {
11835 // FIXME: Optimize away range check based on pivot comparisons.
11836 BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
11837
11838 // The bit test blocks haven't been inserted yet; insert them here.
11839 for (BitTestCase &BTC : BTB->Cases)
11840 CurMF->insert(MBBI: BBI, MBB: BTC.ThisBB);
11841
11842 // Fill in fields of the BitTestBlock.
11843 BTB->Parent = CurMBB;
11844 BTB->Default = Fallthrough;
11845
11846 BTB->DefaultProb = UnhandledProbs;
11847 // If the cases in bit test don't form a contiguous range, we evenly
11848 // distribute the probability on the edge to Fallthrough to two
11849 // successors of CurMBB.
11850 if (!BTB->ContiguousRange) {
11851 BTB->Prob += DefaultProb / 2;
11852 BTB->DefaultProb -= DefaultProb / 2;
11853 }
11854
11855 if (FallthroughUnreachable)
11856 BTB->FallthroughUnreachable = true;
11857
11858 // If we're in the right place, emit the bit test header right now.
11859 if (CurMBB == SwitchMBB) {
11860 visitBitTestHeader(B&: *BTB, SwitchBB: SwitchMBB);
11861 BTB->Emitted = true;
11862 }
11863 break;
11864 }
11865 case CC_Range: {
11866 const Value *RHS, *LHS, *MHS;
11867 ISD::CondCode CC;
11868 if (I->Low == I->High) {
11869 // Check Cond == I->Low.
11870 CC = ISD::SETEQ;
11871 LHS = Cond;
        RHS = I->Low;
11873 MHS = nullptr;
11874 } else {
11875 // Check I->Low <= Cond <= I->High.
11876 CC = ISD::SETLE;
11877 LHS = I->Low;
11878 MHS = Cond;
11879 RHS = I->High;
11880 }
11881
11882 // If Fallthrough is unreachable, fold away the comparison.
11883 if (FallthroughUnreachable)
11884 CC = ISD::SETTRUE;
11885
11886 // The false probability is the sum of all unhandled cases.
11887 CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
11888 getCurSDLoc(), I->Prob, UnhandledProbs);
11889
11890 if (CurMBB == SwitchMBB)
11891 visitSwitchCase(CB, SwitchBB: SwitchMBB);
11892 else
11893 SL->SwitchCases.push_back(x: CB);
11894
11895 break;
11896 }
11897 }
11898 CurMBB = Fallthrough;
11899 }
11900}
11901
11902void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
11903 const SwitchWorkListItem &W,
11904 Value *Cond,
11905 MachineBasicBlock *SwitchMBB) {
11906 assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
11907 "Clusters not sorted?");
11908 assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
11909
11910 auto [LastLeft, FirstRight, LeftProb, RightProb] =
11911 SL->computeSplitWorkItemInfo(W);
11912
11913 // Use the first element on the right as pivot since we will make less-than
11914 // comparisons against it.
11915 CaseClusterIt PivotCluster = FirstRight;
11916 assert(PivotCluster > W.FirstCluster);
11917 assert(PivotCluster <= W.LastCluster);
11918
11919 CaseClusterIt FirstLeft = W.FirstCluster;
11920 CaseClusterIt LastRight = W.LastCluster;
11921
11922 const ConstantInt *Pivot = PivotCluster->Low;
11923
11924 // New blocks will be inserted immediately after the current one.
11925 MachineFunction::iterator BBI(W.MBB);
11926 ++BBI;
11927
11928 // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
11929 // we can branch to its destination directly if it's squeezed exactly in
11930 // between the known lower bound and Pivot - 1.
11931 MachineBasicBlock *LeftMBB;
11932 if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
11933 FirstLeft->Low == W.GE &&
11934 (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
11935 LeftMBB = FirstLeft->MBB;
11936 } else {
11937 LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(BB: W.MBB->getBasicBlock());
11938 FuncInfo.MF->insert(MBBI: BBI, MBB: LeftMBB);
11939 WorkList.push_back(
11940 Elt: {.MBB: LeftMBB, .FirstCluster: FirstLeft, .LastCluster: LastLeft, .GE: W.GE, .LT: Pivot, .DefaultProb: W.DefaultProb / 2});
11941 // Put Cond in a virtual register to make it available from the new blocks.
11942 ExportFromCurrentBlock(V: Cond);
11943 }
11944
11945 // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
11946 // single cluster, RHS.Low == Pivot, and we can branch to its destination
11947 // directly if RHS.High equals the current upper bound.
11948 MachineBasicBlock *RightMBB;
11949 if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
11950 W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
11951 RightMBB = FirstRight->MBB;
11952 } else {
11953 RightMBB = FuncInfo.MF->CreateMachineBasicBlock(BB: W.MBB->getBasicBlock());
11954 FuncInfo.MF->insert(MBBI: BBI, MBB: RightMBB);
11955 WorkList.push_back(
11956 Elt: {.MBB: RightMBB, .FirstCluster: FirstRight, .LastCluster: LastRight, .GE: Pivot, .LT: W.LT, .DefaultProb: W.DefaultProb / 2});
11957 // Put Cond in a virtual register to make it available from the new blocks.
11958 ExportFromCurrentBlock(V: Cond);
11959 }
11960
11961 // Create the CaseBlock record that will be used to lower the branch.
11962 CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
11963 getCurSDLoc(), LeftProb, RightProb);
11964
11965 if (W.MBB == SwitchMBB)
11966 visitSwitchCase(CB, SwitchBB: SwitchMBB);
11967 else
11968 SL->SwitchCases.push_back(x: CB);
11969}
11970
// Scale CaseProb after peeling a case with probability PeeledCaseProb
// from the switch statement.
11973static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
11974 BranchProbability PeeledCaseProb) {
11975 if (PeeledCaseProb == BranchProbability::getOne())
11976 return BranchProbability::getZero();
11977 BranchProbability SwitchProb = PeeledCaseProb.getCompl();
11978
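  // The peeled case removed PeeledCaseProb of the probability mass, so the
  // remaining cases are rescaled by 1 / (1 - PeeledCaseProb). For example,
  // peeling a case with probability 0.6 turns a remaining 0.2 case into
  // 0.2 / 0.4 = 0.5. The std::max below keeps rounding from producing a
  // probability greater than one.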
11979 uint32_t Numerator = CaseProb.getNumerator();
11980 uint32_t Denominator = SwitchProb.scale(Num: CaseProb.getDenominator());
11981 return BranchProbability(Numerator, std::max(a: Numerator, b: Denominator));
11982}
11983
11984// Try to peel the top probability case if it exceeds the threshold.
11985// Return current MachineBasicBlock for the switch statement if the peeling
11986// does not occur.
11987// If the peeling is performed, return the newly created MachineBasicBlock
11988// for the peeled switch statement. Also update Clusters to remove the peeled
11989// case. PeeledCaseProb is the BranchProbability for the peeled case.
11990MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
11991 const SwitchInst &SI, CaseClusterVector &Clusters,
11992 BranchProbability &PeeledCaseProb) {
11993 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
  // Don't peel if there is only one cluster or if we are optimizing for size.
11995 if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
11996 TM.getOptLevel() == CodeGenOptLevel::None ||
11997 SwitchMBB->getParent()->getFunction().hasMinSize())
11998 return SwitchMBB;
11999
12000 BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
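  // SwitchPeelThreshold is interpreted as a percentage here; starting
  // TopCaseProb at that value means only a cluster at least that likely can
  // be selected for peeling.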
12001 unsigned PeeledCaseIndex = 0;
12002 bool SwitchPeeled = false;
12003 for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
12004 CaseCluster &CC = Clusters[Index];
12005 if (CC.Prob < TopCaseProb)
12006 continue;
12007 TopCaseProb = CC.Prob;
12008 PeeledCaseIndex = Index;
12009 SwitchPeeled = true;
12010 }
12011 if (!SwitchPeeled)
12012 return SwitchMBB;
12013
12014 LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
12015 << TopCaseProb << "\n");
12016
12017 // Record the MBB for the peeled switch statement.
12018 MachineFunction::iterator BBI(SwitchMBB);
12019 ++BBI;
12020 MachineBasicBlock *PeeledSwitchMBB =
12021 FuncInfo.MF->CreateMachineBasicBlock(BB: SwitchMBB->getBasicBlock());
12022 FuncInfo.MF->insert(MBBI: BBI, MBB: PeeledSwitchMBB);
12023
12024 ExportFromCurrentBlock(V: SI.getCondition());
12025 auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
12026 SwitchWorkListItem W = {.MBB: SwitchMBB, .FirstCluster: PeeledCaseIt, .LastCluster: PeeledCaseIt,
12027 .GE: nullptr, .LT: nullptr, .DefaultProb: TopCaseProb.getCompl()};
12028 lowerWorkItem(W, Cond: SI.getCondition(), SwitchMBB, DefaultMBB: PeeledSwitchMBB);
12029
12030 Clusters.erase(position: PeeledCaseIt);
12031 for (CaseCluster &CC : Clusters) {
12032 LLVM_DEBUG(
        dbgs() << "Scale the probability for one cluster, before scaling: "
12034 << CC.Prob << "\n");
12035 CC.Prob = scaleCaseProbality(CaseProb: CC.Prob, PeeledCaseProb: TopCaseProb);
12036 LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
12037 }
12038 PeeledCaseProb = TopCaseProb;
12039 return PeeledSwitchMBB;
12040}
12041
12042void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
12043 // Extract cases from the switch.
12044 BranchProbabilityInfo *BPI = FuncInfo.BPI;
12045 CaseClusterVector Clusters;
12046 Clusters.reserve(n: SI.getNumCases());
12047 for (auto I : SI.cases()) {
12048 MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
12049 const ConstantInt *CaseVal = I.getCaseValue();
12050 BranchProbability Prob =
12051 BPI ? BPI->getEdgeProbability(Src: SI.getParent(), IndexInSuccessors: I.getSuccessorIndex())
12052 : BranchProbability(1, SI.getNumCases() + 1);
12053 Clusters.push_back(x: CaseCluster::range(Low: CaseVal, High: CaseVal, MBB: Succ, Prob));
12054 }
12055
12056 MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
12057
12058 // Cluster adjacent cases with the same destination. We do this at all
12059 // optimization levels because it's cheap to do and will make codegen faster
12060 // if there are many clusters.
12061 sortAndRangeify(Clusters);
12062
  // The branch probability of the peeled case.
12064 BranchProbability PeeledCaseProb = BranchProbability::getZero();
12065 MachineBasicBlock *PeeledSwitchMBB =
12066 peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
12067
12068 // If there is only the default destination, jump there directly.
12069 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
12070 if (Clusters.empty()) {
12071 assert(PeeledSwitchMBB == SwitchMBB);
12072 SwitchMBB->addSuccessor(Succ: DefaultMBB);
12073 if (DefaultMBB != NextBlock(MBB: SwitchMBB)) {
12074 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
12075 getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
12076 }
12077 return;
12078 }
12079
12080 SL->findJumpTables(Clusters, SI: &SI, SL: getCurSDLoc(), DefaultMBB, PSI: DAG.getPSI(),
12081 BFI: DAG.getBFI());
12082 SL->findBitTestClusters(Clusters, SI: &SI);
12083
12084 LLVM_DEBUG({
12085 dbgs() << "Case clusters: ";
12086 for (const CaseCluster &C : Clusters) {
12087 if (C.Kind == CC_JumpTable)
12088 dbgs() << "JT:";
12089 if (C.Kind == CC_BitTests)
12090 dbgs() << "BT:";
12091
12092 C.Low->getValue().print(dbgs(), true);
12093 if (C.Low != C.High) {
12094 dbgs() << '-';
12095 C.High->getValue().print(dbgs(), true);
12096 }
12097 dbgs() << ' ';
12098 }
12099 dbgs() << '\n';
12100 });
12101
12102 assert(!Clusters.empty());
12103 SwitchWorkList WorkList;
12104 CaseClusterIt First = Clusters.begin();
12105 CaseClusterIt Last = Clusters.end() - 1;
12106 auto DefaultProb = getEdgeProbability(Src: PeeledSwitchMBB, Dst: DefaultMBB);
  // Scale the branch probability for DefaultMBB if peeling occurred and
  // DefaultMBB was not replaced.
12109 if (PeeledCaseProb != BranchProbability::getZero() &&
12110 DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
12111 DefaultProb = scaleCaseProbality(CaseProb: DefaultProb, PeeledCaseProb);
12112 WorkList.push_back(
12113 Elt: {.MBB: PeeledSwitchMBB, .FirstCluster: First, .LastCluster: Last, .GE: nullptr, .LT: nullptr, .DefaultProb: DefaultProb});
12114
12115 while (!WorkList.empty()) {
12116 SwitchWorkListItem W = WorkList.pop_back_val();
12117 unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
12118
12119 if (NumClusters > 3 && TM.getOptLevel() != CodeGenOptLevel::None &&
12120 !DefaultMBB->getParent()->getFunction().hasMinSize()) {
12121 // For optimized builds, lower large range as a balanced binary tree.
12122 splitWorkItem(WorkList, W, Cond: SI.getCondition(), SwitchMBB);
12123 continue;
12124 }
12125
12126 lowerWorkItem(W, Cond: SI.getCondition(), SwitchMBB, DefaultMBB);
12127 }
12128}
12129
12130void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
12131 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12132 auto DL = getCurSDLoc();
12133 EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
12134 setValue(V: &I, NewN: DAG.getStepVector(DL, ResVT: ResultVT));
12135}
12136
12137void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
12138 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12139 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
12140
12141 SDLoc DL = getCurSDLoc();
12142 SDValue V = getValue(V: I.getOperand(i_nocapture: 0));
12143 assert(VT == V.getValueType() && "Malformed vector.reverse!");
12144
12145 if (VT.isScalableVector()) {
12146 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT, Operand: V));
12147 return;
12148 }
12149
12150 // Use VECTOR_SHUFFLE for the fixed-length vector
12151 // to maintain existing behavior.
12152 SmallVector<int, 8> Mask;
12153 unsigned NumElts = VT.getVectorMinNumElements();
12154 for (unsigned i = 0; i != NumElts; ++i)
12155 Mask.push_back(Elt: NumElts - 1 - i);
12156
12157 setValue(V: &I, NewN: DAG.getVectorShuffle(VT, dl: DL, N1: V, N2: DAG.getUNDEF(VT), Mask));
12158}
12159
12160void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I) {
12161 auto DL = getCurSDLoc();
12162 SDValue InVec = getValue(V: I.getOperand(i_nocapture: 0));
12163 EVT OutVT =
12164 InVec.getValueType().getHalfNumVectorElementsVT(Context&: *DAG.getContext());
12165
12166 unsigned OutNumElts = OutVT.getVectorMinNumElements();
12167
  // The ISD node needs the input vector split into two equal halves.
12169 SDValue Lo = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: OutVT, N1: InVec,
12170 N2: DAG.getVectorIdxConstant(Val: 0, DL));
12171 SDValue Hi = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: OutVT, N1: InVec,
12172 N2: DAG.getVectorIdxConstant(Val: OutNumElts, DL));
12173
12174 // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
12175 // legalisation and combines.
12176 if (OutVT.isFixedLengthVector()) {
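    // createStrideMask(0, 2, N) selects lanes 0, 2, 4, ... and
    // createStrideMask(1, 2, N) selects lanes 1, 3, 5, ... from the
    // concatenation of Lo and Hi, which is exactly the deinterleave of the
    // original input.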
12177 SDValue Even = DAG.getVectorShuffle(VT: OutVT, dl: DL, N1: Lo, N2: Hi,
12178 Mask: createStrideMask(Start: 0, Stride: 2, VF: OutNumElts));
12179 SDValue Odd = DAG.getVectorShuffle(VT: OutVT, dl: DL, N1: Lo, N2: Hi,
12180 Mask: createStrideMask(Start: 1, Stride: 2, VF: OutNumElts));
12181 SDValue Res = DAG.getMergeValues(Ops: {Even, Odd}, dl: getCurSDLoc());
12182 setValue(V: &I, NewN: Res);
12183 return;
12184 }
12185
12186 SDValue Res = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL,
12187 VTList: DAG.getVTList(VT1: OutVT, VT2: OutVT), N1: Lo, N2: Hi);
12188 setValue(V: &I, NewN: Res);
12189}
12190
12191void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I) {
12192 auto DL = getCurSDLoc();
12193 EVT InVT = getValue(V: I.getOperand(i_nocapture: 0)).getValueType();
12194 SDValue InVec0 = getValue(V: I.getOperand(i_nocapture: 0));
12195 SDValue InVec1 = getValue(V: I.getOperand(i_nocapture: 1));
12196 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12197 EVT OutVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
12198
12199 // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
12200 // legalisation and combines.
12201 if (OutVT.isFixedLengthVector()) {
12202 unsigned NumElts = InVT.getVectorMinNumElements();
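    // The two operands are concatenated and then shuffled with
    // createInterleaveMask(NumElts, 2) = [0, NumElts, 1, NumElts+1, ...],
    // alternating lanes from the two inputs.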
12203 SDValue V = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: OutVT, N1: InVec0, N2: InVec1);
12204 setValue(V: &I, NewN: DAG.getVectorShuffle(VT: OutVT, dl: DL, N1: V, N2: DAG.getUNDEF(VT: OutVT),
12205 Mask: createInterleaveMask(VF: NumElts, NumVecs: 2)));
12206 return;
12207 }
12208
12209 SDValue Res = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL,
12210 VTList: DAG.getVTList(VT1: InVT, VT2: InVT), N1: InVec0, N2: InVec1);
12211 Res = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: OutVT, N1: Res.getValue(R: 0),
12212 N2: Res.getValue(R: 1));
12213 setValue(V: &I, NewN: Res);
12214}
12215
12216void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
12217 SmallVector<EVT, 4> ValueVTs;
12218 ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(), Ty: I.getType(),
12219 ValueVTs);
12220 unsigned NumValues = ValueVTs.size();
12221 if (NumValues == 0) return;
12222
12223 SmallVector<SDValue, 4> Values(NumValues);
12224 SDValue Op = getValue(V: I.getOperand(i_nocapture: 0));
12225
12226 for (unsigned i = 0; i != NumValues; ++i)
12227 Values[i] = DAG.getNode(Opcode: ISD::FREEZE, DL: getCurSDLoc(), VT: ValueVTs[i],
12228 Operand: SDValue(Op.getNode(), Op.getResNo() + i));
12229
12230 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
12231 VTList: DAG.getVTList(VTs: ValueVTs), Ops: Values));
12232}
12233
12234void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
12235 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12236 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
12237
12238 SDLoc DL = getCurSDLoc();
12239 SDValue V1 = getValue(V: I.getOperand(i_nocapture: 0));
12240 SDValue V2 = getValue(V: I.getOperand(i_nocapture: 1));
12241 int64_t Imm = cast<ConstantInt>(Val: I.getOperand(i_nocapture: 2))->getSExtValue();
12242
12243 // VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
12244 if (VT.isScalableVector()) {
12245 MVT IdxVT = TLI.getVectorIdxTy(DL: DAG.getDataLayout());
12246 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::VECTOR_SPLICE, DL, VT, N1: V1, N2: V2,
12247 N3: DAG.getConstant(Val: Imm, DL, VT: IdxVT)));
12248 return;
12249 }
12250
12251 unsigned NumElts = VT.getVectorNumElements();
12252
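  // A negative immediate selects trailing elements of V1; fold it into the
  // equivalent non-negative start index so the shuffle mask below only uses
  // indices in [0, 2 * NumElts). E.g. NumElts = 4 and Imm = -1 gives Idx = 3.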
12253 uint64_t Idx = (NumElts + Imm) % NumElts;
12254
12255 // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
12256 SmallVector<int, 8> Mask;
12257 for (unsigned i = 0; i < NumElts; ++i)
12258 Mask.push_back(Elt: Idx + i);
12259 setValue(V: &I, NewN: DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask));
12260}
12261
12262// Consider the following MIR after SelectionDAG, which produces output in
// physregs in the first case or virtregs in the second case.
12264//
12265// INLINEASM_BR ..., implicit-def $ebx, ..., implicit-def $edx
12266// %5:gr32 = COPY $ebx
12267// %6:gr32 = COPY $edx
12268// %1:gr32 = COPY %6:gr32
12269// %0:gr32 = COPY %5:gr32
12270//
12271// INLINEASM_BR ..., def %5:gr32, ..., def %6:gr32
12272// %1:gr32 = COPY %6:gr32
12273// %0:gr32 = COPY %5:gr32
12274//
12275// Given %0, we'd like to return $ebx in the first case and %5 in the second.
12276// Given %1, we'd like to return $edx in the first case and %6 in the second.
12277//
12278// If a callbr has outputs, it will have a single mapping in FuncInfo.ValueMap
12279// to a single virtreg (such as %0). The remaining outputs monotonically
12280// increase in virtreg number from there. If a callbr has no outputs, then it
12281// should not have a corresponding callbr landingpad; in fact, the callbr
12282// landingpad would not even be able to refer to such a callbr.
12283static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
12284 MachineInstr *MI = MRI.def_begin(RegNo: Reg)->getParent();
12285 // There is definitely at least one copy.
12286 assert(MI->getOpcode() == TargetOpcode::COPY &&
12287 "start of copy chain MUST be COPY");
12288 Reg = MI->getOperand(i: 1).getReg();
12289 MI = MRI.def_begin(RegNo: Reg)->getParent();
12290 // There may be an optional second copy.
12291 if (MI->getOpcode() == TargetOpcode::COPY) {
12292 assert(Reg.isVirtual() && "expected COPY of virtual register");
12293 Reg = MI->getOperand(i: 1).getReg();
12294 assert(Reg.isPhysical() && "expected COPY of physical register");
12295 MI = MRI.def_begin(RegNo: Reg)->getParent();
12296 }
12297 // The start of the chain must be an INLINEASM_BR.
12298 assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
12299 "end of copy chain MUST be INLINEASM_BR");
12300 return Reg;
12301}
12302
12303// We must do this walk rather than the simpler
12304// setValue(&I, getCopyFromRegs(CBR, CBR->getType()));
12305// otherwise we will end up with copies of virtregs only valid along direct
12306// edges.
12307void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) {
12308 SmallVector<EVT, 8> ResultVTs;
12309 SmallVector<SDValue, 8> ResultValues;
12310 const auto *CBR =
12311 cast<CallBrInst>(Val: I.getParent()->getUniquePredecessor()->getTerminator());
12312
12313 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12314 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
12315 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
12316
12317 unsigned InitialDef = FuncInfo.ValueMap[CBR];
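  // As described above, the callbr's first output maps to this vreg and the
  // remaining outputs occupy consecutive vreg numbers, so InitialDef is simply
  // incremented as each output constraint is processed.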
12318 SDValue Chain = DAG.getRoot();
12319
12320 // Re-parse the asm constraints string.
12321 TargetLowering::AsmOperandInfoVector TargetConstraints =
12322 TLI.ParseConstraints(DL: DAG.getDataLayout(), TRI, Call: *CBR);
12323 for (auto &T : TargetConstraints) {
12324 SDISelAsmOperandInfo OpInfo(T);
12325 if (OpInfo.Type != InlineAsm::isOutput)
12326 continue;
12327
12328 // Pencil in OpInfo.ConstraintType and OpInfo.ConstraintVT based on the
12329 // individual constraint.
12330 TLI.ComputeConstraintToUse(OpInfo, Op: OpInfo.CallOperand, DAG: &DAG);
12331
12332 switch (OpInfo.ConstraintType) {
12333 case TargetLowering::C_Register:
12334 case TargetLowering::C_RegisterClass: {
12335 // Fill in OpInfo.AssignedRegs.Regs.
12336 getRegistersForValue(DAG, DL: getCurSDLoc(), OpInfo, RefOpInfo&: OpInfo);
12337
      // getRegistersForValue may produce one or many registers depending on
      // whether OpInfo.ConstraintVT is legal on the target or not.
12340 for (size_t i = 0, e = OpInfo.AssignedRegs.Regs.size(); i != e; ++i) {
12341 Register OriginalDef = FollowCopyChain(MRI, Reg: InitialDef++);
12342 if (Register::isPhysicalRegister(Reg: OriginalDef))
12343 FuncInfo.MBB->addLiveIn(PhysReg: OriginalDef);
12344 // Update the assigned registers to use the original defs.
12345 OpInfo.AssignedRegs.Regs[i] = OriginalDef;
12346 }
12347
12348 SDValue V = OpInfo.AssignedRegs.getCopyFromRegs(
12349 DAG, FuncInfo, dl: getCurSDLoc(), Chain, Glue: nullptr, V: CBR);
12350 ResultValues.push_back(Elt: V);
12351 ResultVTs.push_back(Elt: OpInfo.ConstraintVT);
12352 break;
12353 }
12354 case TargetLowering::C_Other: {
12355 SDValue Flag;
12356 SDValue V = TLI.LowerAsmOutputForConstraint(Chain, Glue&: Flag, DL: getCurSDLoc(),
12357 OpInfo, DAG);
12358 ++InitialDef;
12359 ResultValues.push_back(Elt: V);
12360 ResultVTs.push_back(Elt: OpInfo.ConstraintVT);
12361 break;
12362 }
12363 default:
12364 break;
12365 }
12366 }
12367 SDValue V = DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
12368 VTList: DAG.getVTList(VTs: ResultVTs), Ops: ResultValues);
12369 setValue(V: &I, NewN: V);
12370}
12371
