1//===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86.h"
15#include "X86CallingConv.h"
16#include "X86FrameLowering.h"
17#include "X86ISelLowering.h"
18#include "X86InstrBuilder.h"
19#include "X86MachineFunctionInfo.h"
20#include "X86TargetMachine.h"
21#include "X86TargetObjectFile.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/ObjCARCUtil.h"
24#include "llvm/CodeGen/MachineJumpTableInfo.h"
25#include "llvm/CodeGen/MachineModuleInfo.h"
26#include "llvm/CodeGen/WinEHFuncInfo.h"
27#include "llvm/IR/DiagnosticInfo.h"
28#include "llvm/IR/IRBuilder.h"
29
30#define DEBUG_TYPE "x86-isel"
31
32using namespace llvm;
33
34STATISTIC(NumTailCalls, "Number of tail calls");
35
36/// Call this when the user attempts to do something unsupported, like
37/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
38/// report_fatal_error, so calling code should attempt to recover without
39/// crashing.
40static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
41 const char *Msg) {
42 MachineFunction &MF = DAG.getMachineFunction();
43 DAG.getContext()->diagnose(
44 DI: DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
45}
46
47/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
49/// the return registers.
50static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
51 switch (CC) {
52 default:
53 return false;
54 case CallingConv::X86_RegCall:
55 case CallingConv::PreserveMost:
56 case CallingConv::PreserveAll:
57 return true;
58 }
59}
60
61/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
63/// the parameters.
64static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
65 return CC == CallingConv::X86_RegCall;
66}
67
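// For illustration (not exhaustive), a few mappings produced by the helper
// below, assuming AVX-512 is available and only the listed features matter:
//   v16i1, C calling conv              -> {v16i8, 1}   (one XMM register)
//   v16i1, X86_RegCall                 -> {INVALID, 0} (default handling,
//                                                       i.e. a k-register)
//   v64i1, any CC, AVX-512 without BWI -> {i8, 64}     (scalarized)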
68static std::pair<MVT, unsigned>
69handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
70 const X86Subtarget &Subtarget) {
71 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
72 // convention is one that uses k registers.
73 if (NumElts == 2)
74 return {MVT::v2i64, 1};
75 if (NumElts == 4)
76 return {MVT::v4i32, 1};
77 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
78 CC != CallingConv::Intel_OCL_BI)
79 return {MVT::v8i16, 1};
80 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
81 CC != CallingConv::Intel_OCL_BI)
82 return {MVT::v16i8, 1};
83 // v32i1 passes in ymm unless we have BWI and the calling convention is
84 // regcall.
85 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
86 return {MVT::v32i8, 1};
87 // Split v64i1 vectors if we don't have v64i8 available.
88 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
89 if (Subtarget.useAVX512Regs())
90 return {MVT::v64i8, 1};
91 return {MVT::v32i8, 2};
92 }
93
94 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
95 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
96 NumElts > 64)
97 return {MVT::i8, NumElts};
98
99 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
100}
101
102MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
103 CallingConv::ID CC,
104 EVT VT) const {
105 if (VT.isVector()) {
106 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
107 unsigned NumElts = VT.getVectorNumElements();
108
109 MVT RegisterVT;
110 unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
113 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
114 return RegisterVT;
115 }
116
117 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
118 return MVT::v8f16;
119 }
120
121 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
122 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
123 !Subtarget.hasX87())
124 return MVT::i32;
125
126 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getRegisterTypeForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));
129
130 if (VT == MVT::bf16)
131 return MVT::f16;
132
133 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
134}
135
136unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
137 CallingConv::ID CC,
138 EVT VT) const {
139 if (VT.isVector()) {
140 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
141 unsigned NumElts = VT.getVectorNumElements();
142
143 MVT RegisterVT;
144 unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
147 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
148 return NumRegisters;
149 }
150
151 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
152 return 1;
153 }
154
155 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
156 // x87 is disabled.
157 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
158 if (VT == MVT::f64)
159 return 2;
160 if (VT == MVT::f80)
161 return 3;
162 }
163
164 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getNumRegistersForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));
167
168 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
169}
170
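// For illustration: with AVX-512, an odd-sized mask such as v17i1 is broken
// into 17 i1 intermediates that are promoted to i8 registers, while on a BWI
// target that prefers 256-bit vectors a v64i1 is split into two v32i1 halves
// passed as v32i8.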
171unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
172 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
173 unsigned &NumIntermediates, MVT &RegisterVT) const {
174 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
175 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
176 Subtarget.hasAVX512() &&
177 (!isPowerOf2_32(Value: VT.getVectorNumElements()) ||
178 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
179 VT.getVectorNumElements() > 64)) {
180 RegisterVT = MVT::i8;
181 IntermediateVT = MVT::i1;
182 NumIntermediates = VT.getVectorNumElements();
183 return NumIntermediates;
184 }
185
186 // Split v64i1 vectors if we don't have v64i8 available.
187 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
188 CC != CallingConv::X86_RegCall) {
189 RegisterVT = MVT::v32i8;
190 IntermediateVT = MVT::v32i1;
191 NumIntermediates = 2;
192 return 2;
193 }
194
195 // Split vNbf16 vectors according to vNf16.
196 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    VT = VT.changeVectorElementType(MVT::f16);

  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
201}
202
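// For illustration: with AVX-512VL a v8i32 compare produces a v8i1 result,
// whereas with plain AVX2 the same compare yields a v8i32 all-ones/all-zeros
// mask (the element type is simply switched to an integer of the same width).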
203EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
204 LLVMContext& Context,
205 EVT VT) const {
206 if (!VT.isVector())
207 return MVT::i8;
208
209 if (Subtarget.hasAVX512()) {
210 // Figure out what this type will be legalized to.
211 EVT LegalVT = VT;
212 while (getTypeAction(Context, VT: LegalVT) != TypeLegal)
213 LegalVT = getTypeToTransformTo(Context, VT: LegalVT);
214
215 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
216 if (LegalVT.getSimpleVT().is512BitVector())
217 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
218
219 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
220 // If we legalized to less than a 512-bit vector, then we will use a vXi1
221 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
222 // vXi16/vXi8.
223 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
224 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
225 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
226 }
227 }
228
229 return VT.changeVectorElementTypeToInteger();
230}
231
232/// Helper for getByValTypeAlignment to determine
233/// the desired ByVal argument alignment.
234static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
235 if (MaxAlign == 16)
236 return;
237 if (VectorType *VTy = dyn_cast<VectorType>(Val: Ty)) {
238 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
239 MaxAlign = Align(16);
240 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Val: Ty)) {
241 Align EltAlign;
242 getMaxByValAlign(Ty: ATy->getElementType(), MaxAlign&: EltAlign);
243 if (EltAlign > MaxAlign)
244 MaxAlign = EltAlign;
245 } else if (StructType *STy = dyn_cast<StructType>(Val: Ty)) {
246 for (auto *EltTy : STy->elements()) {
247 Align EltAlign;
248 getMaxByValAlign(Ty: EltTy, MaxAlign&: EltAlign);
249 if (EltAlign > MaxAlign)
250 MaxAlign = EltAlign;
251 if (MaxAlign == 16)
252 break;
253 }
254 }
255}
256
257/// Return the desired alignment for ByVal aggregate
258/// function arguments in the caller parameter area. For X86, aggregates
259/// that contain SSE vectors are placed at 16-byte boundaries while the rest
260/// are at 4-byte boundaries.
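/// For example, on 32-bit x86 with SSE a byval struct containing a 128-bit
/// vector member is aligned to 16 bytes, while a struct of plain ints stays
/// at the default 4-byte boundary; on x86-64 the minimum is 8 bytes.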
261uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
262 const DataLayout &DL) const {
263 if (Subtarget.is64Bit()) {
264 // Max of 8 and alignment of type.
265 Align TyAlign = DL.getABITypeAlign(Ty);
266 if (TyAlign > 8)
267 return TyAlign.value();
268 return 8;
269 }
270
271 Align Alignment(4);
272 if (Subtarget.hasSSE1())
273 getMaxByValAlign(Ty, MaxAlign&: Alignment);
274 return Alignment.value();
275}
276
277/// It returns EVT::Other if the type should be determined using generic
278/// target-independent logic.
279/// For vector ops we check that the overall size isn't larger than our
280/// preferred vector width.
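/// For example, a 64-byte memset on an AVX-512BW target that prefers 512-bit
/// vectors is typically lowered with v64i8 stores, an AVX target with cheap
/// 256-bit ops uses v32i8, and a 32-bit SSE2 target with slow unaligned
/// 16-byte accesses may fall back to f64 or i32 stores.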
281EVT X86TargetLowering::getOptimalMemOpType(
282 const MemOp &Op, const AttributeList &FuncAttributes) const {
283 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
284 if (Op.size() >= 16 &&
285 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(AlignCheck: Align(16)))) {
286 // FIXME: Check if unaligned 64-byte accesses are slow.
287 if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
288 (Subtarget.getPreferVectorWidth() >= 512)) {
289 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
290 }
291 // FIXME: Check if unaligned 32-byte accesses are slow.
292 if (Op.size() >= 32 && Subtarget.hasAVX() &&
293 Subtarget.useLight256BitInstructions()) {
294 // Although this isn't a well-supported type for AVX1, we'll let
295 // legalization and shuffle lowering produce the optimal codegen. If we
296 // choose an optimal type with a vector element larger than a byte,
297 // getMemsetStores() may create an intermediate splat (using an integer
298 // multiply) before we splat as a vector.
299 return MVT::v32i8;
300 }
301 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
302 return MVT::v16i8;
303 // TODO: Can SSE1 handle a byte vector?
304 // If we have SSE1 registers we should be able to use them.
305 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
306 (Subtarget.getPreferVectorWidth() >= 128))
307 return MVT::v4f32;
308 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
309 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
310 // Do not use f64 to lower memcpy if source is string constant. It's
311 // better to use i32 to avoid the loads.
312 // Also, do not use f64 to lower memset unless this is a memset of zeros.
313 // The gymnastics of splatting a byte value into an XMM register and then
314 // only using 8-byte stores (because this is a CPU with slow unaligned
315 // 16-byte accesses) makes that a loser.
316 return MVT::f64;
317 }
318 }
319 // This is a compromise. If we reach here, unaligned accesses may be slow on
320 // this target. However, creating smaller, aligned accesses could be even
321 // slower and would certainly be a lot more code.
322 if (Subtarget.is64Bit() && Op.size() >= 8)
323 return MVT::i64;
324 return MVT::i32;
325}
326
327bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
328 if (VT == MVT::f32)
329 return Subtarget.hasSSE1();
330 if (VT == MVT::f64)
331 return Subtarget.hasSSE2();
332 return true;
333}
334
335static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
336 return (8 * Alignment.value()) % SizeInBits == 0;
337}
338
339bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
340 if (isBitAligned(Alignment, SizeInBits: VT.getSizeInBits()))
341 return true;
342 switch (VT.getSizeInBits()) {
343 default:
344 // 8-byte and under are always assumed to be fast.
345 return true;
346 case 128:
347 return !Subtarget.isUnalignedMem16Slow();
348 case 256:
349 return !Subtarget.isUnalignedMem32Slow();
350 // TODO: What about AVX-512 (512-bit) accesses?
351 }
352}
353
354bool X86TargetLowering::allowsMisalignedMemoryAccesses(
355 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
356 unsigned *Fast) const {
357 if (Fast)
358 *Fast = isMemoryAccessFast(VT, Alignment);
359 // NonTemporal vector memory ops must be aligned.
360 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    // NT loads can only be vector aligned, so if it's less aligned than the
    // minimum vector size (which we can split the vector down to), we might as
    // well use a regular unaligned vector load.
    // We don't have any NT loads pre-SSE41.
365 if (!!(Flags & MachineMemOperand::MOLoad))
366 return (Alignment < 16 || !Subtarget.hasSSE41());
367 return false;
368 }
369 // Misaligned accesses of any size are always allowed.
370 return true;
371}
372
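// For illustration: a nontemporal 256-bit vector store is reported as a
// supported access here only when it is 32-byte aligned and AVX is available,
// and a nontemporal 128-bit vector load additionally requires SSE4.1;
// insufficiently aligned NT vector ops are instead treated as ordinary
// unaligned accesses.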
373bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
374 const DataLayout &DL, EVT VT,
375 unsigned AddrSpace, Align Alignment,
376 MachineMemOperand::Flags Flags,
377 unsigned *Fast) const {
378 if (Fast)
379 *Fast = isMemoryAccessFast(VT, Alignment);
380 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
381 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
382 /*Fast=*/nullptr))
383 return true;
384 // NonTemporal vector memory ops are special, and must be aligned.
385 if (!isBitAligned(Alignment, SizeInBits: VT.getSizeInBits()))
386 return false;
387 switch (VT.getSizeInBits()) {
388 case 128:
389 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
390 return true;
391 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
392 return true;
393 return false;
394 case 256:
395 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
396 return true;
397 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
398 return true;
399 return false;
400 case 512:
401 if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
402 return true;
403 return false;
404 default:
405 return false; // Don't have NonTemporal vector memory ops of this size.
406 }
407 }
408 return true;
409}
410
411/// Return the entry encoding for a jump table in the
412/// current function. The returned value is a member of the
413/// MachineJumpTableInfo::JTEntryKind enum.
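/// For example, a 32-bit GOT-PIC jump table entry is emitted roughly as
/// ".long .LBB0_N@GOTOFF" (see LowerCustomJumpTableEntry below), while
/// large-code-model PIC falls back to 64-bit label differences.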
414unsigned X86TargetLowering::getJumpTableEncoding() const {
415 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
416 // symbol.
417 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
418 return MachineJumpTableInfo::EK_Custom32;
419 if (isPositionIndependent() &&
420 getTargetMachine().getCodeModel() == CodeModel::Large)
421 return MachineJumpTableInfo::EK_LabelDifference64;
422
423 // Otherwise, use the normal jump table encoding heuristics.
424 return TargetLowering::getJumpTableEncoding();
425}
426
427bool X86TargetLowering::useSoftFloat() const {
428 return Subtarget.useSoftFloat();
429}
430
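// For illustration: when a module is built with e.g. -mregparm=3 on 32-bit
// x86, the leading integer/pointer arguments of a libcall are marked 'inreg'
// below so they are passed in registers; a 64-bit integer argument consumes
// two of the available register slots.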
431void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
432 ArgListTy &Args) const {
433
434 // Only relabel X86-32 for C / Stdcall CCs.
435 if (Subtarget.is64Bit())
436 return;
437 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
438 return;
439 unsigned ParamRegs = 0;
440 if (auto *M = MF->getFunction().getParent())
441 ParamRegs = M->getNumberRegisterParameters();
442
  // Mark the first N integer arguments as being passed in registers.
444 for (auto &Arg : Args) {
445 Type *T = Arg.Ty;
446 if (T->isIntOrPtrTy())
447 if (MF->getDataLayout().getTypeAllocSize(Ty: T) <= 8) {
448 unsigned numRegs = 1;
449 if (MF->getDataLayout().getTypeAllocSize(Ty: T) > 4)
450 numRegs = 2;
451 if (ParamRegs < numRegs)
452 return;
453 ParamRegs -= numRegs;
454 Arg.IsInReg = true;
455 }
456 }
457}
458
459const MCExpr *
460X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
461 const MachineBasicBlock *MBB,
462 unsigned uid,MCContext &Ctx) const{
463 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
464 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
465 // entries.
466 return MCSymbolRefExpr::create(Symbol: MBB->getSymbol(),
467 Kind: MCSymbolRefExpr::VK_GOTOFF, Ctx);
468}
469
470/// Returns relocation base for the given PIC jumptable.
471SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
472 SelectionDAG &DAG) const {
473 if (!Subtarget.is64Bit())
474 // This doesn't have SDLoc associated with it, but is not really the
475 // same as a Register.
476 return DAG.getNode(Opcode: X86ISD::GlobalBaseReg, DL: SDLoc(),
477 VT: getPointerTy(DL: DAG.getDataLayout()));
478 return Table;
479}
480
481/// This returns the relocation base for the given PIC jumptable,
482/// the same as getPICJumpTableRelocBase, but as an MCExpr.
483const MCExpr *X86TargetLowering::
484getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
485 MCContext &Ctx) const {
486 // X86-64 uses RIP relative addressing based on the jump table label.
487 if (Subtarget.isPICStyleRIPRel() ||
488 (Subtarget.is64Bit() &&
489 getTargetMachine().getCodeModel() == CodeModel::Large))
490 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
491
492 // Otherwise, the reference is relative to the PIC base.
493 return MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx);
494}
495
496std::pair<const TargetRegisterClass *, uint8_t>
497X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
498 MVT VT) const {
499 const TargetRegisterClass *RRC = nullptr;
500 uint8_t Cost = 1;
501 switch (VT.SimpleTy) {
502 default:
503 return TargetLowering::findRepresentativeClass(TRI, VT);
504 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
505 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
506 break;
507 case MVT::x86mmx:
508 RRC = &X86::VR64RegClass;
509 break;
510 case MVT::f32: case MVT::f64:
511 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
512 case MVT::v4f32: case MVT::v2f64:
513 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
514 case MVT::v8f32: case MVT::v4f64:
515 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
516 case MVT::v16f32: case MVT::v8f64:
517 RRC = &X86::VR128XRegClass;
518 break;
519 }
  return std::make_pair(RRC, Cost);
521}
522
523unsigned X86TargetLowering::getAddressSpace() const {
524 if (Subtarget.is64Bit())
525 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
526 return 256;
527}
528
529static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
530 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
531 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(Major: 17));
532}
533
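// Builds a segment-relative address. For example, SegmentOffset(IRB, 0x28,
// X86AS::FS) produces inttoptr(i32 40) in address space 257, which the
// backend lowers to the %fs:0x28 slot used for the stack guard below.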
534static Constant* SegmentOffset(IRBuilderBase &IRB,
535 int Offset, unsigned AddressSpace) {
536 return ConstantExpr::getIntToPtr(
537 C: ConstantInt::get(Ty: Type::getInt32Ty(C&: IRB.getContext()), V: Offset),
538 Ty: IRB.getPtrTy(AddrSpace: AddressSpace));
539}
540
541Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
542 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
543 // tcbhead_t; use it instead of the usual global variable (see
544 // sysdeps/{i386,x86_64}/nptl/tls.h)
545 if (hasStackGuardSlotTLS(TargetTriple: Subtarget.getTargetTriple())) {
546 unsigned AddressSpace = getAddressSpace();
547
548 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
549 if (Subtarget.isTargetFuchsia())
550 return SegmentOffset(IRB, Offset: 0x10, AddressSpace);
551
552 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
    // Some users may customize the base register and offset.
    int Offset = M->getStackProtectorGuardOffset();
    // If -stack-protector-guard-offset is not set, the default is %fs:0x28 on
    // x86-64 (%gs:0x28 with the Kernel code model) and %gs:0x14 on i386.
    if (Offset == INT_MAX)
      Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
560
561 StringRef GuardReg = M->getStackProtectorGuardReg();
562 if (GuardReg == "fs")
563 AddressSpace = X86AS::FS;
564 else if (GuardReg == "gs")
565 AddressSpace = X86AS::GS;
566
    // Use the guard symbol if the user specified one.
568 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
569 if (!GuardSymb.empty()) {
570 GlobalVariable *GV = M->getGlobalVariable(Name: GuardSymb);
571 if (!GV) {
572 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(C&: M->getContext())
573 : Type::getInt32Ty(C&: M->getContext());
574 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
575 nullptr, GuardSymb, nullptr,
576 GlobalValue::NotThreadLocal, AddressSpace);
577 if (!Subtarget.isTargetDarwin())
578 GV->setDSOLocal(M->getDirectAccessExternalData());
579 }
580 return GV;
581 }
582
583 return SegmentOffset(IRB, Offset, AddressSpace);
584 }
585 return TargetLowering::getIRStackGuard(IRB);
586}
587
588void X86TargetLowering::insertSSPDeclarations(Module &M) const {
589 // MSVC CRT provides functionalities for stack protection.
590 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
591 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
592 // MSVC CRT has a global variable holding security cookie.
593 M.getOrInsertGlobal(Name: "__security_cookie",
594 Ty: PointerType::getUnqual(C&: M.getContext()));
595
596 // MSVC CRT has a function to validate security cookie.
597 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
598 Name: "__security_check_cookie", RetTy: Type::getVoidTy(C&: M.getContext()),
599 Args: PointerType::getUnqual(C&: M.getContext()));
600 if (Function *F = dyn_cast<Function>(Val: SecurityCheckCookie.getCallee())) {
601 F->setCallingConv(CallingConv::X86_FastCall);
602 F->addParamAttr(0, Attribute::AttrKind::InReg);
603 }
604 return;
605 }
606
607 StringRef GuardMode = M.getStackProtectorGuard();
608
609 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
610 if ((GuardMode == "tls" || GuardMode.empty()) &&
611 hasStackGuardSlotTLS(TargetTriple: Subtarget.getTargetTriple()))
612 return;
613 TargetLowering::insertSSPDeclarations(M);
614}
615
616Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
617 // MSVC CRT has a global variable holding security cookie.
618 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
619 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
620 return M.getGlobalVariable(Name: "__security_cookie");
621 }
622 return TargetLowering::getSDagStackGuard(M);
623}
624
625Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
626 // MSVC CRT has a function to validate security cookie.
627 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
628 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
629 return M.getFunction(Name: "__security_check_cookie");
630 }
631 return TargetLowering::getSSPStackGuardCheck(M);
632}
633
634Value *
635X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
636 // Android provides a fixed TLS slot for the SafeStack pointer. See the
637 // definition of TLS_SLOT_SAFESTACK in
638 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
639 if (Subtarget.isTargetAndroid()) {
    // %fs:0x48 on x86-64 (%gs:0x48 with the Kernel code model); %gs:0x24 on
    // i386.
642 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
643 return SegmentOffset(IRB, Offset, AddressSpace: getAddressSpace());
644 }
645
646 // Fuchsia is similar.
647 if (Subtarget.isTargetFuchsia()) {
648 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
649 return SegmentOffset(IRB, Offset: 0x18, AddressSpace: getAddressSpace());
650 }
651
652 return TargetLowering::getSafeStackPointerLocation(IRB);
653}
654
655//===----------------------------------------------------------------------===//
656// Return Value Calling Convention Implementation
657//===----------------------------------------------------------------------===//
658
659bool X86TargetLowering::CanLowerReturn(
660 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
661 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
662 SmallVector<CCValAssign, 16> RVLocs;
663 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
664 return CCInfo.CheckReturn(Outs, Fn: RetCC_X86);
665}
666
667const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
668 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
669 return ScratchRegs;
670}
671
672ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
673 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
674 return RCRegs;
675}
676
/// Lowers mask values (v*i1) to the corresponding local register values.
/// \returns DAG node after lowering to register type
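/// For example, a v16i1 value headed for a 32-bit location is first bitcast
/// to i16 and then any-extended to i32, while a v32i1 value headed for a
/// 32-bit location is a single bitcast.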
679static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
680 const SDLoc &DL, SelectionDAG &DAG) {
681 EVT ValVT = ValArg.getValueType();
682
683 if (ValVT == MVT::v1i1)
684 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ValLoc, N1: ValArg,
685 N2: DAG.getIntPtrConstant(Val: 0, DL));
686
687 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
688 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
689 // Two stage lowering might be required
690 // bitcast: v8i1 -> i8 / v16i1 -> i16
691 // anyextend: i8 -> i32 / i16 -> i32
692 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
693 SDValue ValToCopy = DAG.getBitcast(VT: TempValLoc, V: ValArg);
694 if (ValLoc == MVT::i32)
695 ValToCopy = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ValLoc, Operand: ValToCopy);
696 return ValToCopy;
697 }
698
699 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
700 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
701 // One stage lowering is required
702 // bitcast: v32i1 -> i32 / v64i1 -> i64
703 return DAG.getBitcast(VT: ValLoc, V: ValArg);
704 }
705
706 return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ValLoc, Operand: ValArg);
707}
708
/// Breaks a v64i1 value into two i32 registers and adds the new nodes to the
/// DAG.
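/// For example, on a 32-bit AVX512BW target a v64i1 argument is bitcast to
/// i64, split into its low and high i32 halves, and each half is recorded
/// against the register location assigned to it by the calling convention.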
710static void Passv64i1ArgInRegs(
711 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
712 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
713 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
714 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
715 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
716 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
717 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
718 "The value should reside in two registers");
719
720 // Before splitting the value we cast it to i64
721 Arg = DAG.getBitcast(MVT::i64, Arg);
722
723 // Splitting the value into two i32 types
724 SDValue Lo, Hi;
725 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
726
727 // Attach the two i32 types into corresponding registers
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
730}
731
732SDValue
733X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
734 bool isVarArg,
735 const SmallVectorImpl<ISD::OutputArg> &Outs,
736 const SmallVectorImpl<SDValue> &OutVals,
737 const SDLoc &dl, SelectionDAG &DAG) const {
738 MachineFunction &MF = DAG.getMachineFunction();
739 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
740
741 // In some cases we need to disable registers from the default CSR list.
742 // For example, when they are used as return registers (preserve_* and X86's
743 // regcall) or for argument passing (X86's regcall).
744 bool ShouldDisableCalleeSavedRegister =
745 shouldDisableRetRegFromCSR(CC: CallConv) ||
746 MF.getFunction().hasFnAttribute(Kind: "no_caller_saved_registers");
747
748 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
749 report_fatal_error(reason: "X86 interrupts may not return any value");
750
751 SmallVector<CCValAssign, 16> RVLocs;
752 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
753 CCInfo.AnalyzeReturn(Outs, Fn: RetCC_X86);
754
755 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
756 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
757 ++I, ++OutsIndex) {
758 CCValAssign &VA = RVLocs[I];
759 assert(VA.isRegLoc() && "Can only return in registers!");
760
761 // Add the register to the CalleeSaveDisableRegs list.
762 if (ShouldDisableCalleeSavedRegister)
763 MF.getRegInfo().disableCalleeSavedRegister(Reg: VA.getLocReg());
764
765 SDValue ValToCopy = OutVals[OutsIndex];
766 EVT ValVT = ValToCopy.getValueType();
767
768 // Promote values to the appropriate types.
769 if (VA.getLocInfo() == CCValAssign::SExt)
770 ValToCopy = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: ValToCopy);
771 else if (VA.getLocInfo() == CCValAssign::ZExt)
772 ValToCopy = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: ValToCopy);
773 else if (VA.getLocInfo() == CCValAssign::AExt) {
774 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
775 ValToCopy = lowerMasksToReg(ValArg: ValToCopy, ValLoc: VA.getLocVT(), DL: dl, DAG);
776 else
777 ValToCopy = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: ValToCopy);
778 }
779 else if (VA.getLocInfo() == CCValAssign::BCvt)
780 ValToCopy = DAG.getBitcast(VT: VA.getLocVT(), V: ValToCopy);
781
782 assert(VA.getLocInfo() != CCValAssign::FPExt &&
783 "Unexpected FP-extend for return value.");
784
785 // Report an error if we have attempted to return a value via an XMM
786 // register and SSE was disabled.
787 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
788 errorUnsupported(DAG, dl, Msg: "SSE register return with SSE disabled");
789 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
790 } else if (!Subtarget.hasSSE2() &&
791 X86::FR64XRegClass.contains(VA.getLocReg()) &&
792 ValVT == MVT::f64) {
793 // When returning a double via an XMM register, report an error if SSE2 is
794 // not enabled.
795 errorUnsupported(DAG, dl, Msg: "SSE2 register return with SSE2 disabled");
796 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
797 }
798
799 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
800 // the RET instruction and handled by the FP Stackifier.
801 if (VA.getLocReg() == X86::FP0 ||
802 VA.getLocReg() == X86::FP1) {
803 // If this is a copy from an xmm register to ST(0), use an FPExtend to
804 // change the value to the FP stack register class.
805 if (isScalarFPTypeInSSEReg(VA.getValVT()))
806 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
807 RetVals.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ValToCopy));
808 // Don't emit a copytoreg.
809 continue;
810 }
811
812 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
813 // which is returned in RAX / RDX.
814 if (Subtarget.is64Bit()) {
815 if (ValVT == MVT::x86mmx) {
816 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
817 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
818 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
819 ValToCopy);
820 // If we don't have SSE2 available, convert to v4f32 so the generated
821 // register is legal.
822 if (!Subtarget.hasSSE2())
823 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
824 }
825 }
826 }
827
828 if (VA.needsCustom()) {
829 assert(VA.getValVT() == MVT::v64i1 &&
830 "Currently the only custom case is when we split v64i1 to 2 regs");
831
832 Passv64i1ArgInRegs(DL: dl, DAG, Arg&: ValToCopy, RegsToPass&: RetVals, VA, NextVA&: RVLocs[++I],
833 Subtarget);
834
835 // Add the second register to the CalleeSaveDisableRegs list.
836 if (ShouldDisableCalleeSavedRegister)
837 MF.getRegInfo().disableCalleeSavedRegister(Reg: RVLocs[I].getLocReg());
838 } else {
839 RetVals.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ValToCopy));
840 }
841 }
842
843 SDValue Glue;
844 SmallVector<SDValue, 6> RetOps;
845 RetOps.push_back(Elt: Chain); // Operand #0 = Chain (updated below)
846 // Operand #1 = Bytes To Pop
847 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
848 MVT::i32));
849
850 // Copy the result values into the output registers.
851 for (auto &RetVal : RetVals) {
852 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
853 RetOps.push_back(Elt: RetVal.second);
854 continue; // Don't emit a copytoreg.
855 }
856
857 Chain = DAG.getCopyToReg(Chain, dl, Reg: RetVal.first, N: RetVal.second, Glue);
858 Glue = Chain.getValue(R: 1);
859 RetOps.push_back(
860 Elt: DAG.getRegister(Reg: RetVal.first, VT: RetVal.second.getValueType()));
861 }
862
863 // Swift calling convention does not require we copy the sret argument
864 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
865
866 // All x86 ABIs require that for returning structs by value we copy
867 // the sret argument into %rax/%eax (depending on ABI) for the return.
868 // We saved the argument into a virtual register in the entry block,
869 // so now we copy the value out and into %rax/%eax.
870 //
871 // Checking Function.hasStructRetAttr() here is insufficient because the IR
872 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
873 // false, then an sret argument may be implicitly inserted in the SelDAG. In
874 // either case FuncInfo->setSRetReturnReg() will have been called.
875 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
876 // When we have both sret and another return value, we should use the
877 // original Chain stored in RetOps[0], instead of the current Chain updated
878 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
879
880 // For the case of sret and another return value, we have
881 // Chain_0 at the function entry
882 // Chain_1 = getCopyToReg(Chain_0) in the above loop
883 // If we use Chain_1 in getCopyFromReg, we will have
884 // Val = getCopyFromReg(Chain_1)
885 // Chain_2 = getCopyToReg(Chain_1, Val) from below
886
887 // getCopyToReg(Chain_0) will be glued together with
888 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
889 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
890 // Data dependency from Unit B to Unit A due to usage of Val in
891 // getCopyToReg(Chain_1, Val)
892 // Chain dependency from Unit A to Unit B
893
894 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
895 SDValue Val = DAG.getCopyFromReg(Chain: RetOps[0], dl, Reg: SRetReg,
896 VT: getPointerTy(DL: MF.getDataLayout()));
897
898 Register RetValReg
899 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
900 X86::RAX : X86::EAX;
901 Chain = DAG.getCopyToReg(Chain, dl, Reg: RetValReg, N: Val, Glue);
902 Glue = Chain.getValue(R: 1);
903
904 // RAX/EAX now acts like a return value.
905 RetOps.push_back(
906 Elt: DAG.getRegister(Reg: RetValReg, VT: getPointerTy(DL: DAG.getDataLayout())));
907
908 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
909 // this however for preserve_most/preserve_all to minimize the number of
910 // callee-saved registers for these CCs.
911 if (ShouldDisableCalleeSavedRegister &&
912 CallConv != CallingConv::PreserveAll &&
913 CallConv != CallingConv::PreserveMost)
914 MF.getRegInfo().disableCalleeSavedRegister(Reg: RetValReg);
915 }
916
917 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
918 const MCPhysReg *I =
919 TRI->getCalleeSavedRegsViaCopy(MF: &DAG.getMachineFunction());
920 if (I) {
921 for (; *I; ++I) {
922 if (X86::GR64RegClass.contains(*I))
923 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
924 else
925 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
926 }
927 }
928
929 RetOps[0] = Chain; // Update chain.
930
931 // Add the glue if we have it.
932 if (Glue.getNode())
933 RetOps.push_back(Elt: Glue);
934
935 X86ISD::NodeType opcode = X86ISD::RET_GLUE;
936 if (CallConv == CallingConv::X86_INTR)
937 opcode = X86ISD::IRET;
938 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
939}
940
941bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
942 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(NUses: 1, Value: 0))
943 return false;
944
945 SDValue TCChain = Chain;
946 SDNode *Copy = *N->use_begin();
947 if (Copy->getOpcode() == ISD::CopyToReg) {
948 // If the copy has a glue operand, we conservatively assume it isn't safe to
949 // perform a tail call.
950 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
951 return false;
952 TCChain = Copy->getOperand(Num: 0);
953 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
954 return false;
955
956 bool HasRet = false;
957 for (const SDNode *U : Copy->uses()) {
958 if (U->getOpcode() != X86ISD::RET_GLUE)
959 return false;
960 // If we are returning more than one value, we can definitely
961 // not make a tail call see PR19530
962 if (U->getNumOperands() > 4)
963 return false;
964 if (U->getNumOperands() == 4 &&
965 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
966 return false;
967 HasRet = true;
968 }
969
970 if (!HasRet)
971 return false;
972
973 Chain = TCChain;
974 return true;
975}
976
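// For illustration: an i8 return value stays i8 on most targets, but on
// Darwin it is still widened to i32 to match Clang's historical behaviour
// (PR26665); an i1 return is widened to at least i8 everywhere.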
977EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
978 ISD::NodeType ExtendKind) const {
979 MVT ReturnMVT = MVT::i32;
980
981 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
982 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
983 // The ABI does not require i1, i8 or i16 to be extended.
984 //
985 // On Darwin, there is code in the wild relying on Clang's old behaviour of
986 // always extending i8/i16 return values, so keep doing that for now.
987 // (PR26665).
988 ReturnMVT = MVT::i8;
989 }
990
991 EVT MinVT = getRegisterType(Context, VT: ReturnMVT);
992 return VT.bitsLT(VT: MinVT) ? MinVT : VT;
993}
994
/// Reads two 32 bit registers and creates a 64 bit mask value.
/// \param VA The current 32 bit value that needs to be assigned.
/// \param NextVA The next 32 bit value that needs to be assigned.
/// \param Root The parent DAG node.
/// \param [in,out] InGlue Represents the SDValue in the parent DAG node used
///                        for glue purposes. If the DAG already uses a
///                        physical register instead of a virtual one, the new
///                        SDValue should be glued to the InGlue SDValue.
/// \return a new 64 bit wide SDValue.
1004static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1005 SDValue &Root, SelectionDAG &DAG,
1006 const SDLoc &DL, const X86Subtarget &Subtarget,
1007 SDValue *InGlue = nullptr) {
1008 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1009 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1010 assert(VA.getValVT() == MVT::v64i1 &&
1011 "Expecting first location of 64 bit width type");
1012 assert(NextVA.getValVT() == VA.getValVT() &&
1013 "The locations should have the same type");
1014 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1015 "The values should reside in two registers");
1016
1017 SDValue Lo, Hi;
1018 SDValue ArgValueLo, ArgValueHi;
1019
1020 MachineFunction &MF = DAG.getMachineFunction();
1021 const TargetRegisterClass *RC = &X86::GR32RegClass;
1022
1023 // Read a 32 bit value from the registers.
1024 if (nullptr == InGlue) {
1025 // When no physical register is present,
1026 // create an intermediate virtual register.
1027 Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
1028 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1029 Reg = MF.addLiveIn(PReg: NextVA.getLocReg(), RC);
1030 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1031 } else {
1032 // When a physical register is available read the value from it and glue
1033 // the reads together.
1034 ArgValueLo =
1035 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1036 *InGlue = ArgValueLo.getValue(R: 2);
1037 ArgValueHi =
1038 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1039 *InGlue = ArgValueHi.getValue(R: 2);
1040 }
1041
1042 // Convert the i32 type into v32i1 type.
1043 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1044
1045 // Convert the i32 type into v32i1 type.
1046 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1047
1048 // Concatenate the two values together.
1049 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1050}
1051
/// The function will lower a register of various sizes (8/16/32/64)
/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
/// \returns a DAG node containing the operand after lowering to mask type.
1055static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1056 const EVT &ValLoc, const SDLoc &DL,
1057 SelectionDAG &DAG) {
1058 SDValue ValReturned = ValArg;
1059
1060 if (ValVT == MVT::v1i1)
1061 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1062
1063 if (ValVT == MVT::v64i1) {
    // On 32 bit machines this case is handled by getv64i1Argument.
    assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
    // On 64 bit machines there is no need to truncate the value, only bitcast.
1067 } else {
1068 MVT MaskLenVT;
1069 switch (ValVT.getSimpleVT().SimpleTy) {
1070 case MVT::v8i1:
1071 MaskLenVT = MVT::i8;
1072 break;
1073 case MVT::v16i1:
1074 MaskLenVT = MVT::i16;
1075 break;
1076 case MVT::v32i1:
1077 MaskLenVT = MVT::i32;
1078 break;
1079 default:
1080 llvm_unreachable("Expecting a vector of i1 types");
1081 }
1082
1083 ValReturned = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MaskLenVT, Operand: ValReturned);
1084 }
1085 return DAG.getBitcast(VT: ValVT, V: ValReturned);
1086}
1087
1088/// Lower the result values of a call into the
1089/// appropriate copies out of appropriate physical registers.
1090///
1091SDValue X86TargetLowering::LowerCallResult(
1092 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1093 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1094 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1095 uint32_t *RegMask) const {
1096
1097 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1098 // Assign locations to each value returned by this call.
1099 SmallVector<CCValAssign, 16> RVLocs;
1100 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1101 *DAG.getContext());
1102 CCInfo.AnalyzeCallResult(Ins, Fn: RetCC_X86);
1103
1104 // Copy all of the result registers out of their specified physreg.
1105 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1106 ++I, ++InsIndex) {
1107 CCValAssign &VA = RVLocs[I];
1108 EVT CopyVT = VA.getLocVT();
1109
1110 // In some calling conventions we need to remove the used registers
1111 // from the register mask.
1112 if (RegMask) {
1113 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1114 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1115 }
1116
1117 // Report an error if there was an attempt to return FP values via XMM
1118 // registers.
1119 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1120 errorUnsupported(DAG, dl, Msg: "SSE register return with SSE disabled");
1121 if (VA.getLocReg() == X86::XMM1)
1122 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1123 else
1124 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1125 } else if (!Subtarget.hasSSE2() &&
1126 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1127 CopyVT == MVT::f64) {
1128 errorUnsupported(DAG, dl, Msg: "SSE2 register return with SSE2 disabled");
1129 if (VA.getLocReg() == X86::XMM1)
1130 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1131 else
1132 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1133 }
1134
1135 // If we prefer to use the value in xmm registers, copy it out as f80 and
1136 // use a truncate to move it from fp stack reg to xmm reg.
1137 bool RoundAfterCopy = false;
1138 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1139 isScalarFPTypeInSSEReg(VA.getValVT())) {
1140 if (!Subtarget.hasX87())
1141 report_fatal_error(reason: "X87 register return with X87 disabled");
1142 CopyVT = MVT::f80;
1143 RoundAfterCopy = (CopyVT != VA.getLocVT());
1144 }
1145
1146 SDValue Val;
1147 if (VA.needsCustom()) {
1148 assert(VA.getValVT() == MVT::v64i1 &&
1149 "Currently the only custom case is when we split v64i1 to 2 regs");
1150 Val =
1151 getv64i1Argument(VA, NextVA&: RVLocs[++I], Root&: Chain, DAG, DL: dl, Subtarget, InGlue: &InGlue);
1152 } else {
1153 Chain = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: CopyVT, Glue: InGlue)
1154 .getValue(R: 1);
1155 Val = Chain.getValue(R: 0);
1156 InGlue = Chain.getValue(R: 2);
1157 }
1158
1159 if (RoundAfterCopy)
1160 Val = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: VA.getValVT(), N1: Val,
1161 // This truncation won't change the value.
1162 N2: DAG.getIntPtrConstant(Val: 1, DL: dl, /*isTarget=*/true));
1163
1164 if (VA.isExtInLoc()) {
1165 if (VA.getValVT().isVector() &&
1166 VA.getValVT().getScalarType() == MVT::i1 &&
1167 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1168 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1169 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1170 Val = lowerRegToMasks(ValArg: Val, ValVT: VA.getValVT(), ValLoc: VA.getLocVT(), DL: dl, DAG);
1171 } else
1172 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
1173 }
1174
1175 if (VA.getLocInfo() == CCValAssign::BCvt)
1176 Val = DAG.getBitcast(VT: VA.getValVT(), V: Val);
1177
1178 InVals.push_back(Elt: Val);
1179 }
1180
1181 return Chain;
1182}
1183
1184//===----------------------------------------------------------------------===//
1185// C & StdCall & Fast Calling Convention implementation
1186//===----------------------------------------------------------------------===//
1187// StdCall calling convention seems to be standard for many Windows' API
1188// routines and around. It differs from C calling convention just a little:
1189// callee should clean up the stack, not caller. Symbols should be also
1190// decorated in some fancy way :) It doesn't support any vector arguments.
1191// For info on fast calling convention see Fast Calling Convention (tail call)
1192// implementation LowerX86_32FastCCCallTo.
1193
1194/// Determines whether Args, either a set of outgoing arguments to a call, or a
1195/// set of incoming args of a call, contains an sret pointer that the callee
/// pops.
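/// For example, a 32-bit linux-gnu cdecl function returning a struct via a
/// hidden sret pointer pops that pointer itself (e.g. "retl $4"), whereas
/// MSVC and MCU targets leave the stack adjustment to the caller.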
1197template <typename T>
1198static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1199 const X86Subtarget &Subtarget) {
1200 // Not C++20 (yet), so no concepts available.
1201 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1202 std::is_same_v<T, ISD::InputArg>,
1203 "requires ISD::OutputArg or ISD::InputArg");
1204
1205 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1206 // for most compilations.
1207 if (!Subtarget.is32Bit())
1208 return false;
1209
1210 if (Args.empty())
1211 return false;
1212
1213 // Most calls do not have an sret argument, check the arg next.
1214 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1215 if (!Flags.isSRet() || Flags.isInReg())
1216 return false;
1217
  // The MSVC ABI does not pop the sret.
1219 if (Subtarget.getTargetTriple().isOSMSVCRT())
1220 return false;
1221
1222 // MCUs don't pop the sret
1223 if (Subtarget.isTargetMCU())
1224 return false;
1225
1226 // Callee pops argument
1227 return true;
1228}
1229
1230/// Make a copy of an aggregate at address specified by "Src" to address
1231/// "Dst" with size and alignment information specified by the specific
1232/// parameter attribute. The copy will be passed as a byval function parameter.
1233static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1234 SDValue Chain, ISD::ArgFlagsTy Flags,
1235 SelectionDAG &DAG, const SDLoc &dl) {
1236 SDValue SizeNode = DAG.getIntPtrConstant(Val: Flags.getByValSize(), DL: dl);
1237
1238 return DAG.getMemcpy(
1239 Chain, dl, Dst, Src, Size: SizeNode, Alignment: Flags.getNonZeroByValAlign(),
1240 /*isVolatile*/ isVol: false, /*AlwaysInline=*/true,
1241 /*isTailCall*/ false, DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
1242}
1243
1244/// Return true if the calling convention is one that we can guarantee TCO for.
1245static bool canGuaranteeTCO(CallingConv::ID CC) {
1246 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1247 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1248 CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1249}
1250
1251/// Return true if we might ever do TCO for calls with this calling convention.
1252static bool mayTailCallThisCC(CallingConv::ID CC) {
1253 switch (CC) {
1254 // C calling conventions:
1255 case CallingConv::C:
1256 case CallingConv::Win64:
1257 case CallingConv::X86_64_SysV:
1258 case CallingConv::PreserveNone:
1259 // Callee pop conventions:
1260 case CallingConv::X86_ThisCall:
1261 case CallingConv::X86_StdCall:
1262 case CallingConv::X86_VectorCall:
1263 case CallingConv::X86_FastCall:
1264 // Swift:
1265 case CallingConv::Swift:
1266 return true;
1267 default:
1268 return canGuaranteeTCO(CC);
1269 }
1270}
1271
1272/// Return true if the function is being made into a tailcall target by
1273/// changing its ABI.
1274static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1275 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1276 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1277}
1278
1279bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1280 if (!CI->isTailCall())
1281 return false;
1282
1283 CallingConv::ID CalleeCC = CI->getCallingConv();
1284 if (!mayTailCallThisCC(CC: CalleeCC))
1285 return false;
1286
1287 return true;
1288}
1289
1290SDValue
1291X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1292 const SmallVectorImpl<ISD::InputArg> &Ins,
1293 const SDLoc &dl, SelectionDAG &DAG,
1294 const CCValAssign &VA,
1295 MachineFrameInfo &MFI, unsigned i) const {
1296 // Create the nodes corresponding to a load from this parameter slot.
1297 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1298 bool AlwaysUseMutable = shouldGuaranteeTCO(
1299 CC: CallConv, GuaranteedTailCallOpt: DAG.getTarget().Options.GuaranteedTailCallOpt);
1300 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1301 EVT ValVT;
1302 MVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
1303
  // If the value is passed by pointer, the address is passed instead of the
  // value itself. No need to extend if the mask value and location share the
  // same absolute size.
1307 bool ExtendedInMem =
1308 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1309 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1310
1311 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1312 ValVT = VA.getLocVT();
1313 else
1314 ValVT = VA.getValVT();
1315
  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since they
  // could be overwritten by the lowering of arguments in case of a tail call.
1320 if (Flags.isByVal()) {
1321 unsigned Bytes = Flags.getByValSize();
1322 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1323
1324 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1325 // can be improved with deeper analysis.
1326 int FI = MFI.CreateFixedObject(Size: Bytes, SPOffset: VA.getLocMemOffset(), IsImmutable: isImmutable,
1327 /*isAliased=*/true);
1328 return DAG.getFrameIndex(FI, VT: PtrVT);
1329 }
1330
1331 EVT ArgVT = Ins[i].ArgVT;
1332
1333 // If this is a vector that has been split into multiple parts, don't elide
1334 // the copy. The layout on the stack may not match the packed in-memory
1335 // layout.
1336 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1337
1338 // This is an argument in memory. We might be able to perform copy elision.
1339 // If the argument is passed directly in memory without any extension, then we
1340 // can perform copy elision. Large vector types, for example, may be passed
1341 // indirectly by pointer.
1342 if (Flags.isCopyElisionCandidate() &&
1343 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1344 !ScalarizedVector) {
1345 SDValue PartAddr;
1346 if (Ins[i].PartOffset == 0) {
1347 // If this is a one-part value or the first part of a multi-part value,
1348 // create a stack object for the entire argument value type and return a
1349 // load from our portion of it. This assumes that if the first part of an
1350 // argument is in memory, the rest will also be in memory.
1351 int FI = MFI.CreateFixedObject(Size: ArgVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
1352 /*IsImmutable=*/false);
1353 PartAddr = DAG.getFrameIndex(FI, VT: PtrVT);
1354 return DAG.getLoad(
1355 VT: ValVT, dl, Chain, Ptr: PartAddr,
1356 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI));
1357 }
1358
1359 // This is not the first piece of an argument in memory. See if there is
1360 // already a fixed stack object including this offset. If so, assume it
1361 // was created by the PartOffset == 0 branch above and create a load from
1362 // the appropriate offset into it.
1363 int64_t PartBegin = VA.getLocMemOffset();
1364 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1365 int FI = MFI.getObjectIndexBegin();
1366 for (; MFI.isFixedObjectIndex(ObjectIdx: FI); ++FI) {
1367 int64_t ObjBegin = MFI.getObjectOffset(ObjectIdx: FI);
1368 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(ObjectIdx: FI);
1369 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1370 break;
1371 }
1372 if (MFI.isFixedObjectIndex(ObjectIdx: FI)) {
1373 SDValue Addr =
1374 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: DAG.getFrameIndex(FI, VT: PtrVT),
1375 N2: DAG.getIntPtrConstant(Val: Ins[i].PartOffset, DL: dl));
1376 return DAG.getLoad(VT: ValVT, dl, Chain, Ptr: Addr,
1377 PtrInfo: MachinePointerInfo::getFixedStack(
1378 MF&: DAG.getMachineFunction(), FI, Offset: Ins[i].PartOffset));
1379 }
1380 }
1381
1382 int FI = MFI.CreateFixedObject(Size: ValVT.getSizeInBits() / 8,
1383 SPOffset: VA.getLocMemOffset(), IsImmutable: isImmutable);
1384
1385 // Set SExt or ZExt flag.
1386 if (VA.getLocInfo() == CCValAssign::ZExt) {
1387 MFI.setObjectZExt(ObjectIdx: FI, IsZExt: true);
1388 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1389 MFI.setObjectSExt(ObjectIdx: FI, IsSExt: true);
1390 }
1391
1392 MaybeAlign Alignment;
1393 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1394 ValVT != MVT::f80)
1395 Alignment = MaybeAlign(4);
1396 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
1397 SDValue Val = DAG.getLoad(
1398 VT: ValVT, dl, Chain, Ptr: FIN,
1399 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI),
1400 Alignment);
1401 return ExtendedInMem
1402 ? (VA.getValVT().isVector()
1403 ? DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: VA.getValVT(), Operand: Val)
1404 : DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val))
1405 : Val;
1406}
1407
1408// FIXME: Get this from tablegen.
1409static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1410 const X86Subtarget &Subtarget) {
1411 assert(Subtarget.is64Bit());
1412
1413 if (Subtarget.isCallingConvWin64(CC: CallConv)) {
1414 static const MCPhysReg GPR64ArgRegsWin64[] = {
1415 X86::RCX, X86::RDX, X86::R8, X86::R9
1416 };
1417 return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
1418 }
1419
1420 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1421 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1422 };
1423 return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
1424}
1425
1426// FIXME: Get this from tablegen.
1427static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1428 CallingConv::ID CallConv,
1429 const X86Subtarget &Subtarget) {
1430 assert(Subtarget.is64Bit());
1431 if (Subtarget.isCallingConvWin64(CC: CallConv)) {
    // The XMM registers which might contain var arg parameters are shadowed
    // in their paired GPRs, so we only need to save the GPRs to their home
    // slots.
1435 // TODO: __vectorcall will change this.
1436 return std::nullopt;
1437 }
1438
1439 bool isSoftFloat = Subtarget.useSoftFloat();
1440 if (isSoftFloat || !Subtarget.hasSSE1())
1441 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1442 // registers.
1443 return std::nullopt;
1444
1445 static const MCPhysReg XMMArgRegs64Bit[] = {
1446 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1447 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1448 };
1449 return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
1450}
1451
1452#ifndef NDEBUG
1453static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1454 return llvm::is_sorted(
1455 Range&: ArgLocs, C: [](const CCValAssign &A, const CCValAssign &B) -> bool {
1456 return A.getValNo() < B.getValNo();
1457 });
1458}
1459#endif
1460
1461namespace {
/// This is a helper class for lowering variable argument (vararg) parameters.
1463class VarArgsLoweringHelper {
1464public:
1465 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1466 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1467 CallingConv::ID CallConv, CCState &CCInfo)
1468 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1469 TheMachineFunction(DAG.getMachineFunction()),
1470 TheFunction(TheMachineFunction.getFunction()),
1471 FrameInfo(TheMachineFunction.getFrameInfo()),
1472 FrameLowering(*Subtarget.getFrameLowering()),
1473 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1474 CCInfo(CCInfo) {}
1475
  // Lower variable argument parameters.
1477 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1478
1479private:
1480 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1481
1482 void forwardMustTailParameters(SDValue &Chain);
1483
1484 bool is64Bit() const { return Subtarget.is64Bit(); }
1485 bool isWin64() const { return Subtarget.isCallingConvWin64(CC: CallConv); }
1486
1487 X86MachineFunctionInfo *FuncInfo;
1488 const SDLoc &DL;
1489 SelectionDAG &DAG;
1490 const X86Subtarget &Subtarget;
1491 MachineFunction &TheMachineFunction;
1492 const Function &TheFunction;
1493 MachineFrameInfo &FrameInfo;
1494 const TargetFrameLowering &FrameLowering;
1495 const TargetLowering &TargLowering;
1496 CallingConv::ID CallConv;
1497 CCState &CCInfo;
1498};
1499} // namespace
1500
1501void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1502 SDValue &Chain, unsigned StackSize) {
  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  // We can skip this if there are no va_start calls.
1506 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1507 CallConv != CallingConv::X86_ThisCall)) {
1508 FuncInfo->setVarArgsFrameIndex(
1509 FrameInfo.CreateFixedObject(Size: 1, SPOffset: StackSize, IsImmutable: true));
1510 }
1511
1512 // 64-bit calling conventions support varargs and register parameters, so we
1513 // have to do extra work to spill them in the prologue.
1514 if (is64Bit()) {
1515 // Find the first unallocated argument registers.
1516 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1517 ArrayRef<MCPhysReg> ArgXMMs =
1518 get64BitArgumentXMMs(MF&: TheMachineFunction, CallConv, Subtarget);
1519 unsigned NumIntRegs = CCInfo.getFirstUnallocated(Regs: ArgGPRs);
1520 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(Regs: ArgXMMs);
1521
1522 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1523 "SSE register cannot be used when SSE is disabled!");
1524
1525 if (isWin64()) {
1526 // Get to the caller-allocated home save location. Add 8 to account
1527 // for the return address.
1528 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1529 FuncInfo->setRegSaveFrameIndex(
1530 FrameInfo.CreateFixedObject(Size: 1, SPOffset: NumIntRegs * 8 + HomeOffset, IsImmutable: false));
      // Fix up the vararg frame index to point into the shadow area (4 x i64).
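      // Illustrative: with one named integer argument, NumIntRegs is 1, so
      // va_start will point at the RDX home slot, the first slot that can
      // hold an unnamed argument.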
1532 if (NumIntRegs < 4)
1533 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1534 } else {
      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so
      // they may be loaded when the callee walks the va_list (e.g. via
      // va_arg).
1538 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1539 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1540 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1541 Size: ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Alignment: Align(16), isSpillSlot: false));
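      // This reserves the full SysV AMD64 register save area: 6 GPRs * 8 bytes
      // followed by the XMM argument registers * 16 bytes (176 bytes in total
      // when all 8 XMM registers are available). The gp_offset/fp_offset
      // fields of the va_list index into this area.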
1542 }
1543
    // SDValues for the GPR argument registers holding live input values.
    SmallVector<SDValue, 6> LiveGPRs;
    // SDValues for the XMM argument registers holding live input values.
    SmallVector<SDValue, 8> LiveXMMRegs;
    // SDValue for the %al register, if applicable.
    SDValue ALVal;
1549
1550 // Gather all the live in physical registers.
1551 for (MCPhysReg Reg : ArgGPRs.slice(N: NumIntRegs)) {
1552 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1553 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1554 }
1555 const auto &AvailableXmms = ArgXMMs.slice(N: NumXMMRegs);
1556 if (!AvailableXmms.empty()) {
1557 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1558 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1559 for (MCPhysReg Reg : AvailableXmms) {
        // The fast register allocator spills virtual registers at basic block
        // boundaries, which can lead to uses of XMM registers outside the
        // check on %al. Pass physical registers to VASTART_SAVE_XMM_REGS to
        // avoid unnecessary spilling.
1564 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1565 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1566 }
1567 }
1568
1569 // Store the integer parameter registers.
1570 SmallVector<SDValue, 8> MemOps;
1571 SDValue RSFIN =
1572 DAG.getFrameIndex(FI: FuncInfo->getRegSaveFrameIndex(),
1573 VT: TargLowering.getPointerTy(DL: DAG.getDataLayout()));
1574 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1575 for (SDValue Val : LiveGPRs) {
1576 SDValue FIN = DAG.getNode(Opcode: ISD::ADD, DL,
1577 VT: TargLowering.getPointerTy(DL: DAG.getDataLayout()),
1578 N1: RSFIN, N2: DAG.getIntPtrConstant(Val: Offset, DL));
1579 SDValue Store =
1580 DAG.getStore(Chain: Val.getValue(R: 1), dl: DL, Val, Ptr: FIN,
1581 PtrInfo: MachinePointerInfo::getFixedStack(
1582 MF&: DAG.getMachineFunction(),
1583 FI: FuncInfo->getRegSaveFrameIndex(), Offset));
1584 MemOps.push_back(Elt: Store);
1585 Offset += 8;
1586 }
1587
1588 // Now store the XMM (fp + vector) parameter registers.
1589 if (!LiveXMMRegs.empty()) {
1590 SmallVector<SDValue, 12> SaveXMMOps;
1591 SaveXMMOps.push_back(Elt: Chain);
1592 SaveXMMOps.push_back(Elt: ALVal);
1593 SaveXMMOps.push_back(Elt: RSFIN);
1594 SaveXMMOps.push_back(
1595 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1596 llvm::append_range(C&: SaveXMMOps, R&: LiveXMMRegs);
1597 MachineMemOperand *StoreMMO =
1598 DAG.getMachineFunction().getMachineMemOperand(
1599 PtrInfo: MachinePointerInfo::getFixedStack(
1600 MF&: DAG.getMachineFunction(), FI: FuncInfo->getRegSaveFrameIndex(),
1601 Offset),
1602 F: MachineMemOperand::MOStore, Size: 128, BaseAlignment: Align(16));
1603 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1604 DL, DAG.getVTList(MVT::Other),
1605 SaveXMMOps, MVT::i8, StoreMMO));
1606 }
1607
1608 if (!MemOps.empty())
1609 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1610 }
1611}
1612
1613void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1614 // Find the largest legal vector type.
1615 MVT VecVT = MVT::Other;
1616 // FIXME: Only some x86_32 calling conventions support AVX512.
1617 if (Subtarget.useAVX512Regs() &&
1618 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1619 CallConv == CallingConv::Intel_OCL_BI)))
1620 VecVT = MVT::v16f32;
1621 else if (Subtarget.hasAVX())
1622 VecVT = MVT::v8f32;
1623 else if (Subtarget.hasSSE2())
1624 VecVT = MVT::v4f32;
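  // For example, on an x86-64 target with AVX but without usable AVX-512
  // registers, variadic vector arguments get forwarded as v8f32 (YMM-sized)
  // values.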
1625
1626 // We forward some GPRs and some vector types.
1627 SmallVector<MVT, 2> RegParmTypes;
1628 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1629 RegParmTypes.push_back(Elt: IntVT);
1630 if (VecVT != MVT::Other)
1631 RegParmTypes.push_back(Elt: VecVT);
1632
1633 // Compute the set of forwarded registers. The rest are scratch.
1634 SmallVectorImpl<ForwardedRegister> &Forwards =
1635 FuncInfo->getForwardedMustTailRegParms();
1636 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: CC_X86);
1637
1638 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1639 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1640 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1641 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1642 }
1643
1644 // Copy all forwards from physical to virtual registers.
1645 for (ForwardedRegister &FR : Forwards) {
1646 // FIXME: Can we use a less constrained schedule?
1647 SDValue RegVal = DAG.getCopyFromReg(Chain, dl: DL, Reg: FR.VReg, VT: FR.VT);
1648 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1649 RegClass: TargLowering.getRegClassFor(VT: FR.VT));
1650 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: FR.VReg, N: RegVal);
1651 }
1652}
1653
1654void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1655 unsigned StackSize) {
  // Set the frame indices to the sentinel value 0xAAAAAAA to mark them as
  // unset. If necessary, they will be set to the correct values later.
1658 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1659 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1660
1661 if (FrameInfo.hasVAStart())
1662 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1663
1664 if (FrameInfo.hasMustTailInVarArgFunc())
1665 forwardMustTailParameters(Chain);
1666}
1667
1668SDValue X86TargetLowering::LowerFormalArguments(
1669 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1670 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1671 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1672 MachineFunction &MF = DAG.getMachineFunction();
1673 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1674
1675 const Function &F = MF.getFunction();
1676 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1677 F.getName() == "main")
1678 FuncInfo->setForceFramePointer(true);
1679
1680 MachineFrameInfo &MFI = MF.getFrameInfo();
1681 bool Is64Bit = Subtarget.is64Bit();
1682 bool IsWin64 = Subtarget.isCallingConvWin64(CC: CallConv);
1683
  assert(
      !(IsVarArg && canGuaranteeTCO(CallConv)) &&
      "Var args not supported with calling conventions regcall, fastcc, ghc "
      "or hipe");
1687
1688 // Assign locations to all of the incoming arguments.
1689 SmallVector<CCValAssign, 16> ArgLocs;
1690 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1691
1692 // Allocate shadow area for Win64.
1693 if (IsWin64)
1694 CCInfo.AllocateStack(Size: 32, Alignment: Align(8));
1695
1696 CCInfo.AnalyzeArguments(Ins, Fn: CC_X86);
1697
1698 // In vectorcall calling convention a second pass is required for the HVA
1699 // types.
1700 if (CallingConv::X86_VectorCall == CallConv) {
1701 CCInfo.AnalyzeArgumentsSecondPass(Args: Ins, Fn: CC_X86);
1702 }
1703
  // The next loop assumes that the locations are in the same order as the
  // input arguments.
1706 assert(isSortedByValueNo(ArgLocs) &&
1707 "Argument Location list must be sorted before lowering");
1708
1709 SDValue ArgValue;
1710 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1711 ++I, ++InsIndex) {
1712 assert(InsIndex < Ins.size() && "Invalid Ins index");
1713 CCValAssign &VA = ArgLocs[I];
1714
1715 if (VA.isRegLoc()) {
1716 EVT RegVT = VA.getLocVT();
1717 if (VA.needsCustom()) {
1718 assert(
1719 VA.getValVT() == MVT::v64i1 &&
1720 "Currently the only custom case is when we split v64i1 to 2 regs");
1721
1722 // v64i1 values, in regcall calling convention, that are
1723 // compiled to 32 bit arch, are split up into two registers.
1724 ArgValue =
1725 getv64i1Argument(VA, NextVA&: ArgLocs[++I], Root&: Chain, DAG, DL: dl, Subtarget);
1726 } else {
1727 const TargetRegisterClass *RC;
1728 if (RegVT == MVT::i8)
1729 RC = &X86::GR8RegClass;
1730 else if (RegVT == MVT::i16)
1731 RC = &X86::GR16RegClass;
1732 else if (RegVT == MVT::i32)
1733 RC = &X86::GR32RegClass;
1734 else if (Is64Bit && RegVT == MVT::i64)
1735 RC = &X86::GR64RegClass;
1736 else if (RegVT == MVT::f16)
1737 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1738 else if (RegVT == MVT::f32)
1739 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1740 else if (RegVT == MVT::f64)
1741 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1742 else if (RegVT == MVT::f80)
1743 RC = &X86::RFP80RegClass;
1744 else if (RegVT == MVT::f128)
1745 RC = &X86::VR128RegClass;
1746 else if (RegVT.is512BitVector())
1747 RC = &X86::VR512RegClass;
1748 else if (RegVT.is256BitVector())
1749 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1750 else if (RegVT.is128BitVector())
1751 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1752 else if (RegVT == MVT::x86mmx)
1753 RC = &X86::VR64RegClass;
1754 else if (RegVT == MVT::v1i1)
1755 RC = &X86::VK1RegClass;
1756 else if (RegVT == MVT::v8i1)
1757 RC = &X86::VK8RegClass;
1758 else if (RegVT == MVT::v16i1)
1759 RC = &X86::VK16RegClass;
1760 else if (RegVT == MVT::v32i1)
1761 RC = &X86::VK32RegClass;
1762 else if (RegVT == MVT::v64i1)
1763 RC = &X86::VK64RegClass;
1764 else
1765 llvm_unreachable("Unknown argument type!");
1766
1767 Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
1768 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, VT: RegVT);
1769 }
1770
1771 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1772 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1773 // right size.
1774 if (VA.getLocInfo() == CCValAssign::SExt)
1775 ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: RegVT, N1: ArgValue,
1776 N2: DAG.getValueType(VA.getValVT()));
1777 else if (VA.getLocInfo() == CCValAssign::ZExt)
1778 ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: RegVT, N1: ArgValue,
1779 N2: DAG.getValueType(VA.getValVT()));
1780 else if (VA.getLocInfo() == CCValAssign::BCvt)
1781 ArgValue = DAG.getBitcast(VT: VA.getValVT(), V: ArgValue);
1782
1783 if (VA.isExtInLoc()) {
1784 // Handle MMX values passed in XMM regs.
1785 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1786 ArgValue = DAG.getNode(Opcode: X86ISD::MOVDQ2Q, DL: dl, VT: VA.getValVT(), Operand: ArgValue);
1787 else if (VA.getValVT().isVector() &&
1788 VA.getValVT().getScalarType() == MVT::i1 &&
1789 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1790 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1791 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1792 ArgValue = lowerRegToMasks(ValArg: ArgValue, ValVT: VA.getValVT(), ValLoc: RegVT, DL: dl, DAG);
1793 } else
1794 ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: ArgValue);
1795 }
1796 } else {
1797 assert(VA.isMemLoc());
1798 ArgValue =
1799 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i: InsIndex);
1800 }
1801
    // If the value is passed via a pointer, do a load.
1803 if (VA.getLocInfo() == CCValAssign::Indirect &&
1804 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1805 ArgValue =
1806 DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: ArgValue, PtrInfo: MachinePointerInfo());
1807 }
1808
1809 InVals.push_back(Elt: ArgValue);
1810 }
1811
1812 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1813 if (Ins[I].Flags.isSwiftAsync()) {
1814 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1815 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1816 X86FI->setHasSwiftAsyncContext(true);
1817 else {
1818 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1819 int FI =
1820 MF.getFrameInfo().CreateStackObject(Size: PtrSize, Alignment: Align(PtrSize), isSpillSlot: false);
1821 X86FI->setSwiftAsyncContextFrameIdx(FI);
1822 SDValue St = DAG.getStore(
1823 DAG.getEntryNode(), dl, InVals[I],
1824 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1825 MachinePointerInfo::getFixedStack(MF, FI));
1826 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1827 }
1828 }
1829
    // The Swift calling convention does not require us to copy the sret
    // argument into %rax/%eax for the return, so we don't set SRetReturnReg
    // for Swift.
1832 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1833 continue;
1834
1835 // All x86 ABIs require that for returning structs by value we copy the
1836 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1837 // the argument into a virtual register so that we can access it from the
1838 // return points.
1839 if (Ins[I].Flags.isSRet()) {
1840 assert(!FuncInfo->getSRetReturnReg() &&
1841 "SRet return has already been set");
1842 MVT PtrTy = getPointerTy(DL: DAG.getDataLayout());
1843 Register Reg =
1844 MF.getRegInfo().createVirtualRegister(RegClass: getRegClassFor(VT: PtrTy));
1845 FuncInfo->setSRetReturnReg(Reg);
1846 SDValue Copy = DAG.getCopyToReg(Chain: DAG.getEntryNode(), dl, Reg, N: InVals[I]);
1847 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1848 break;
1849 }
1850 }
1851
1852 unsigned StackSize = CCInfo.getStackSize();
1853 // Align stack specially for tail calls.
1854 if (shouldGuaranteeTCO(CC: CallConv,
1855 GuaranteedTailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt))
1856 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1857
1858 if (IsVarArg)
1859 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1860 .lowerVarArgsParameters(Chain, StackSize);
1861
1862 // Some CCs need callee pop.
1863 if (X86::isCalleePop(CallingConv: CallConv, is64Bit: Is64Bit, IsVarArg,
1864 GuaranteeTCO: MF.getTarget().Options.GuaranteedTailCallOpt)) {
1865 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1866 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1867 // X86 interrupts must pop the error code (and the alignment padding) if
1868 // present.
1869 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1870 } else {
1871 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1872 // If this is an sret function, the return should pop the hidden pointer.
1873 if (!canGuaranteeTCO(CC: CallConv) && hasCalleePopSRet(Args: Ins, Subtarget))
1874 FuncInfo->setBytesToPopOnReturn(4);
1875 }
1876
1877 if (!Is64Bit) {
1878 // RegSaveFrameIndex is X86-64 only.
1879 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1880 }
1881
1882 FuncInfo->setArgumentStackSize(StackSize);
1883
1884 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1885 EHPersonality Personality = classifyEHPersonality(Pers: F.getPersonalityFn());
1886 if (Personality == EHPersonality::CoreCLR) {
1887 assert(Is64Bit);
1888 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1889 // that we'd prefer this slot be allocated towards the bottom of the frame
1890 // (i.e. near the stack pointer after allocating the frame). Every
1891 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1892 // offset from the bottom of this and each funclet's frame must be the
1893 // same, so the size of funclets' (mostly empty) frames is dictated by
1894 // how far this slot is from the bottom (since they allocate just enough
1895 // space to accommodate holding this slot at the correct offset).
1896 int PSPSymFI = MFI.CreateStackObject(Size: 8, Alignment: Align(8), /*isSpillSlot=*/false);
1897 EHInfo->PSPSymFrameIdx = PSPSymFI;
1898 }
1899 }
1900
1901 if (shouldDisableArgRegFromCSR(CC: CallConv) ||
1902 F.hasFnAttribute(Kind: "no_caller_saved_registers")) {
1903 MachineRegisterInfo &MRI = MF.getRegInfo();
1904 for (std::pair<Register, Register> Pair : MRI.liveins())
1905 MRI.disableCalleeSavedRegister(Reg: Pair.first);
1906 }
1907
1908 if (CallingConv::PreserveNone == CallConv)
1909 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1910 if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1911 Ins[I].Flags.isSwiftError()) {
1912 errorUnsupported(DAG, dl,
1913 Msg: "Swift attributes can't be used with preserve_none");
1914 break;
1915 }
1916 }
1917
1918 return Chain;
1919}
1920
1921SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1922 SDValue Arg, const SDLoc &dl,
1923 SelectionDAG &DAG,
1924 const CCValAssign &VA,
1925 ISD::ArgFlagsTy Flags,
1926 bool isByVal) const {
1927 unsigned LocMemOffset = VA.getLocMemOffset();
1928 SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
1929 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
1930 N1: StackPtr, N2: PtrOff);
1931 if (isByVal)
1932 return CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff, Chain, Flags, DAG, dl);
1933
1934 MaybeAlign Alignment;
1935 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1936 Arg.getSimpleValueType() != MVT::f80)
1937 Alignment = MaybeAlign(4);
1938 return DAG.getStore(
1939 Chain, dl, Val: Arg, Ptr: PtrOff,
1940 PtrInfo: MachinePointerInfo::getStack(MF&: DAG.getMachineFunction(), Offset: LocMemOffset),
1941 Alignment);
1942}
1943
/// Emit a load of the return address if tail call
/// optimization is performed and it is required.
1946SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1947 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1948 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1949 // Adjust the Return address stack slot.
1950 EVT VT = getPointerTy(DL: DAG.getDataLayout());
1951 OutRetAddr = getReturnAddressFrameIndex(DAG);
1952
1953 // Load the "old" Return address.
1954 OutRetAddr = DAG.getLoad(VT, dl, Chain, Ptr: OutRetAddr, PtrInfo: MachinePointerInfo());
1955 return SDValue(OutRetAddr.getNode(), 1);
1956}
1957
1958/// Emit a store of the return address if tail call
1959/// optimization is performed and it is required (FPDiff!=0).
1960static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1961 SDValue Chain, SDValue RetAddrFrIdx,
1962 EVT PtrVT, unsigned SlotSize,
1963 int FPDiff, const SDLoc &dl) {
1964 // Store the return address to the appropriate stack slot.
1965 if (!FPDiff) return Chain;
1966 // Calculate the new stack slot for the return address.
1967 int NewReturnAddrFI =
1968 MF.getFrameInfo().CreateFixedObject(Size: SlotSize, SPOffset: (int64_t)FPDiff - SlotSize,
1969 IsImmutable: false);
1970 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(FI: NewReturnAddrFI, VT: PtrVT);
1971 Chain = DAG.getStore(Chain, dl, Val: RetAddrFrIdx, Ptr: NewRetAddrFrIdx,
1972 PtrInfo: MachinePointerInfo::getFixedStack(
1973 MF&: DAG.getMachineFunction(), FI: NewReturnAddrFI));
1974 return Chain;
1975}
1976
/// Returns a vector_shuffle mask for a movs{s|d} or movd
/// operation of the specified width.
1979SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1980 SDValue V1, SDValue V2) const {
1981 unsigned NumElems = VT.getVectorNumElements();
1982 SmallVector<int, 8> Mask;
1983 Mask.push_back(Elt: NumElems);
1984 for (unsigned i = 1; i != NumElems; ++i)
1985 Mask.push_back(Elt: i);
1986 return DAG.getVectorShuffle(VT, dl, N1: V1, N2: V2, Mask);
1987}
1988
1989SDValue
1990X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1991 SmallVectorImpl<SDValue> &InVals) const {
1992 SelectionDAG &DAG = CLI.DAG;
1993 SDLoc &dl = CLI.DL;
1994 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1995 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1996 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1997 SDValue Chain = CLI.Chain;
1998 SDValue Callee = CLI.Callee;
1999 CallingConv::ID CallConv = CLI.CallConv;
2000 bool &isTailCall = CLI.IsTailCall;
2001 bool isVarArg = CLI.IsVarArg;
2002 const auto *CB = CLI.CB;
2003
2004 MachineFunction &MF = DAG.getMachineFunction();
2005 bool Is64Bit = Subtarget.is64Bit();
2006 bool IsWin64 = Subtarget.isCallingConvWin64(CC: CallConv);
2007 bool IsSibcall = false;
2008 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2009 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2010 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Args: Outs, Subtarget);
2011 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2012 bool HasNCSR = (CB && isa<CallInst>(Val: CB) &&
2013 CB->hasFnAttr(Kind: "no_caller_saved_registers"));
2014 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2015 bool IsIndirectCall = (CB && isa<CallInst>(Val: CB) && CB->isIndirectCall());
2016 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2017 const Module *M = MF.getMMI().getModule();
2018 Metadata *IsCFProtectionSupported = M->getModuleFlag(Key: "cf-protection-branch");
2019
2020 MachineFunction::CallSiteInfo CSInfo;
2021 if (CallConv == CallingConv::X86_INTR)
2022 report_fatal_error(reason: "X86 interrupts may not be called directly");
2023
2024 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2025 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2026 // If we are using a GOT, disable tail calls to external symbols with
2027 // default visibility. Tail calling such a symbol requires using a GOT
2028 // relocation, which forces early binding of the symbol. This breaks code
    // that requires lazy function symbol resolution. Using musttail or
2030 // GuaranteedTailCallOpt will override this.
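    // Illustrative: in -fPIC code a tail call to an undefined external
    // function with default visibility is disabled here, while a callee with
    // local linkage or hidden visibility can still be tail called.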
2031 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
2032 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2033 G->getGlobal()->hasDefaultVisibility()))
2034 isTailCall = false;
2035 }
2036
2037 if (isTailCall && !IsMustTail) {
2038 // Check if it's really possible to do a tail call.
2039 isTailCall = IsEligibleForTailCallOptimization(
2040 Callee, CalleeCC: CallConv, IsCalleeStackStructRet: IsCalleePopSRet, isVarArg, RetTy: CLI.RetTy, Outs, OutVals,
2041 Ins, DAG);
2042
2043 // Sibcalls are automatically detected tailcalls which do not require
2044 // ABI changes.
2045 if (!IsGuaranteeTCO && isTailCall)
2046 IsSibcall = true;
2047
2048 if (isTailCall)
2049 ++NumTailCalls;
2050 }
2051
2052 if (IsMustTail && !isTailCall)
2053 report_fatal_error(reason: "failed to perform tail call elimination on a call "
2054 "site marked musttail");
2055
  assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
         "Var args not supported with calling conventions fastcc, ghc or "
         "hipe");
2058
2059 // Analyze operands of the call, assigning locations to each operand.
2060 SmallVector<CCValAssign, 16> ArgLocs;
2061 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2062
2063 // Allocate shadow area for Win64.
2064 if (IsWin64)
2065 CCInfo.AllocateStack(Size: 32, Alignment: Align(8));
2066
2067 CCInfo.AnalyzeArguments(Outs, Fn: CC_X86);
2068
2069 // In vectorcall calling convention a second pass is required for the HVA
2070 // types.
2071 if (CallingConv::X86_VectorCall == CallConv) {
2072 CCInfo.AnalyzeArgumentsSecondPass(Args: Outs, Fn: CC_X86);
2073 }
2074
2075 // Get a count of how many bytes are to be pushed on the stack.
2076 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2077 if (IsSibcall)
    // This is a sibcall. The memory operands are already available in the
    // caller's own incoming argument area (allocated by its caller), so we
    // do not need to push anything.
2080 NumBytes = 0;
2081 else if (IsGuaranteeTCO && canGuaranteeTCO(CC: CallConv))
2082 NumBytes = GetAlignedArgumentStackSize(StackSize: NumBytes, DAG);
2083
2084 int FPDiff = 0;
2085 if (isTailCall &&
2086 shouldGuaranteeTCO(CC: CallConv,
2087 GuaranteedTailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) {
2088 // Lower arguments at fp - stackoffset + fpdiff.
2089 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2090
2091 FPDiff = NumBytesCallerPushed - NumBytes;
2092
    // Record the delta by which the return address stack slot moves, but only
    // if this call needs a larger adjustment (a more negative delta) than any
    // recorded so far.
2095 if (FPDiff < X86Info->getTCReturnAddrDelta())
2096 X86Info->setTCReturnAddrDelta(FPDiff);
2097 }
2098
2099 unsigned NumBytesToPush = NumBytes;
2100 unsigned NumBytesToPop = NumBytes;
2101
  // If we have an inalloca argument, all stack space has already been
  // allocated for us and will be right at the top of the stack. We don't
  // support multiple arguments passed in memory when using inalloca.
2105 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2106 NumBytesToPush = 0;
2107 if (!ArgLocs.back().isMemLoc())
2108 report_fatal_error(reason: "cannot use inalloca attribute on a register "
2109 "parameter");
2110 if (ArgLocs.back().getLocMemOffset() != 0)
2111 report_fatal_error(reason: "any parameter with the inalloca attribute must be "
2112 "the only memory argument");
2113 } else if (CLI.IsPreallocated) {
2114 assert(ArgLocs.back().isMemLoc() &&
2115 "cannot use preallocated attribute on a register "
2116 "parameter");
2117 SmallVector<size_t, 4> PreallocatedOffsets;
2118 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2119 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2120 PreallocatedOffsets.push_back(Elt: ArgLocs[i].getLocMemOffset());
2121 }
2122 }
2123 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2124 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CS: CLI.CB);
2125 MFI->setPreallocatedStackSize(Id: PreallocatedId, StackSize: NumBytes);
2126 MFI->setPreallocatedArgOffsets(Id: PreallocatedId, AO: PreallocatedOffsets);
2127 NumBytesToPush = 0;
2128 }
2129
2130 if (!IsSibcall && !IsMustTail)
2131 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytesToPush,
2132 OutSize: NumBytes - NumBytesToPush, DL: dl);
2133
2134 SDValue RetAddrFrIdx;
2135 // Load return address for tail calls.
2136 if (isTailCall && FPDiff)
2137 Chain = EmitTailCallLoadRetAddr(DAG, OutRetAddr&: RetAddrFrIdx, Chain, IsTailCall: isTailCall,
2138 Is64Bit, FPDiff, dl);
2139
2140 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2141 SmallVector<SDValue, 8> MemOpChains;
2142 SDValue StackPtr;
2143
  // The next loop assumes that the locations are in the same order as the
  // input arguments.
2146 assert(isSortedByValueNo(ArgLocs) &&
2147 "Argument Location list must be sorted before lowering");
2148
  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
2151 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2152 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2153 ++I, ++OutIndex) {
2154 assert(OutIndex < Outs.size() && "Invalid Out index");
2155 // Skip inalloca/preallocated arguments, they have already been written.
2156 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2157 if (Flags.isInAlloca() || Flags.isPreallocated())
2158 continue;
2159
2160 CCValAssign &VA = ArgLocs[I];
2161 EVT RegVT = VA.getLocVT();
2162 SDValue Arg = OutVals[OutIndex];
2163 bool isByVal = Flags.isByVal();
2164
2165 // Promote the value if needed.
2166 switch (VA.getLocInfo()) {
2167 default: llvm_unreachable("Unknown loc info!");
2168 case CCValAssign::Full: break;
2169 case CCValAssign::SExt:
2170 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: RegVT, Operand: Arg);
2171 break;
2172 case CCValAssign::ZExt:
2173 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: RegVT, Operand: Arg);
2174 break;
2175 case CCValAssign::AExt:
2176 if (Arg.getValueType().isVector() &&
2177 Arg.getValueType().getVectorElementType() == MVT::i1)
2178 Arg = lowerMasksToReg(ValArg: Arg, ValLoc: RegVT, DL: dl, DAG);
2179 else if (RegVT.is128BitVector()) {
2180 // Special case: passing MMX values in XMM registers.
2181 Arg = DAG.getBitcast(MVT::i64, Arg);
2182 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2183 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2184 } else
2185 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: RegVT, Operand: Arg);
2186 break;
2187 case CCValAssign::BCvt:
2188 Arg = DAG.getBitcast(VT: RegVT, V: Arg);
2189 break;
2190 case CCValAssign::Indirect: {
2191 if (isByVal) {
2192 // Memcpy the argument to a temporary stack slot to prevent
2193 // the caller from seeing any modifications the callee may make
2194 // as guaranteed by the `byval` attribute.
2195 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2196 Size: Flags.getByValSize(),
2197 Alignment: std::max(a: Align(16), b: Flags.getNonZeroByValAlign()), isSpillSlot: false);
2198 SDValue StackSlot =
2199 DAG.getFrameIndex(FI: FrameIdx, VT: getPointerTy(DL: DAG.getDataLayout()));
2200 Chain =
2201 CreateCopyOfByValArgument(Src: Arg, Dst: StackSlot, Chain, Flags, DAG, dl);
2202 // From now on treat this as a regular pointer
2203 Arg = StackSlot;
2204 isByVal = false;
2205 } else {
2206 // Store the argument.
2207 SDValue SpillSlot = DAG.CreateStackTemporary(VT: VA.getValVT());
2208 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
2209 Chain = DAG.getStore(
2210 Chain, dl, Val: Arg, Ptr: SpillSlot,
2211 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI));
2212 Arg = SpillSlot;
2213 }
2214 break;
2215 }
2216 }
2217
2218 if (VA.needsCustom()) {
2219 assert(VA.getValVT() == MVT::v64i1 &&
2220 "Currently the only custom case is when we split v64i1 to 2 regs");
2221 // Split v64i1 value into two registers
2222 Passv64i1ArgInRegs(DL: dl, DAG, Arg, RegsToPass, VA, NextVA&: ArgLocs[++I], Subtarget);
2223 } else if (VA.isRegLoc()) {
2224 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
2225 const TargetOptions &Options = DAG.getTarget().Options;
2226 if (Options.EmitCallSiteInfo)
2227 CSInfo.ArgRegPairs.emplace_back(Args: VA.getLocReg(), Args&: I);
2228 if (isVarArg && IsWin64) {
        // The Win64 ABI requires an argument passed in an XMM register to also
        // be copied to the corresponding shadow GPR if the callee is a varargs
        // function.
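        // For example, a double passed in XMM1 to a varargs callee is also
        // copied into RDX so the callee can spill it together with the GPR
        // home area.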
2231 Register ShadowReg;
2232 switch (VA.getLocReg()) {
2233 case X86::XMM0: ShadowReg = X86::RCX; break;
2234 case X86::XMM1: ShadowReg = X86::RDX; break;
2235 case X86::XMM2: ShadowReg = X86::R8; break;
2236 case X86::XMM3: ShadowReg = X86::R9; break;
2237 }
2238 if (ShadowReg)
2239 RegsToPass.push_back(Elt: std::make_pair(x&: ShadowReg, y&: Arg));
2240 }
2241 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2242 assert(VA.isMemLoc());
2243 if (!StackPtr.getNode())
2244 StackPtr = DAG.getCopyFromReg(Chain, dl, Reg: RegInfo->getStackRegister(),
2245 VT: getPointerTy(DL: DAG.getDataLayout()));
2246 MemOpChains.push_back(Elt: LowerMemOpCallTo(Chain, StackPtr, Arg,
2247 dl, DAG, VA, Flags, isByVal));
2248 }
2249 }
2250
2251 if (!MemOpChains.empty())
2252 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2253
2254 if (Subtarget.isPICStyleGOT()) {
    // ELF / PIC requires the GOT pointer to be in the EBX register before
    // function calls via the PLT (except for regcall).
2257 if (!isTailCall) {
      // An indirect call with the RegCall calling convention may use up all
      // the general-purpose registers, so it is not suitable to reserve EBX
      // for the GOT address; just let the register allocator handle it.
2261 if (CallConv != CallingConv::X86_RegCall)
2262 RegsToPass.push_back(std::make_pair(
2263 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2264 getPointerTy(DAG.getDataLayout()))));
2265 } else {
      // If we are tail calling and generating PIC/GOT style code, load the
      // address of the callee into ECX. The value in ECX is used as the target
      // of the tail jump. This is done to circumvent the ebx/callee-saved
      // problem for tail calls on PIC/GOT targets. Normally we would just put
      // the address of the GOT into ebx and then call target@PLT. But for tail
      // calls ebx would be restored (since ebx is callee saved) before jumping
      // to the target@PLT.
2273
2274 // Note: The actual moving to ECX is done further down.
2275 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
2276 if (G && !G->getGlobal()->hasLocalLinkage() &&
2277 G->getGlobal()->hasDefaultVisibility())
2278 Callee = LowerGlobalAddress(Op: Callee, DAG);
2279 else if (isa<ExternalSymbolSDNode>(Val: Callee))
2280 Callee = LowerExternalSymbol(Op: Callee, DAG);
2281 }
2282 }
2283
2284 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2285 (Subtarget.hasSSE1() || !M->getModuleFlag(Key: "SkipRaxSetup"))) {
2286 // From AMD64 ABI document:
2287 // For calls that may call functions that use varargs or stdargs
2288 // (prototype-less calls or calls to functions containing ellipsis (...) in
2289 // the declaration) %al is used as hidden argument to specify the number
2290 // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
2292 // registers used and is in the range 0 - 8 inclusive.
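    // For example (illustrative), a call like printf("%f\n", x) that passes
    // one double in XMM0 ends up with %al set to 1 here.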
2293
2294 // Count the number of XMM registers allocated.
2295 static const MCPhysReg XMMArgRegs[] = {
2296 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2297 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2298 };
2299 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2300 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2301 && "SSE registers cannot be used when SSE is disabled");
2302 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2303 DAG.getConstant(NumXMMRegs, dl,
2304 MVT::i8)));
2305 }
2306
2307 if (isVarArg && IsMustTail) {
2308 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2309 for (const auto &F : Forwards) {
2310 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: F.VReg, VT: F.VT);
2311 RegsToPass.push_back(Elt: std::make_pair(x: F.PReg, y&: Val));
2312 }
2313 }
2314
2315 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2316 // don't need this because the eligibility check rejects calls that require
2317 // shuffling arguments passed in memory.
2318 if (!IsSibcall && isTailCall) {
2319 // Force all the incoming stack arguments to be loaded from the stack
2320 // before any new outgoing arguments are stored to the stack, because the
2321 // outgoing stack slots may alias the incoming argument stack slots, and
2322 // the alias isn't otherwise explicit. This is slightly more conservative
2323 // than necessary, because it means that each store effectively depends
2324 // on every argument instead of just those arguments it would clobber.
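    // Illustrative: if the caller's incoming argument slot is reused as the
    // callee's outgoing slot for a different argument, its old value must be
    // loaded before any outgoing store can clobber it.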
2325 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2326
2327 SmallVector<SDValue, 8> MemOpChains2;
2328 SDValue FIN;
2329 int FI = 0;
2330 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2331 ++I, ++OutsIndex) {
2332 CCValAssign &VA = ArgLocs[I];
2333
2334 if (VA.isRegLoc()) {
2335 if (VA.needsCustom()) {
2336 assert((CallConv == CallingConv::X86_RegCall) &&
2337 "Expecting custom case only in regcall calling convention");
          // This means that we are in the special case where one argument was
          // passed through two register locations - skip the next location.
2340 ++I;
2341 }
2342
2343 continue;
2344 }
2345
2346 assert(VA.isMemLoc());
2347 SDValue Arg = OutVals[OutsIndex];
2348 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2349 // Skip inalloca/preallocated arguments. They don't require any work.
2350 if (Flags.isInAlloca() || Flags.isPreallocated())
2351 continue;
2352 // Create frame index.
2353 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2354 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2355 FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
2356 FIN = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
2357
2358 if (Flags.isByVal()) {
2359 // Copy relative to framepointer.
2360 SDValue Source = DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL: dl);
2361 if (!StackPtr.getNode())
2362 StackPtr = DAG.getCopyFromReg(Chain, dl, Reg: RegInfo->getStackRegister(),
2363 VT: getPointerTy(DL: DAG.getDataLayout()));
2364 Source = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
2365 N1: StackPtr, N2: Source);
2366
2367 MemOpChains2.push_back(Elt: CreateCopyOfByValArgument(Src: Source, Dst: FIN,
2368 Chain: ArgChain,
2369 Flags, DAG, dl));
2370 } else {
2371 // Store relative to framepointer.
2372 MemOpChains2.push_back(Elt: DAG.getStore(
2373 Chain: ArgChain, dl, Val: Arg, Ptr: FIN,
2374 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)));
2375 }
2376 }
2377
2378 if (!MemOpChains2.empty())
2379 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2380
2381 // Store the return address to the appropriate stack slot.
2382 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2383 PtrVT: getPointerTy(DL: DAG.getDataLayout()),
2384 SlotSize: RegInfo->getSlotSize(), FPDiff, dl);
2385 }
2386
2387 // Build a sequence of copy-to-reg nodes chained together with token chain
2388 // and glue operands which copy the outgoing args into registers.
2389 SDValue InGlue;
2390 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2391 Chain = DAG.getCopyToReg(Chain, dl, Reg: RegsToPass[i].first,
2392 N: RegsToPass[i].second, Glue: InGlue);
2393 InGlue = Chain.getValue(R: 1);
2394 }
2395
2396 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2397 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2398 // In the 64-bit large code model, we have to make all calls
2399 // through a register, since the call instruction's 32-bit
2400 // pc-relative offset may not be large enough to hold the whole
2401 // address.
2402 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2403 Callee->getOpcode() == ISD::ExternalSymbol) {
2404 // Lower direct calls to global addresses and external symbols. Setting
2405 // ForCall to true here has the effect of removing WrapperRIP when possible
2406 // to allow direct calls to be selected without first materializing the
2407 // address into a register.
2408 Callee = LowerGlobalOrExternal(Op: Callee, DAG, /*ForCall=*/true);
2409 } else if (Subtarget.isTarget64BitILP32() &&
2410 Callee.getValueType() == MVT::i32) {
    // Zero-extend the 32-bit Callee address into a 64-bit value, as required
    // by the x32 ABI.
2412 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2413 }
2414
2415 // Returns a chain & a glue for retval copy to use.
2416 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2417 SmallVector<SDValue, 8> Ops;
2418
2419 if (!IsSibcall && isTailCall && !IsMustTail) {
2420 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytesToPop, Size2: 0, Glue: InGlue, DL: dl);
2421 InGlue = Chain.getValue(R: 1);
2422 }
2423
2424 Ops.push_back(Elt: Chain);
2425 Ops.push_back(Elt: Callee);
2426
2427 if (isTailCall)
2428 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
2429
2430 // Add argument registers to the end of the list so that they are known live
2431 // into the call.
2432 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2433 Ops.push_back(Elt: DAG.getRegister(Reg: RegsToPass[i].first,
2434 VT: RegsToPass[i].second.getValueType()));
2435
2436 // Add a register mask operand representing the call-preserved registers.
2437 const uint32_t *Mask = [&]() {
2438 auto AdaptedCC = CallConv;
2439 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2440 // use X86_INTR calling convention because it has the same CSR mask
2441 // (same preserved registers).
2442 if (HasNCSR)
2443 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
    // If NoCalleeSavedRegisters is requested, then use GHC since it happens
    // to use the CSR_NoRegs_RegMask.
2446 if (CB && CB->hasFnAttr(Kind: "no_callee_saved_registers"))
2447 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2448 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2449 }();
2450 assert(Mask && "Missing call preserved mask for calling convention");
2451
2452 // If this is an invoke in a 32-bit function using a funclet-based
2453 // personality, assume the function clobbers all registers. If an exception
2454 // is thrown, the runtime will not restore CSRs.
2455 // FIXME: Model this more precisely so that we can register allocate across
2456 // the normal edge and spill and fill across the exceptional edge.
2457 if (!Is64Bit && CLI.CB && isa<InvokeInst>(Val: CLI.CB)) {
2458 const Function &CallerFn = MF.getFunction();
2459 EHPersonality Pers =
2460 CallerFn.hasPersonalityFn()
2461 ? classifyEHPersonality(Pers: CallerFn.getPersonalityFn())
2462 : EHPersonality::Unknown;
2463 if (isFuncletEHPersonality(Pers))
2464 Mask = RegInfo->getNoPreservedMask();
2465 }
2466
2467 // Define a new register mask from the existing mask.
2468 uint32_t *RegMask = nullptr;
2469
2470 // In some calling conventions we need to remove the used physical registers
2471 // from the reg mask. Create a new RegMask for such calling conventions.
2472 // RegMask for calling conventions that disable only return registers (e.g.
2473 // preserve_most) will be modified later in LowerCallResult.
2474 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CC: CallConv) || HasNCSR;
2475 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CC: CallConv)) {
2476 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2477
2478 // Allocate a new Reg Mask and copy Mask.
2479 RegMask = MF.allocateRegMask();
2480 unsigned RegMaskSize = MachineOperand::getRegMaskSize(NumRegs: TRI->getNumRegs());
2481 memcpy(dest: RegMask, src: Mask, n: sizeof(RegMask[0]) * RegMaskSize);
2482
2483 // Make sure all sub registers of the argument registers are reset
2484 // in the RegMask.
2485 if (ShouldDisableArgRegs) {
2486 for (auto const &RegPair : RegsToPass)
2487 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2488 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2489 }
2490
2491 // Create the RegMask Operand according to our updated mask.
2492 Ops.push_back(Elt: DAG.getRegisterMask(RegMask));
2493 } else {
2494 // Create the RegMask Operand according to the static mask.
2495 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
2496 }
2497
2498 if (InGlue.getNode())
2499 Ops.push_back(Elt: InGlue);
2500
2501 if (isTailCall) {
2502 // We used to do:
2503 //// If this is the first return lowered for this function, add the regs
2504 //// to the liveout set for the function.
2505 // This isn't right, although it's probably harmless on x86; liveouts
2506 // should be computed from returns not tail calls. Consider a void
2507 // function making a tail call to a function returning int.
2508 MF.getFrameInfo().setHasTailCall();
2509 SDValue Ret = DAG.getNode(Opcode: X86ISD::TC_RETURN, DL: dl, VTList: NodeTys, Ops);
2510
2511 if (IsCFICall)
2512 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2513
2514 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
2515 DAG.addCallSiteInfo(Node: Ret.getNode(), CallInfo: std::move(CSInfo));
2516 return Ret;
2517 }
2518
2519 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2520 Chain = DAG.getNode(Opcode: X86ISD::NT_CALL, DL: dl, VTList: NodeTys, Ops);
2521 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CB: CLI.CB)) {
2522 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2523 // expanded to the call, directly followed by a special marker sequence and
    // a call to an ObjC library function. Use CALL_RVMARKER to do that.
2525 assert(!isTailCall &&
2526 "tail calls cannot be marked with clang.arc.attachedcall");
2527 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2528
2529 // Add a target global address for the retainRV/claimRV runtime function
2530 // just before the call target.
2531 Function *ARCFn = *objcarc::getAttachedARCFunction(CB: CLI.CB);
2532 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
2533 auto GA = DAG.getTargetGlobalAddress(GV: ARCFn, DL: dl, VT: PtrVT);
2534 Ops.insert(I: Ops.begin() + 1, Elt: GA);
2535 Chain = DAG.getNode(Opcode: X86ISD::CALL_RVMARKER, DL: dl, VTList: NodeTys, Ops);
2536 } else {
2537 Chain = DAG.getNode(Opcode: X86ISD::CALL, DL: dl, VTList: NodeTys, Ops);
2538 }
2539
2540 if (IsCFICall)
2541 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2542
2543 InGlue = Chain.getValue(R: 1);
2544 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
2545 DAG.addCallSiteInfo(Node: Chain.getNode(), CallInfo: std::move(CSInfo));
2546
2547 // Save heapallocsite metadata.
2548 if (CLI.CB)
2549 if (MDNode *HeapAlloc = CLI.CB->getMetadata(Kind: "heapallocsite"))
2550 DAG.addHeapAllocSite(Node: Chain.getNode(), MD: HeapAlloc);
2551
2552 // Create the CALLSEQ_END node.
2553 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2554 if (X86::isCalleePop(CallingConv: CallConv, is64Bit: Is64Bit, IsVarArg: isVarArg,
2555 GuaranteeTCO: DAG.getTarget().Options.GuaranteedTailCallOpt))
2556 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2557 else if (!canGuaranteeTCO(CC: CallConv) && IsCalleePopSRet)
2558 // If this call passes a struct-return pointer, the callee
2559 // pops that struct pointer.
2560 NumBytesForCalleeToPop = 4;
2561
2562 // Returns a glue for retval copy to use.
2563 if (!IsSibcall) {
2564 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytesToPop, Size2: NumBytesForCalleeToPop,
2565 Glue: InGlue, DL: dl);
2566 InGlue = Chain.getValue(R: 1);
2567 }
2568
2569 if (CallingConv::PreserveNone == CallConv)
2570 for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
2571 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
2572 Outs[I].Flags.isSwiftError()) {
2573 errorUnsupported(DAG, dl,
2574 Msg: "Swift attributes can't be used with preserve_none");
2575 break;
2576 }
2577 }
2578
2579 // Handle result values, copying them out of physregs into vregs that we
2580 // return.
2581 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2582 InVals, RegMask);
2583}
2584
2585//===----------------------------------------------------------------------===//
2586// Fast Calling Convention (tail call) implementation
2587//===----------------------------------------------------------------------===//
2588
// Like stdcall, the callee cleans up the arguments, except that ECX is
// reserved for storing the address of the tail-called function. Only 2
// registers are free for argument passing (inreg). Tail call optimization is
// performed provided:
//   * tailcallopt is enabled
//   * caller/callee are fastcc
// On the X86_64 architecture with GOT-style position-independent code, only
// local (within-module) calls are supported at the moment.
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld
// for example.)
// If a tail-called callee has more arguments than the caller, the caller needs
// to make sure that there is room to move the RETADDR to. This is achieved by
// reserving an area the size of the argument delta right after the original
// RETADDR, but before the saved frame pointer or the spilled registers,
// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
2605// stack layout:
2606// arg1
2607// arg2
2608// RETADDR
2609// [ new RETADDR
2610// move area ]
2611// (possible EBP)
2612// ESI
2613// EDI
2614// local1 ..
2615
/// Round the stack size up so that, together with the return-address slot, it
/// satisfies the stack alignment requirement (e.g. sizes of the form 16n + 12
/// for a 16-byte alignment requirement and a 4-byte slot).
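///
/// For example, with a 16-byte stack alignment and an 8-byte slot, a StackSize
/// of 16 is rounded up to 24, so that 24 plus the 8-byte return address is a
/// multiple of 16.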
2618unsigned
2619X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2620 SelectionDAG &DAG) const {
2621 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2622 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2623 assert(StackSize % SlotSize == 0 &&
2624 "StackSize must be a multiple of SlotSize");
2625 return alignTo(Size: StackSize + SlotSize, A: StackAlignment) - SlotSize;
2626}
2627
/// Return true if the given stack call argument is already available at the
/// same relative position in the caller's incoming argument stack.
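///
/// Illustrative example: if f(int x) tail-calls g(x) and x is still sitting in
/// its original incoming argument slot, the outgoing argument already matches
/// and no store is needed for it.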
2630static
2631bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2632 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2633 const X86InstrInfo *TII, const CCValAssign &VA) {
2634 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2635
2636 for (;;) {
2637 // Look through nodes that don't alter the bits of the incoming value.
2638 unsigned Op = Arg.getOpcode();
2639 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2640 Op == ISD::AssertZext) {
2641 Arg = Arg.getOperand(i: 0);
2642 continue;
2643 }
2644 if (Op == ISD::TRUNCATE) {
2645 const SDValue &TruncInput = Arg.getOperand(i: 0);
2646 if (TruncInput.getOpcode() == ISD::AssertZext &&
2647 cast<VTSDNode>(Val: TruncInput.getOperand(i: 1))->getVT() ==
2648 Arg.getValueType()) {
2649 Arg = TruncInput.getOperand(i: 0);
2650 continue;
2651 }
2652 }
2653 break;
2654 }
2655
2656 int FI = INT_MAX;
2657 if (Arg.getOpcode() == ISD::CopyFromReg) {
2658 Register VR = cast<RegisterSDNode>(Val: Arg.getOperand(i: 1))->getReg();
2659 if (!VR.isVirtual())
2660 return false;
2661 MachineInstr *Def = MRI->getVRegDef(Reg: VR);
2662 if (!Def)
2663 return false;
2664 if (!Flags.isByVal()) {
2665 if (!TII->isLoadFromStackSlot(MI: *Def, FrameIndex&: FI))
2666 return false;
2667 } else {
2668 unsigned Opcode = Def->getOpcode();
2669 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2670 Opcode == X86::LEA64_32r) &&
2671 Def->getOperand(1).isFI()) {
2672 FI = Def->getOperand(i: 1).getIndex();
2673 Bytes = Flags.getByValSize();
2674 } else
2675 return false;
2676 }
2677 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val&: Arg)) {
2678 if (Flags.isByVal())
2679 // ByVal argument is passed in as a pointer but it's now being
2680 // dereferenced. e.g.
2681 // define @foo(%struct.X* %A) {
2682 // tail call @bar(%struct.X* byval %A)
2683 // }
2684 return false;
2685 SDValue Ptr = Ld->getBasePtr();
2686 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Val&: Ptr);
2687 if (!FINode)
2688 return false;
2689 FI = FINode->getIndex();
2690 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2691 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Val&: Arg);
2692 FI = FINode->getIndex();
2693 Bytes = Flags.getByValSize();
2694 } else
2695 return false;
2696
2697 assert(FI != INT_MAX);
2698 if (!MFI.isFixedObjectIndex(ObjectIdx: FI))
2699 return false;
2700
2701 if (Offset != MFI.getObjectOffset(ObjectIdx: FI))
2702 return false;
2703
2704 // If this is not byval, check that the argument stack object is immutable.
2705 // inalloca and argument copy elision can create mutable argument stack
2706 // objects. Byval objects can be mutated, but a byval call intends to pass the
2707 // mutated memory.
2708 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(ObjectIdx: FI))
2709 return false;
2710
2711 if (VA.getLocVT().getFixedSizeInBits() >
2712 Arg.getValueSizeInBits().getFixedValue()) {
2713 // If the argument location is wider than the argument type, check that any
2714 // extension flags match.
2715 if (Flags.isZExt() != MFI.isObjectZExt(ObjectIdx: FI) ||
2716 Flags.isSExt() != MFI.isObjectSExt(ObjectIdx: FI)) {
2717 return false;
2718 }
2719 }
2720
  return Bytes == MFI.getObjectSize(FI);
}

/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
    SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
    bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
  if (!mayTailCallThisCC(CalleeCC))
    return false;

  // If -tailcallopt is specified, make fastcc functions tail-callable.
  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();

  // If the function return type is x86_fp80 and the callee return type is not,
  // then the FP_EXTEND of the call result is not a nop. It's not safe to
  // perform a tailcall optimization here.
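  // e.g. the following cannot be lowered as a sibcall because the fpext after
  // the call does real work (illustrative IR):
  //   define x86_fp80 @caller() {
  //     %r = tail call double @callee()
  //     %e = fpext double %r to x86_fp80
  //     ret x86_fp80 %e
  //   }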
  if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
    return false;

  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;
  bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
  bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
  bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
      CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;

  // Win64 functions have extra shadow space for argument homing. Don't do the
  // sibcall if the caller and callee have mismatched expectations for this
  // space.
  if (IsCalleeWin64 != IsCallerWin64)
    return false;

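  // Under guaranteed TCO (-tailcallopt, tailcc, or swifttailcc) the ABI is
  // adjusted so that a call whose convention supports guaranteed TCO and
  // matches the caller's is always tail-called, e.g. with -tailcallopt
  // (illustrative IR):
  //   define fastcc i32 @caller(i32 %x) {
  //     %r = tail call fastcc i32 @callee(i32 %x)
  //     ret i32 %r
  //   }
  // None of the sibcall checks below apply in that mode.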
  if (IsGuaranteeTCO) {
    if (canGuaranteeTCO(CalleeCC) && CCMatch)
      return true;
    return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
  // emit a special epilogue.
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasStackRealignment(MF))
    return false;

  // Also avoid sibcall optimization if we're an sret return fn and the callee
  // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
  // insufficient.
  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
    // For a compatible tail call the callee must return our sret pointer. So it
    // needs to be (a) an sret function itself and (b) we pass our sret as its
    // sret. Condition #b is harder to determine.
    return false;
  } else if (IsCalleePopSRet)
    // The callee pops an sret, so we cannot tail-call, as our caller doesn't
    // expect that.
    return false;

  // Do not sibcall optimize vararg calls unless all arguments are passed via
  // registers.
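  // e.g. on 64-bit SysV a variadic call whose arguments all fit in registers
  // is still eligible here; any argument forced onto the stack disqualifies it.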
  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // Optimizing for varargs on Win64 is unlikely to be safe without
    // additional testing.
    if (IsCalleeWin64 || IsCallerWin64)
      return false;

    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
    for (const auto &VA : ArgLocs)
      if (!VA.isRegLoc())
        return false;
  }

  // If the call result is in ST0 / ST1, it needs to be popped off the x87
  // stack. Therefore, if it's not used by the call it is not safe to optimize
  // this into a sibcall.
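  // e.g. on 32-bit, an ignored 'double' result comes back in ST0 and the
  // caller must pop it with fstp, which a sibcall cannot do.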
  bool Unused = false;
  for (const auto &In : Ins) {
    if (!In.Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
    CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (const auto &VA : RVLocs) {
      if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
        return false;
    }
  }

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  RetCC_X86, RetCC_X86))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
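  // e.g. a preserve_mostcc caller cannot sibcall a plain ccc callee, since the
  // callee would be free to clobber registers the caller promised to keep.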
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  unsigned StackArgsSize = 0;

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);

    // Allocate shadow area for Win64
    if (IsCalleeWin64)
      CCInfo.AllocateStack(32, Align(8));

    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
    StackArgsSize = CCInfo.getStackSize();

    if (CCInfo.getStackSize()) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
        const CCValAssign &VA = ArgLocs[I];
        SDValue Arg = OutVals[I];
        ISD::ArgFlagsTy Flags = Outs[I].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
                                   TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
    // callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments so watch out for those.
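    // e.g. with PIC at most one of these registers may carry an argument (two
    // without PIC); otherwise nothing is left to hold the call target.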
    if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
                                  !isa<ExternalSymbolSDNode>(Callee)) ||
                                 PositionIndependent)) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
      unsigned MaxInRegs = PositionIndependent ? 2 : 3;

      for (const auto &VA : ArgLocs) {
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop them.
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}

/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
  // If GuaranteeTCO is true, we force some calls to be callee pop so that we
  // can guarantee TCO.
  if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
    return true;

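  // The conventions below are callee-pop in 32-bit code: the callee removes
  // its own stack arguments with 'ret imm16', e.g. a stdcall 'void f(int)'
  // returns with 'ret 4'.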
  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
    return !is64Bit;
  }
}
