1//===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86.h"
15#include "X86CallingConv.h"
16#include "X86FrameLowering.h"
17#include "X86ISelLowering.h"
18#include "X86InstrBuilder.h"
19#include "X86MachineFunctionInfo.h"
20#include "X86TargetMachine.h"
21#include "X86TargetObjectFile.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/ObjCARCUtil.h"
24#include "llvm/CodeGen/MachineJumpTableInfo.h"
25#include "llvm/CodeGen/MachineModuleInfo.h"
26#include "llvm/CodeGen/WinEHFuncInfo.h"
27#include "llvm/IR/DiagnosticInfo.h"
28#include "llvm/IR/IRBuilder.h"
29
30#define DEBUG_TYPE "x86-isel"
31
32using namespace llvm;
33
34STATISTIC(NumTailCalls, "Number of tail calls");
35
36/// Call this when the user attempts to do something unsupported, like
37/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
38/// report_fatal_error, so calling code should attempt to recover without
39/// crashing.
40static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
41 const char *Msg) {
42 MachineFunction &MF = DAG.getMachineFunction();
43 DAG.getContext()->diagnose(
44 DI: DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
45}
46
47/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
49/// the return registers.
50static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
51 switch (CC) {
52 default:
53 return false;
54 case CallingConv::X86_RegCall:
55 case CallingConv::PreserveMost:
56 case CallingConv::PreserveAll:
57 return true;
58 }
59}
60
61/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
63/// the parameters.
64static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
65 return CC == CallingConv::X86_RegCall;
66}
67
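// For illustration (not exhaustive), a few mappings produced by the helper
// below, assuming AVX-512 is available and only the listed features matter:
//   v16i1, C calling conv              -> {v16i8, 1}   (one XMM register)
//   v16i1, X86_RegCall                 -> {INVALID, 0} (default handling,
//                                                       i.e. a k-register)
//   v64i1, any CC, AVX-512 without BWI -> {i8, 64}     (scalarized)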
68static std::pair<MVT, unsigned>
69handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
70 const X86Subtarget &Subtarget) {
71 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
72 // convention is one that uses k registers.
73 if (NumElts == 2)
74 return {MVT::v2i64, 1};
75 if (NumElts == 4)
76 return {MVT::v4i32, 1};
77 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
78 CC != CallingConv::Intel_OCL_BI)
79 return {MVT::v8i16, 1};
80 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
81 CC != CallingConv::Intel_OCL_BI)
82 return {MVT::v16i8, 1};
83 // v32i1 passes in ymm unless we have BWI and the calling convention is
84 // regcall.
85 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
86 return {MVT::v32i8, 1};
87 // Split v64i1 vectors if we don't have v64i8 available.
88 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
89 if (Subtarget.useAVX512Regs())
90 return {MVT::v64i8, 1};
91 return {MVT::v32i8, 2};
92 }
93
94 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
95 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
96 NumElts > 64)
97 return {MVT::i8, NumElts};
98
99 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
100}
101
102MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
103 CallingConv::ID CC,
104 EVT VT) const {
105 if (VT.isVector()) {
106 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
107 unsigned NumElts = VT.getVectorNumElements();
108
109 MVT RegisterVT;
110 unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
113 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
114 return RegisterVT;
115 }
116
117 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
118 return MVT::v8f16;
119 }
120
121 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
122 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
123 !Subtarget.hasX87())
124 return MVT::i32;
125
126 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getRegisterTypeForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));
129
130 if (VT == MVT::bf16)
131 return MVT::f16;
132
133 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
134}
135
136unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
137 CallingConv::ID CC,
138 EVT VT) const {
139 if (VT.isVector()) {
140 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
141 unsigned NumElts = VT.getVectorNumElements();
142
143 MVT RegisterVT;
144 unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
147 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
148 return NumRegisters;
149 }
150
151 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
152 return 1;
153 }
154
155 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
156 // x87 is disabled.
157 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
158 if (VT == MVT::f64)
159 return 2;
160 if (VT == MVT::f80)
161 return 3;
162 }
163
164 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getNumRegistersForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));
167
168 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
169}
170
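// For illustration: with AVX-512, an odd-sized mask such as v17i1 is broken
// into 17 i1 intermediates that are promoted to i8 registers, while on a BWI
// target that prefers 256-bit vectors a v64i1 is split into two v32i1 halves
// passed as v32i8.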
171unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
172 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
173 unsigned &NumIntermediates, MVT &RegisterVT) const {
174 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
175 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
176 Subtarget.hasAVX512() &&
177 (!isPowerOf2_32(Value: VT.getVectorNumElements()) ||
178 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
179 VT.getVectorNumElements() > 64)) {
180 RegisterVT = MVT::i8;
181 IntermediateVT = MVT::i1;
182 NumIntermediates = VT.getVectorNumElements();
183 return NumIntermediates;
184 }
185
186 // Split v64i1 vectors if we don't have v64i8 available.
187 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
188 CC != CallingConv::X86_RegCall) {
189 RegisterVT = MVT::v32i8;
190 IntermediateVT = MVT::v32i1;
191 NumIntermediates = 2;
192 return 2;
193 }
194
195 // Split vNbf16 vectors according to vNf16.
196 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    VT = VT.changeVectorElementType(MVT::f16);

  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
201}
202
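// For illustration: with AVX-512VL a v8i32 compare produces a v8i1 result,
// whereas with plain AVX2 the same compare yields a v8i32 all-ones/all-zeros
// mask (the element type is simply switched to an integer of the same width).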
203EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
204 LLVMContext& Context,
205 EVT VT) const {
206 if (!VT.isVector())
207 return MVT::i8;
208
209 if (Subtarget.hasAVX512()) {
210 // Figure out what this type will be legalized to.
211 EVT LegalVT = VT;
212 while (getTypeAction(Context, VT: LegalVT) != TypeLegal)
213 LegalVT = getTypeToTransformTo(Context, VT: LegalVT);
214
215 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
216 if (LegalVT.getSimpleVT().is512BitVector())
217 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
218
219 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
220 // If we legalized to less than a 512-bit vector, then we will use a vXi1
221 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
222 // vXi16/vXi8.
223 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
224 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
225 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
226 }
227 }
228
229 return VT.changeVectorElementTypeToInteger();
230}
231
232/// Helper for getByValTypeAlignment to determine
233/// the desired ByVal argument alignment.
234static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
235 if (MaxAlign == 16)
236 return;
237 if (VectorType *VTy = dyn_cast<VectorType>(Val: Ty)) {
238 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
239 MaxAlign = Align(16);
240 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Val: Ty)) {
241 Align EltAlign;
242 getMaxByValAlign(Ty: ATy->getElementType(), MaxAlign&: EltAlign);
243 if (EltAlign > MaxAlign)
244 MaxAlign = EltAlign;
245 } else if (StructType *STy = dyn_cast<StructType>(Val: Ty)) {
246 for (auto *EltTy : STy->elements()) {
247 Align EltAlign;
248 getMaxByValAlign(Ty: EltTy, MaxAlign&: EltAlign);
249 if (EltAlign > MaxAlign)
250 MaxAlign = EltAlign;
251 if (MaxAlign == 16)
252 break;
253 }
254 }
255}
256
257/// Return the desired alignment for ByVal aggregate
258/// function arguments in the caller parameter area. For X86, aggregates
259/// that contain SSE vectors are placed at 16-byte boundaries while the rest
260/// are at 4-byte boundaries.
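/// For example, on 32-bit x86 with SSE a byval struct containing a 128-bit
/// vector member is aligned to 16 bytes, while a struct of plain ints stays
/// at the default 4-byte boundary; on x86-64 the minimum is 8 bytes.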
261uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
262 const DataLayout &DL) const {
263 if (Subtarget.is64Bit()) {
264 // Max of 8 and alignment of type.
265 Align TyAlign = DL.getABITypeAlign(Ty);
266 if (TyAlign > 8)
267 return TyAlign.value();
268 return 8;
269 }
270
271 Align Alignment(4);
272 if (Subtarget.hasSSE1())
273 getMaxByValAlign(Ty, MaxAlign&: Alignment);
274 return Alignment.value();
275}
276
277/// It returns EVT::Other if the type should be determined using generic
278/// target-independent logic.
279/// For vector ops we check that the overall size isn't larger than our
280/// preferred vector width.
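/// For example, a 64-byte memset on an AVX-512BW target that prefers 512-bit
/// vectors is typically lowered with v64i8 stores, an AVX target with cheap
/// 256-bit ops uses v32i8, and a 32-bit SSE2 target with slow unaligned
/// 16-byte accesses may fall back to f64 or i32 stores.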
281EVT X86TargetLowering::getOptimalMemOpType(
282 const MemOp &Op, const AttributeList &FuncAttributes) const {
283 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
284 if (Op.size() >= 16 &&
285 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(AlignCheck: Align(16)))) {
286 // FIXME: Check if unaligned 64-byte accesses are slow.
287 if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
288 (Subtarget.getPreferVectorWidth() >= 512)) {
289 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
290 }
291 // FIXME: Check if unaligned 32-byte accesses are slow.
292 if (Op.size() >= 32 && Subtarget.hasAVX() &&
293 Subtarget.useLight256BitInstructions()) {
294 // Although this isn't a well-supported type for AVX1, we'll let
295 // legalization and shuffle lowering produce the optimal codegen. If we
296 // choose an optimal type with a vector element larger than a byte,
297 // getMemsetStores() may create an intermediate splat (using an integer
298 // multiply) before we splat as a vector.
299 return MVT::v32i8;
300 }
301 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
302 return MVT::v16i8;
303 // TODO: Can SSE1 handle a byte vector?
304 // If we have SSE1 registers we should be able to use them.
305 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
306 (Subtarget.getPreferVectorWidth() >= 128))
307 return MVT::v4f32;
308 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
309 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
310 // Do not use f64 to lower memcpy if source is string constant. It's
311 // better to use i32 to avoid the loads.
312 // Also, do not use f64 to lower memset unless this is a memset of zeros.
313 // The gymnastics of splatting a byte value into an XMM register and then
314 // only using 8-byte stores (because this is a CPU with slow unaligned
315 // 16-byte accesses) makes that a loser.
316 return MVT::f64;
317 }
318 }
319 // This is a compromise. If we reach here, unaligned accesses may be slow on
320 // this target. However, creating smaller, aligned accesses could be even
321 // slower and would certainly be a lot more code.
322 if (Subtarget.is64Bit() && Op.size() >= 8)
323 return MVT::i64;
324 return MVT::i32;
325}
326
327bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
328 if (VT == MVT::f32)
329 return Subtarget.hasSSE1();
330 if (VT == MVT::f64)
331 return Subtarget.hasSSE2();
332 return true;
333}
334
335static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
336 return (8 * Alignment.value()) % SizeInBits == 0;
337}
338
339bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
340 if (isBitAligned(Alignment, SizeInBits: VT.getSizeInBits()))
341 return true;
342 switch (VT.getSizeInBits()) {
343 default:
344 // 8-byte and under are always assumed to be fast.
345 return true;
346 case 128:
347 return !Subtarget.isUnalignedMem16Slow();
348 case 256:
349 return !Subtarget.isUnalignedMem32Slow();
350 // TODO: What about AVX-512 (512-bit) accesses?
351 }
352}
353
354bool X86TargetLowering::allowsMisalignedMemoryAccesses(
355 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
356 unsigned *Fast) const {
357 if (Fast)
358 *Fast = isMemoryAccessFast(VT, Alignment);
359 // NonTemporal vector memory ops must be aligned.
360 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    // NT loads can only be vector aligned, so if it's less aligned than the
    // minimum vector size (which we can split the vector down to), we might as
    // well use a regular unaligned vector load.
    // We don't have any NT loads pre-SSE41.
365 if (!!(Flags & MachineMemOperand::MOLoad))
366 return (Alignment < 16 || !Subtarget.hasSSE41());
367 return false;
368 }
369 // Misaligned accesses of any size are always allowed.
370 return true;
371}
372
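// For illustration: a nontemporal 256-bit vector store is reported as a
// supported access here only when it is 32-byte aligned and AVX is available,
// and a nontemporal 128-bit vector load additionally requires SSE4.1;
// insufficiently aligned NT vector ops are instead treated as ordinary
// unaligned accesses.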
373bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
374 const DataLayout &DL, EVT VT,
375 unsigned AddrSpace, Align Alignment,
376 MachineMemOperand::Flags Flags,
377 unsigned *Fast) const {
378 if (Fast)
379 *Fast = isMemoryAccessFast(VT, Alignment);
380 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
381 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
382 /*Fast=*/nullptr))
383 return true;
384 // NonTemporal vector memory ops are special, and must be aligned.
385 if (!isBitAligned(Alignment, SizeInBits: VT.getSizeInBits()))
386 return false;
387 switch (VT.getSizeInBits()) {
388 case 128:
389 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
390 return true;
391 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
392 return true;
393 return false;
394 case 256:
395 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
396 return true;
397 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
398 return true;
399 return false;
400 case 512:
401 if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
402 return true;
403 return false;
404 default:
405 return false; // Don't have NonTemporal vector memory ops of this size.
406 }
407 }
408 return true;
409}
410
411/// Return the entry encoding for a jump table in the
412/// current function. The returned value is a member of the
413/// MachineJumpTableInfo::JTEntryKind enum.
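/// For example, a 32-bit GOT-PIC jump table entry is emitted roughly as
/// ".long .LBB0_N@GOTOFF" (see LowerCustomJumpTableEntry below), while
/// large-code-model PIC falls back to 64-bit label differences.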
414unsigned X86TargetLowering::getJumpTableEncoding() const {
415 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
416 // symbol.
417 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
418 return MachineJumpTableInfo::EK_Custom32;
419 if (isPositionIndependent() &&
420 getTargetMachine().getCodeModel() == CodeModel::Large)
421 return MachineJumpTableInfo::EK_LabelDifference64;
422
423 // Otherwise, use the normal jump table encoding heuristics.
424 return TargetLowering::getJumpTableEncoding();
425}
426
427bool X86TargetLowering::useSoftFloat() const {
428 return Subtarget.useSoftFloat();
429}
430
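// For illustration: when a module is built with e.g. -mregparm=3 on 32-bit
// x86, the leading integer/pointer arguments of a libcall are marked 'inreg'
// below so they are passed in registers; a 64-bit integer argument consumes
// two of the available register slots.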
431void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
432 ArgListTy &Args) const {
433
434 // Only relabel X86-32 for C / Stdcall CCs.
435 if (Subtarget.is64Bit())
436 return;
437 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
438 return;
439 unsigned ParamRegs = 0;
440 if (auto *M = MF->getFunction().getParent())
441 ParamRegs = M->getNumberRegisterParameters();
442
  // Mark the first N integer arguments as being passed in registers.
444 for (auto &Arg : Args) {
445 Type *T = Arg.Ty;
446 if (T->isIntOrPtrTy())
447 if (MF->getDataLayout().getTypeAllocSize(Ty: T) <= 8) {
448 unsigned numRegs = 1;
449 if (MF->getDataLayout().getTypeAllocSize(Ty: T) > 4)
450 numRegs = 2;
451 if (ParamRegs < numRegs)
452 return;
453 ParamRegs -= numRegs;
454 Arg.IsInReg = true;
455 }
456 }
457}
458
459const MCExpr *
460X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
461 const MachineBasicBlock *MBB,
462 unsigned uid,MCContext &Ctx) const{
463 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
464 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
465 // entries.
466 return MCSymbolRefExpr::create(Symbol: MBB->getSymbol(),
467 Kind: MCSymbolRefExpr::VK_GOTOFF, Ctx);
468}
469
470/// Returns relocation base for the given PIC jumptable.
471SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
472 SelectionDAG &DAG) const {
473 if (!Subtarget.is64Bit())
474 // This doesn't have SDLoc associated with it, but is not really the
475 // same as a Register.
476 return DAG.getNode(Opcode: X86ISD::GlobalBaseReg, DL: SDLoc(),
477 VT: getPointerTy(DL: DAG.getDataLayout()));
478 return Table;
479}
480
481/// This returns the relocation base for the given PIC jumptable,
482/// the same as getPICJumpTableRelocBase, but as an MCExpr.
483const MCExpr *X86TargetLowering::
484getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
485 MCContext &Ctx) const {
486 // X86-64 uses RIP relative addressing based on the jump table label.
487 if (Subtarget.isPICStyleRIPRel() ||
488 (Subtarget.is64Bit() &&
489 getTargetMachine().getCodeModel() == CodeModel::Large))
490 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
491
492 // Otherwise, the reference is relative to the PIC base.
493 return MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx);
494}
495
496std::pair<const TargetRegisterClass *, uint8_t>
497X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
498 MVT VT) const {
499 const TargetRegisterClass *RRC = nullptr;
500 uint8_t Cost = 1;
501 switch (VT.SimpleTy) {
502 default:
503 return TargetLowering::findRepresentativeClass(TRI, VT);
504 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
505 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
506 break;
507 case MVT::x86mmx:
508 RRC = &X86::VR64RegClass;
509 break;
510 case MVT::f32: case MVT::f64:
511 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
512 case MVT::v4f32: case MVT::v2f64:
513 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
514 case MVT::v8f32: case MVT::v4f64:
515 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
516 case MVT::v16f32: case MVT::v8f64:
517 RRC = &X86::VR128XRegClass;
518 break;
519 }
  return std::make_pair(RRC, Cost);
521}
522
523unsigned X86TargetLowering::getAddressSpace() const {
524 if (Subtarget.is64Bit())
525 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
526 return 256;
527}
528
529static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
530 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
531 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(Major: 17));
532}
533
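// Builds a segment-relative address. For example, SegmentOffset(IRB, 0x28,
// X86AS::FS) produces inttoptr(i32 40) in address space 257, which the
// backend lowers to the %fs:0x28 slot used for the stack guard below.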
534static Constant* SegmentOffset(IRBuilderBase &IRB,
535 int Offset, unsigned AddressSpace) {
536 return ConstantExpr::getIntToPtr(
537 C: ConstantInt::get(Ty: Type::getInt32Ty(C&: IRB.getContext()), V: Offset),
538 Ty: IRB.getPtrTy(AddrSpace: AddressSpace));
539}
540
541Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
542 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
543 // tcbhead_t; use it instead of the usual global variable (see
544 // sysdeps/{i386,x86_64}/nptl/tls.h)
545 if (hasStackGuardSlotTLS(TargetTriple: Subtarget.getTargetTriple())) {
546 unsigned AddressSpace = getAddressSpace();
547
548 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
549 if (Subtarget.isTargetFuchsia())
550 return SegmentOffset(IRB, Offset: 0x10, AddressSpace);
551
552 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
    // Some users may customize the base register and offset.
    int Offset = M->getStackProtectorGuardOffset();
    // If -stack-protector-guard-offset is not set, the default is %fs:0x28 on
    // x86-64 (%gs:0x28 with the Kernel code model) and %gs:0x14 on i386.
    if (Offset == INT_MAX)
      Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
560
561 StringRef GuardReg = M->getStackProtectorGuardReg();
562 if (GuardReg == "fs")
563 AddressSpace = X86AS::FS;
564 else if (GuardReg == "gs")
565 AddressSpace = X86AS::GS;
566
    // Use the guard symbol if the user specified one.
568 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
569 if (!GuardSymb.empty()) {
570 GlobalVariable *GV = M->getGlobalVariable(Name: GuardSymb);
571 if (!GV) {
572 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(C&: M->getContext())
573 : Type::getInt32Ty(C&: M->getContext());
574 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
575 nullptr, GuardSymb, nullptr,
576 GlobalValue::NotThreadLocal, AddressSpace);
577 if (!Subtarget.isTargetDarwin())
578 GV->setDSOLocal(M->getDirectAccessExternalData());
579 }
580 return GV;
581 }
582
583 return SegmentOffset(IRB, Offset, AddressSpace);
584 }
585 return TargetLowering::getIRStackGuard(IRB);
586}
587
588void X86TargetLowering::insertSSPDeclarations(Module &M) const {
589 // MSVC CRT provides functionalities for stack protection.
590 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
591 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
592 // MSVC CRT has a global variable holding security cookie.
593 M.getOrInsertGlobal(Name: "__security_cookie",
594 Ty: PointerType::getUnqual(C&: M.getContext()));
595
596 // MSVC CRT has a function to validate security cookie.
597 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
598 Name: "__security_check_cookie", RetTy: Type::getVoidTy(C&: M.getContext()),
599 Args: PointerType::getUnqual(C&: M.getContext()));
600 if (Function *F = dyn_cast<Function>(Val: SecurityCheckCookie.getCallee())) {
601 F->setCallingConv(CallingConv::X86_FastCall);
602 F->addParamAttr(0, Attribute::AttrKind::InReg);
603 }
604 return;
605 }
606
607 StringRef GuardMode = M.getStackProtectorGuard();
608
609 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
610 if ((GuardMode == "tls" || GuardMode.empty()) &&
611 hasStackGuardSlotTLS(TargetTriple: Subtarget.getTargetTriple()))
612 return;
613 TargetLowering::insertSSPDeclarations(M);
614}
615
616Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
617 // MSVC CRT has a global variable holding security cookie.
618 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
619 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
620 return M.getGlobalVariable(Name: "__security_cookie");
621 }
622 return TargetLowering::getSDagStackGuard(M);
623}
624
625Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
626 // MSVC CRT has a function to validate security cookie.
627 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
628 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
629 return M.getFunction(Name: "__security_check_cookie");
630 }
631 return TargetLowering::getSSPStackGuardCheck(M);
632}
633
634Value *
635X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
636 // Android provides a fixed TLS slot for the SafeStack pointer. See the
637 // definition of TLS_SLOT_SAFESTACK in
638 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
639 if (Subtarget.isTargetAndroid()) {
    // %fs:0x48 on x86-64 (%gs:0x48 with the Kernel code model); %gs:0x24 on
    // i386.
642 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
643 return SegmentOffset(IRB, Offset, AddressSpace: getAddressSpace());
644 }
645
646 // Fuchsia is similar.
647 if (Subtarget.isTargetFuchsia()) {
648 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
649 return SegmentOffset(IRB, Offset: 0x18, AddressSpace: getAddressSpace());
650 }
651
652 return TargetLowering::getSafeStackPointerLocation(IRB);
653}
654
655//===----------------------------------------------------------------------===//
656// Return Value Calling Convention Implementation
657//===----------------------------------------------------------------------===//
658
659bool X86TargetLowering::CanLowerReturn(
660 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
661 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
662 SmallVector<CCValAssign, 16> RVLocs;
663 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
664 return CCInfo.CheckReturn(Outs, Fn: RetCC_X86);
665}
666
667const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
668 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
669 return ScratchRegs;
670}
671
672ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
673 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
674 return RCRegs;
675}
676
/// Lowers mask values (v*i1) to the corresponding local register values.
/// \returns DAG node after lowering to register type
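/// For example, a v16i1 value headed for a 32-bit location is first bitcast
/// to i16 and then any-extended to i32, while a v32i1 value headed for a
/// 32-bit location is a single bitcast.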
679static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
680 const SDLoc &DL, SelectionDAG &DAG) {
681 EVT ValVT = ValArg.getValueType();
682
683 if (ValVT == MVT::v1i1)
684 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ValLoc, N1: ValArg,
685 N2: DAG.getIntPtrConstant(Val: 0, DL));
686
687 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
688 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
689 // Two stage lowering might be required
690 // bitcast: v8i1 -> i8 / v16i1 -> i16
691 // anyextend: i8 -> i32 / i16 -> i32
692 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
693 SDValue ValToCopy = DAG.getBitcast(VT: TempValLoc, V: ValArg);
694 if (ValLoc == MVT::i32)
695 ValToCopy = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ValLoc, Operand: ValToCopy);
696 return ValToCopy;
697 }
698
699 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
700 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
701 // One stage lowering is required
702 // bitcast: v32i1 -> i32 / v64i1 -> i64
703 return DAG.getBitcast(VT: ValLoc, V: ValArg);
704 }
705
706 return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ValLoc, Operand: ValArg);
707}
708
/// Breaks a v64i1 value into two i32 registers and adds the new nodes to the
/// DAG.
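/// For example, on a 32-bit AVX512BW target a v64i1 argument is bitcast to
/// i64, split into its low and high i32 halves, and each half is recorded
/// against the register location assigned to it by the calling convention.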
710static void Passv64i1ArgInRegs(
711 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
712 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
713 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
714 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
715 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
716 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
717 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
718 "The value should reside in two registers");
719
720 // Before splitting the value we cast it to i64
721 Arg = DAG.getBitcast(MVT::i64, Arg);
722
723 // Splitting the value into two i32 types
724 SDValue Lo, Hi;
725 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
726
727 // Attach the two i32 types into corresponding registers
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
730}
731
732SDValue
733X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
734 bool isVarArg,
735 const SmallVectorImpl<ISD::OutputArg> &Outs,
736 const SmallVectorImpl<SDValue> &OutVals,
737 const SDLoc &dl, SelectionDAG &DAG) const {
738 MachineFunction &MF = DAG.getMachineFunction();
739 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
740
741 // In some cases we need to disable registers from the default CSR list.
742 // For example, when they are used as return registers (preserve_* and X86's
743 // regcall) or for argument passing (X86's regcall).
744 bool ShouldDisableCalleeSavedRegister =
745 shouldDisableRetRegFromCSR(CC: CallConv) ||
746 MF.getFunction().hasFnAttribute(Kind: "no_caller_saved_registers");
747
748 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
749 report_fatal_error(reason: "X86 interrupts may not return any value");
750
751 SmallVector<CCValAssign, 16> RVLocs;
752 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
753 CCInfo.AnalyzeReturn(Outs, Fn: RetCC_X86);
754
755 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
756 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
757 ++I, ++OutsIndex) {
758 CCValAssign &VA = RVLocs[I];
759 assert(VA.isRegLoc() && "Can only return in registers!");
760
761 // Add the register to the CalleeSaveDisableRegs list.
762 if (ShouldDisableCalleeSavedRegister)
763 MF.getRegInfo().disableCalleeSavedRegister(Reg: VA.getLocReg());
764
765 SDValue ValToCopy = OutVals[OutsIndex];
766 EVT ValVT = ValToCopy.getValueType();
767
768 // Promote values to the appropriate types.
769 if (VA.getLocInfo() == CCValAssign::SExt)
770 ValToCopy = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: ValToCopy);
771 else if (VA.getLocInfo() == CCValAssign::ZExt)
772 ValToCopy = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: ValToCopy);
773 else if (VA.getLocInfo() == CCValAssign::AExt) {
774 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
775 ValToCopy = lowerMasksToReg(ValArg: ValToCopy, ValLoc: VA.getLocVT(), DL: dl, DAG);
776 else
777 ValToCopy = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: ValToCopy);
778 }
779 else if (VA.getLocInfo() == CCValAssign::BCvt)
780 ValToCopy = DAG.getBitcast(VT: VA.getLocVT(), V: ValToCopy);
781
782 assert(VA.getLocInfo() != CCValAssign::FPExt &&
783 "Unexpected FP-extend for return value.");
784
785 // Report an error if we have attempted to return a value via an XMM
786 // register and SSE was disabled.
787 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
788 errorUnsupported(DAG, dl, Msg: "SSE register return with SSE disabled");
789 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
790 } else if (!Subtarget.hasSSE2() &&
791 X86::FR64XRegClass.contains(VA.getLocReg()) &&
792 ValVT == MVT::f64) {
793 // When returning a double via an XMM register, report an error if SSE2 is
794 // not enabled.
795 errorUnsupported(DAG, dl, Msg: "SSE2 register return with SSE2 disabled");
796 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
797 }
798
799 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
800 // the RET instruction and handled by the FP Stackifier.
801 if (VA.getLocReg() == X86::FP0 ||
802 VA.getLocReg() == X86::FP1) {
803 // If this is a copy from an xmm register to ST(0), use an FPExtend to
804 // change the value to the FP stack register class.
805 if (isScalarFPTypeInSSEReg(VA.getValVT()))
806 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
807 RetVals.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ValToCopy));
808 // Don't emit a copytoreg.
809 continue;
810 }
811
812 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
813 // which is returned in RAX / RDX.
814 if (Subtarget.is64Bit()) {
815 if (ValVT == MVT::x86mmx) {
816 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
817 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
818 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
819 ValToCopy);
820 // If we don't have SSE2 available, convert to v4f32 so the generated
821 // register is legal.
822 if (!Subtarget.hasSSE2())
823 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
824 }
825 }
826 }
827
828 if (VA.needsCustom()) {
829 assert(VA.getValVT() == MVT::v64i1 &&
830 "Currently the only custom case is when we split v64i1 to 2 regs");
831
832 Passv64i1ArgInRegs(DL: dl, DAG, Arg&: ValToCopy, RegsToPass&: RetVals, VA, NextVA&: RVLocs[++I],
833 Subtarget);
834
835 // Add the second register to the CalleeSaveDisableRegs list.
836 if (ShouldDisableCalleeSavedRegister)
837 MF.getRegInfo().disableCalleeSavedRegister(Reg: RVLocs[I].getLocReg());
838 } else {
839 RetVals.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ValToCopy));
840 }
841 }
842
843 SDValue Glue;
844 SmallVector<SDValue, 6> RetOps;
845 RetOps.push_back(Elt: Chain); // Operand #0 = Chain (updated below)
846 // Operand #1 = Bytes To Pop
847 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
848 MVT::i32));
849
850 // Copy the result values into the output registers.
851 for (auto &RetVal : RetVals) {
852 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
853 RetOps.push_back(Elt: RetVal.second);
854 continue; // Don't emit a copytoreg.
855 }
856
857 Chain = DAG.getCopyToReg(Chain, dl, Reg: RetVal.first, N: RetVal.second, Glue);
858 Glue = Chain.getValue(R: 1);
859 RetOps.push_back(
860 Elt: DAG.getRegister(Reg: RetVal.first, VT: RetVal.second.getValueType()));
861 }
862
863 // Swift calling convention does not require we copy the sret argument
864 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
865
866 // All x86 ABIs require that for returning structs by value we copy
867 // the sret argument into %rax/%eax (depending on ABI) for the return.
868 // We saved the argument into a virtual register in the entry block,
869 // so now we copy the value out and into %rax/%eax.
870 //
871 // Checking Function.hasStructRetAttr() here is insufficient because the IR
872 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
873 // false, then an sret argument may be implicitly inserted in the SelDAG. In
874 // either case FuncInfo->setSRetReturnReg() will have been called.
875 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
876 // When we have both sret and another return value, we should use the
877 // original Chain stored in RetOps[0], instead of the current Chain updated
878 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
879
880 // For the case of sret and another return value, we have
881 // Chain_0 at the function entry
882 // Chain_1 = getCopyToReg(Chain_0) in the above loop
883 // If we use Chain_1 in getCopyFromReg, we will have
884 // Val = getCopyFromReg(Chain_1)
885 // Chain_2 = getCopyToReg(Chain_1, Val) from below
886
887 // getCopyToReg(Chain_0) will be glued together with
888 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
889 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
890 // Data dependency from Unit B to Unit A due to usage of Val in
891 // getCopyToReg(Chain_1, Val)
892 // Chain dependency from Unit A to Unit B
893
894 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
895 SDValue Val = DAG.getCopyFromReg(Chain: RetOps[0], dl, Reg: SRetReg,
896 VT: getPointerTy(DL: MF.getDataLayout()));
897
898 Register RetValReg
899 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
900 X86::RAX : X86::EAX;
901 Chain = DAG.getCopyToReg(Chain, dl, Reg: RetValReg, N: Val, Glue);
902 Glue = Chain.getValue(R: 1);
903
904 // RAX/EAX now acts like a return value.
905 RetOps.push_back(
906 Elt: DAG.getRegister(Reg: RetValReg, VT: getPointerTy(DL: DAG.getDataLayout())));
907
908 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
909 // this however for preserve_most/preserve_all to minimize the number of
910 // callee-saved registers for these CCs.
911 if (ShouldDisableCalleeSavedRegister &&
912 CallConv != CallingConv::PreserveAll &&
913 CallConv != CallingConv::PreserveMost)
914 MF.getRegInfo().disableCalleeSavedRegister(Reg: RetValReg);
915 }
916
917 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
918 const MCPhysReg *I =
919 TRI->getCalleeSavedRegsViaCopy(MF: &DAG.getMachineFunction());
920 if (I) {
921 for (; *I; ++I) {
922 if (X86::GR64RegClass.contains(*I))
923 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
924 else
925 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
926 }
927 }
928
929 RetOps[0] = Chain; // Update chain.
930
931 // Add the glue if we have it.
932 if (Glue.getNode())
933 RetOps.push_back(Elt: Glue);
934
935 X86ISD::NodeType opcode = X86ISD::RET_GLUE;
936 if (CallConv == CallingConv::X86_INTR)
937 opcode = X86ISD::IRET;
938 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
939}
940
941bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
942 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(NUses: 1, Value: 0))
943 return false;
944
945 SDValue TCChain = Chain;
946 SDNode *Copy = *N->use_begin();
947 if (Copy->getOpcode() == ISD::CopyToReg) {
948 // If the copy has a glue operand, we conservatively assume it isn't safe to
949 // perform a tail call.
950 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
951 return false;
952 TCChain = Copy->getOperand(Num: 0);
953 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
954 return false;
955
956 bool HasRet = false;
957 for (const SDNode *U : Copy->uses()) {
958 if (U->getOpcode() != X86ISD::RET_GLUE)
959 return false;
960 // If we are returning more than one value, we can definitely
961 // not make a tail call see PR19530
962 if (U->getNumOperands() > 4)
963 return false;
964 if (U->getNumOperands() == 4 &&
965 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
966 return false;
967 HasRet = true;
968 }
969
970 if (!HasRet)
971 return false;
972
973 Chain = TCChain;
974 return true;
975}
976
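// For illustration: an i8 return value stays i8 on most targets, but on
// Darwin it is still widened to i32 to match Clang's historical behaviour
// (PR26665); an i1 return is widened to at least i8 everywhere.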
977EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
978 ISD::NodeType ExtendKind) const {
979 MVT ReturnMVT = MVT::i32;
980
981 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
982 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
983 // The ABI does not require i1, i8 or i16 to be extended.
984 //
985 // On Darwin, there is code in the wild relying on Clang's old behaviour of
986 // always extending i8/i16 return values, so keep doing that for now.
987 // (PR26665).
988 ReturnMVT = MVT::i8;
989 }
990
991 EVT MinVT = getRegisterType(Context, VT: ReturnMVT);
992 return VT.bitsLT(VT: MinVT) ? MinVT : VT;
993}
994
/// Reads two 32 bit registers and creates a 64 bit mask value.
/// \param VA The current 32 bit value that needs to be assigned.
/// \param NextVA The next 32 bit value that needs to be assigned.
/// \param Root The parent DAG node.
/// \param [in,out] InGlue Represents the SDValue in the parent DAG node used
///                        for glue purposes. If the DAG already uses a
///                        physical register instead of a virtual one, the new
///                        SDValue should be glued to the InGlue SDValue.
/// \return a new 64 bit wide SDValue.
1004static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1005 SDValue &Root, SelectionDAG &DAG,
1006 const SDLoc &DL, const X86Subtarget &Subtarget,
1007 SDValue *InGlue = nullptr) {
1008 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1009 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1010 assert(VA.getValVT() == MVT::v64i1 &&
1011 "Expecting first location of 64 bit width type");
1012 assert(NextVA.getValVT() == VA.getValVT() &&
1013 "The locations should have the same type");
1014 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1015 "The values should reside in two registers");
1016
1017 SDValue Lo, Hi;
1018 SDValue ArgValueLo, ArgValueHi;
1019
1020 MachineFunction &MF = DAG.getMachineFunction();
1021 const TargetRegisterClass *RC = &X86::GR32RegClass;
1022
1023 // Read a 32 bit value from the registers.
1024 if (nullptr == InGlue) {
1025 // When no physical register is present,
1026 // create an intermediate virtual register.
1027 Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
1028 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1029 Reg = MF.addLiveIn(PReg: NextVA.getLocReg(), RC);
1030 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1031 } else {
1032 // When a physical register is available read the value from it and glue
1033 // the reads together.
1034 ArgValueLo =
1035 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1036 *InGlue = ArgValueLo.getValue(R: 2);
1037 ArgValueHi =
1038 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1039 *InGlue = ArgValueHi.getValue(R: 2);
1040 }
1041
1042 // Convert the i32 type into v32i1 type.
1043 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1044
1045 // Convert the i32 type into v32i1 type.
1046 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1047
1048 // Concatenate the two values together.
1049 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1050}
1051
/// The function will lower a register of various sizes (8/16/32/64)
/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
/// \returns a DAG node containing the operand after lowering to mask type.
1055static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1056 const EVT &ValLoc, const SDLoc &DL,
1057 SelectionDAG &DAG) {
1058 SDValue ValReturned = ValArg;
1059
1060 if (ValVT == MVT::v1i1)
1061 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1062
1063 if (ValVT == MVT::v64i1) {
    // On 32 bit machines this case is handled by getv64i1Argument.
    assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
    // On 64 bit machines there is no need to truncate the value, only bitcast.
1067 } else {
1068 MVT MaskLenVT;
1069 switch (ValVT.getSimpleVT().SimpleTy) {
1070 case MVT::v8i1:
1071 MaskLenVT = MVT::i8;
1072 break;
1073 case MVT::v16i1:
1074 MaskLenVT = MVT::i16;
1075 break;
1076 case MVT::v32i1:
1077 MaskLenVT = MVT::i32;
1078 break;
1079 default:
1080 llvm_unreachable("Expecting a vector of i1 types");
1081 }
1082
1083 ValReturned = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MaskLenVT, Operand: ValReturned);
1084 }
1085 return DAG.getBitcast(VT: ValVT, V: ValReturned);
1086}
1087
1088/// Lower the result values of a call into the
1089/// appropriate copies out of appropriate physical registers.
1090///
1091SDValue X86TargetLowering::LowerCallResult(
1092 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1093 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1094 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1095 uint32_t *RegMask) const {
1096
1097 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1098 // Assign locations to each value returned by this call.
1099 SmallVector<CCValAssign, 16> RVLocs;
1100 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1101 *DAG.getContext());
1102 CCInfo.AnalyzeCallResult(Ins, Fn: RetCC_X86);
1103
1104 // Copy all of the result registers out of their specified physreg.
1105 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1106 ++I, ++InsIndex) {
1107 CCValAssign &VA = RVLocs[I];
1108 EVT CopyVT = VA.getLocVT();
1109
1110 // In some calling conventions we need to remove the used registers
1111 // from the register mask.
1112 if (RegMask) {
1113 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1114 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1115 }
1116
1117 // Report an error if there was an attempt to return FP values via XMM
1118 // registers.
1119 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1120 errorUnsupported(DAG, dl, Msg: "SSE register return with SSE disabled");
1121 if (VA.getLocReg() == X86::XMM1)
1122 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1123 else
1124 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1125 } else if (!Subtarget.hasSSE2() &&
1126 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1127 CopyVT == MVT::f64) {
1128 errorUnsupported(DAG, dl, Msg: "SSE2 register return with SSE2 disabled");
1129 if (VA.getLocReg() == X86::XMM1)
1130 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1131 else
1132 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1133 }
1134
1135 // If we prefer to use the value in xmm registers, copy it out as f80 and
1136 // use a truncate to move it from fp stack reg to xmm reg.
1137 bool RoundAfterCopy = false;
1138 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1139 isScalarFPTypeInSSEReg(VA.getValVT())) {
1140 if (!Subtarget.hasX87())
1141 report_fatal_error(reason: "X87 register return with X87 disabled");
1142 CopyVT = MVT::f80;
1143 RoundAfterCopy = (CopyVT != VA.getLocVT());
1144 }
1145
1146 SDValue Val;
1147 if (VA.needsCustom()) {
1148 assert(VA.getValVT() == MVT::v64i1 &&
1149 "Currently the only custom case is when we split v64i1 to 2 regs");
1150 Val =
1151 getv64i1Argument(VA, NextVA&: RVLocs[++I], Root&: Chain, DAG, DL: dl, Subtarget, InGlue: &InGlue);
1152 } else {
1153 Chain = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: CopyVT, Glue: InGlue)
1154 .getValue(R: 1);
1155 Val = Chain.getValue(R: 0);
1156 InGlue = Chain.getValue(R: 2);
1157 }
1158
1159 if (RoundAfterCopy)
1160 Val = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: VA.getValVT(), N1: Val,
1161 // This truncation won't change the value.
1162 N2: DAG.getIntPtrConstant(Val: 1, DL: dl, /*isTarget=*/true));
1163
1164 if (VA.isExtInLoc()) {
1165 if (VA.getValVT().isVector() &&
1166 VA.getValVT().getScalarType() == MVT::i1 &&
1167 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1168 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1169 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1170 Val = lowerRegToMasks(ValArg: Val, ValVT: VA.getValVT(), ValLoc: VA.getLocVT(), DL: dl, DAG);
1171 } else
1172 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
1173 }
1174
1175 if (VA.getLocInfo() == CCValAssign::BCvt)
1176 Val = DAG.getBitcast(VT: VA.getValVT(), V: Val);
1177
1178 InVals.push_back(Elt: Val);
1179 }
1180
1181 return Chain;
1182}
1183
1184//===----------------------------------------------------------------------===//
1185// C & StdCall & Fast Calling Convention implementation
1186//===----------------------------------------------------------------------===//
1187// StdCall calling convention seems to be standard for many Windows' API
1188// routines and around. It differs from C calling convention just a little:
1189// callee should clean up the stack, not caller. Symbols should be also
1190// decorated in some fancy way :) It doesn't support any vector arguments.
1191// For info on fast calling convention see Fast Calling Convention (tail call)
1192// implementation LowerX86_32FastCCCallTo.
1193
1194/// Determines whether Args, either a set of outgoing arguments to a call, or a
1195/// set of incoming args of a call, contains an sret pointer that the callee
/// pops.
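/// For example, a 32-bit linux-gnu cdecl function returning a struct via a
/// hidden sret pointer pops that pointer itself (e.g. "retl $4"), whereas
/// MSVC and MCU targets leave the stack adjustment to the caller.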
1197template <typename T>
1198static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1199 const X86Subtarget &Subtarget) {
1200 // Not C++20 (yet), so no concepts available.
1201 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1202 std::is_same_v<T, ISD::InputArg>,
1203 "requires ISD::OutputArg or ISD::InputArg");
1204
1205 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1206 // for most compilations.
1207 if (!Subtarget.is32Bit())
1208 return false;
1209
1210 if (Args.empty())
1211 return false;
1212
1213 // Most calls do not have an sret argument, check the arg next.
1214 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1215 if (!Flags.isSRet() || Flags.isInReg())
1216 return false;
1217
  // The MSVC ABI does not pop the sret.
1219 if (Subtarget.getTargetTriple().isOSMSVCRT())
1220 return false;
1221
1222 // MCUs don't pop the sret
1223 if (Subtarget.isTargetMCU())
1224 return false;
1225
1226 // Callee pops argument
1227 return true;
1228}
1229
1230/// Make a copy of an aggregate at address specified by "Src" to address
1231/// "Dst" with size and alignment information specified by the specific
1232/// parameter attribute. The copy will be passed as a byval function parameter.
1233static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1234 SDValue Chain, ISD::ArgFlagsTy Flags,
1235 SelectionDAG &DAG, const SDLoc &dl) {
1236 SDValue SizeNode = DAG.getIntPtrConstant(Val: Flags.getByValSize(), DL: dl);
1237
1238 return DAG.getMemcpy(
1239 Chain, dl, Dst, Src, Size: SizeNode, Alignment: Flags.getNonZeroByValAlign(),
1240 /*isVolatile*/ isVol: false, /*AlwaysInline=*/true,
1241 /*isTailCall*/ false, DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
1242}
1243
1244/// Return true if the calling convention is one that we can guarantee TCO for.
1245static bool canGuaranteeTCO(CallingConv::ID CC) {
1246 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1247 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1248 CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1249}
1250
1251/// Return true if we might ever do TCO for calls with this calling convention.
1252static bool mayTailCallThisCC(CallingConv::ID CC) {
1253 switch (CC) {
1254 // C calling conventions:
1255 case CallingConv::C:
1256 case CallingConv::Win64:
1257 case CallingConv::X86_64_SysV:
1258 case CallingConv::PreserveNone:
1259 // Callee pop conventions:
1260 case CallingConv::X86_ThisCall:
1261 case CallingConv::X86_StdCall:
1262 case CallingConv::X86_VectorCall:
1263 case CallingConv::X86_FastCall:
1264 // Swift:
1265 case CallingConv::Swift:
1266 return true;
1267 default:
1268 return canGuaranteeTCO(CC);
1269 }
1270}
1271
1272/// Return true if the function is being made into a tailcall target by
1273/// changing its ABI.
1274static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1275 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1276 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1277}
1278
1279bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1280 if (!CI->isTailCall())
1281 return false;
1282
1283 CallingConv::ID CalleeCC = CI->getCallingConv();
1284 if (!mayTailCallThisCC(CC: CalleeCC))
1285 return false;
1286
1287 return true;
1288}
1289
1290SDValue
1291X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1292 const SmallVectorImpl<ISD::InputArg> &Ins,
1293 const SDLoc &dl, SelectionDAG &DAG,
1294 const CCValAssign &VA,
1295 MachineFrameInfo &MFI, unsigned i) const {
1296 // Create the nodes corresponding to a load from this parameter slot.
1297 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1298 bool AlwaysUseMutable = shouldGuaranteeTCO(
1299 CC: CallConv, GuaranteedTailCallOpt: DAG.getTarget().Options.GuaranteedTailCallOpt);
1300 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1301 EVT ValVT;
1302 MVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
1303
  // If the value is passed by pointer, the address is passed instead of the
  // value itself. No need to extend if the mask value and location share the
  // same absolute size.
1307 bool ExtendedInMem =
1308 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1309 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1310
1311 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1312 ValVT = VA.getLocVT();
1313 else
1314 ValVT = VA.getValVT();
1315
  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since they
  // could be overwritten by the lowering of arguments in case of a tail call.
1320 if (Flags.isByVal()) {
1321 unsigned Bytes = Flags.getByValSize();
1322 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1323
1324 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1325 // can be improved with deeper analysis.
1326 int FI = MFI.CreateFixedObject(Size: Bytes, SPOffset: VA.getLocMemOffset(), IsImmutable: isImmutable,
1327 /*isAliased=*/true);
1328 return DAG.getFrameIndex(FI, VT: PtrVT);
1329 }
1330
1331 EVT ArgVT = Ins[i].ArgVT;
1332
1333 // If this is a vector that has been split into multiple parts, don't elide
1334 // the copy. The layout on the stack may not match the packed in-memory
1335 // layout.
1336 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1337
1338 // This is an argument in memory. We might be able to perform copy elision.
1339 // If the argument is passed directly in memory without any extension, then we
1340 // can perform copy elision. Large vector types, for example, may be passed
1341 // indirectly by pointer.
1342 if (Flags.isCopyElisionCandidate() &&
1343 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1344 !ScalarizedVector) {
1345 SDValue PartAddr;
1346 if (Ins[i].PartOffset == 0) {
1347 // If this is a one-part value or the first part of a multi-part value,
1348 // create a stack object for the entire argument value type and return a
1349 // load from our portion of it. This assumes that if the first part of an
1350 // argument is in memory, the rest will also be in memory.
1351 int FI = MFI.CreateFixedObject(Size: ArgVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
1352 /*IsImmutable=*/false);
1353 PartAddr = DAG.getFrameIndex(FI, VT: PtrVT);
1354 return DAG.getLoad(
1355 VT: ValVT, dl, Chain, Ptr: PartAddr,
1356 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI));
1357 }
1358
1359 // This is not the first piece of an argument in memory. See if there is
1360 // already a fixed stack object including this offset. If so, assume it
1361 // was created by the PartOffset == 0 branch above and create a load from
1362 // the appropriate offset into it.
1363 int64_t PartBegin = VA.getLocMemOffset();
1364 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1365 int FI = MFI.getObjectIndexBegin();
1366 for (; MFI.isFixedObjectIndex(ObjectIdx: FI); ++FI) {
1367 int64_t ObjBegin = MFI.getObjectOffset(ObjectIdx: FI);
1368 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(ObjectIdx: FI);
1369 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1370 break;
1371 }
1372 if (MFI.isFixedObjectIndex(ObjectIdx: FI)) {
1373 SDValue Addr =
1374 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: DAG.getFrameIndex(FI, VT: PtrVT),
1375 N2: DAG.getIntPtrConstant(Val: Ins[i].PartOffset, DL: dl));
1376 return DAG.getLoad(VT: ValVT, dl, Chain, Ptr: Addr,
1377 PtrInfo: MachinePointerInfo::getFixedStack(
1378 MF&: DAG.getMachineFunction(), FI, Offset: Ins[i].PartOffset));
1379 }
1380 }
1381
1382 int FI = MFI.CreateFixedObject(Size: ValVT.getSizeInBits() / 8,
1383 SPOffset: VA.getLocMemOffset(), IsImmutable: isImmutable);
1384
1385 // Set SExt or ZExt flag.
1386 if (VA.getLocInfo() == CCValAssign::ZExt) {
1387 MFI.setObjectZExt(ObjectIdx: FI, IsZExt: true);
1388 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1389 MFI.setObjectSExt(ObjectIdx: FI, IsSExt: true);
1390 }
1391
1392 MaybeAlign Alignment;
1393 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1394 ValVT != MVT::f80)
1395 Alignment = MaybeAlign(4);
1396 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
1397 SDValue Val = DAG.getLoad(
1398 VT: ValVT, dl, Chain, Ptr: FIN,
1399 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI),
1400 Alignment);
1401 return ExtendedInMem
1402 ? (VA.getValVT().isVector()
1403 ? DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: VA.getValVT(), Operand: Val)
1404 : DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val))
1405 : Val;
1406}
1407
1408// FIXME: Get this from tablegen.
1409static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1410 const X86Subtarget &Subtarget) {
1411 assert(Subtarget.is64Bit());
1412
1413 if (Subtarget.isCallingConvWin64(CC: CallConv)) {
1414 static const MCPhysReg GPR64ArgRegsWin64[] = {
1415 X86::RCX, X86::RDX, X86::R8, X86::R9
1416 };
1417 return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
1418 }
1419
1420 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1421 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1422 };
1423 return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
1424}
1425
1426// FIXME: Get this from tablegen.
1427static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1428 CallingConv::ID CallConv,
1429 const X86Subtarget &Subtarget) {
1430 assert(Subtarget.is64Bit());
1431 if (Subtarget.isCallingConvWin64(CC: CallConv)) {
    // The XMM registers which might contain var arg parameters are shadowed
    // in their paired GPRs, so we only need to save the GPRs to their home
    // slots.
1435 // TODO: __vectorcall will change this.
1436 return std::nullopt;
1437 }
1438
1439 bool isSoftFloat = Subtarget.useSoftFloat();
1440 if (isSoftFloat || !Subtarget.hasSSE1())
1441 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1442 // registers.
1443 return std::nullopt;
1444
1445 static const MCPhysReg XMMArgRegs64Bit[] = {
1446 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1447 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1448 };
1449 return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
1450}
1451
1452#ifndef NDEBUG
1453static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1454 return llvm::is_sorted(
1455 Range&: ArgLocs, C: [](const CCValAssign &A, const CCValAssign &B) -> bool {
1456 return A.getValNo() < B.getValNo();
1457 });
1458}
1459#endif
1460
1461namespace {
/// This is a helper class for lowering variable argument (vararg) parameters.
1463class VarArgsLoweringHelper {
1464public:
1465 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1466 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1467 CallingConv::ID CallConv, CCState &CCInfo)
1468 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1469 TheMachineFunction(DAG.getMachineFunction()),
1470 TheFunction(TheMachineFunction.getFunction()),
1471 FrameInfo(TheMachineFunction.getFrameInfo()),
1472 FrameLowering(*Subtarget.getFrameLowering()),
1473 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1474 CCInfo(CCInfo) {}
1475
  // Lower variable argument parameters.
1477 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1478
1479private:
1480 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1481
1482 void forwardMustTailParameters(SDValue &Chain);
1483
1484 bool is64Bit() const { return Subtarget.is64Bit(); }
1485 bool isWin64() const { return Subtarget.isCallingConvWin64(CC: CallConv); }
1486
1487 X86MachineFunctionInfo *FuncInfo;
1488 const SDLoc &DL;
1489 SelectionDAG &DAG;
1490 const X86Subtarget &Subtarget;
1491 MachineFunction &TheMachineFunction;
1492 const Function &TheFunction;
1493 MachineFrameInfo &FrameInfo;
1494 const TargetFrameLowering &FrameLowering;
1495 const TargetLowering &TargLowering;
1496 CallingConv::ID CallConv;
1497 CCState &CCInfo;
1498};
1499} // namespace
1500
1501void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1502 SDValue &Chain, unsigned StackSize) {
  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  // We can skip this if there are no va_start calls.
1506 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1507 CallConv != CallingConv::X86_ThisCall)) {
1508 FuncInfo->setVarArgsFrameIndex(
1509 FrameInfo.CreateFixedObject(Size: 1, SPOffset: StackSize, IsImmutable: true));
1510 }
1511
1512 // 64-bit calling conventions support varargs and register parameters, so we
1513 // have to do extra work to spill them in the prologue.
1514 if (is64Bit()) {
1515 // Find the first unallocated argument registers.
1516 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1517 ArrayRef<MCPhysReg> ArgXMMs =
1518 get64BitArgumentXMMs(MF&: TheMachineFunction, CallConv, Subtarget);
1519 unsigned NumIntRegs = CCInfo.getFirstUnallocated(Regs: ArgGPRs);
1520 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(Regs: ArgXMMs);
1521
1522 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1523 "SSE register cannot be used when SSE is disabled!");
1524
1525 if (isWin64()) {
1526 // Get to the caller-allocated home save location. Add 8 to account
1527 // for the return address.
1528 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1529 FuncInfo->setRegSaveFrameIndex(
1530 FrameInfo.CreateFixedObject(Size: 1, SPOffset: NumIntRegs * 8 + HomeOffset, IsImmutable: false));
      // Fix up the vararg frame index to point into the shadow area (4 x i64).
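      // Illustrative: with one named integer argument, NumIntRegs is 1, so
      // va_start will point at the RDX home slot, the first slot that can
      // hold an unnamed argument.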
1532 if (NumIntRegs < 4)
1533 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1534 } else {
      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so
      // they may be loaded when the callee walks the va_list (e.g. via
      // va_arg).
1538 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1539 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1540 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1541 Size: ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Alignment: Align(16), isSpillSlot: false));
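      // This reserves the full SysV AMD64 register save area: 6 GPRs * 8 bytes
      // followed by the XMM argument registers * 16 bytes (176 bytes in total
      // when all 8 XMM registers are available). The gp_offset/fp_offset
      // fields of the va_list index into this area.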
1542 }
1543
    // SDValues for the GPR argument registers holding live input values.
    SmallVector<SDValue, 6> LiveGPRs;
    // SDValues for the XMM argument registers holding live input values.
    SmallVector<SDValue, 8> LiveXMMRegs;
    // SDValue for the %al register, if applicable.
    SDValue ALVal;
1549
1550 // Gather all the live in physical registers.
1551 for (MCPhysReg Reg : ArgGPRs.slice(N: NumIntRegs)) {
1552 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1553 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1554 }
1555 const auto &AvailableXmms = ArgXMMs.slice(N: NumXMMRegs);
1556 if (!AvailableXmms.empty()) {
1557 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1558 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1559 for (MCPhysReg Reg : AvailableXmms) {
        // The fast register allocator spills virtual registers at basic block
        // boundaries, which can lead to uses of XMM registers outside the
        // check on %al. Pass physical registers to VASTART_SAVE_XMM_REGS to
        // avoid unnecessary spilling.
1564 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1565 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1566 }
1567 }
1568
1569 // Store the integer parameter registers.
1570 SmallVector<SDValue, 8> MemOps;
1571 SDValue RSFIN =
1572 DAG.getFrameIndex(FI: FuncInfo->getRegSaveFrameIndex(),
1573 VT: TargLowering.getPointerTy(DL: DAG.getDataLayout()));
1574 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1575 for (SDValue Val : LiveGPRs) {
1576 SDValue FIN = DAG.getNode(Opcode: ISD::ADD, DL,
1577 VT: TargLowering.getPointerTy(DL: DAG.getDataLayout()),
1578 N1: RSFIN, N2: DAG.getIntPtrConstant(Val: Offset, DL));
1579 SDValue Store =
1580 DAG.getStore(Chain: Val.getValue(R: 1), dl: DL, Val, Ptr: FIN,
1581 PtrInfo: MachinePointerInfo::getFixedStack(
1582 MF&: DAG.getMachineFunction(),
1583 FI: FuncInfo->getRegSaveFrameIndex(), Offset));
1584 MemOps.push_back(Elt: Store);
1585 Offset += 8;
1586 }
1587
1588 // Now store the XMM (fp + vector) parameter registers.
1589 if (!LiveXMMRegs.empty()) {
1590 SmallVector<SDValue, 12> SaveXMMOps;
1591 SaveXMMOps.push_back(Elt: Chain);
1592 SaveXMMOps.push_back(Elt: ALVal);
1593 SaveXMMOps.push_back(Elt: RSFIN);
1594 SaveXMMOps.push_back(
1595 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1596 llvm::append_range(C&: SaveXMMOps, R&: LiveXMMRegs);
1597 MachineMemOperand *StoreMMO =
1598 DAG.getMachineFunction().getMachineMemOperand(
1599 PtrInfo: MachinePointerInfo::getFixedStack(
1600 MF&: DAG.getMachineFunction(), FI: FuncInfo->getRegSaveFrameIndex(),
1601 Offset),
1602 F: MachineMemOperand::MOStore, Size: 128, BaseAlignment: Align(16));
1603 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1604 DL, DAG.getVTList(MVT::Other),
1605 SaveXMMOps, MVT::i8, StoreMMO));
1606 }
1607
1608 if (!MemOps.empty())
1609 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1610 }
1611}
1612
1613void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1614 // Find the largest legal vector type.
1615 MVT VecVT = MVT::Other;
1616 // FIXME: Only some x86_32 calling conventions support AVX512.
1617 if (Subtarget.useAVX512Regs() &&
1618 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1619 CallConv == CallingConv::Intel_OCL_BI)))
1620 VecVT = MVT::v16f32;
1621 else if (Subtarget.hasAVX())
1622 VecVT = MVT::v8f32;
1623 else if (Subtarget.hasSSE2())
1624 VecVT = MVT::v4f32;
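  // For example, on an x86-64 target with AVX but without usable AVX-512
  // registers, variadic vector arguments get forwarded as v8f32 (YMM-sized)
  // values.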
1625
1626 // We forward some GPRs and some vector types.
1627 SmallVector<MVT, 2> RegParmTypes;
1628 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1629 RegParmTypes.push_back(Elt: IntVT);
1630 if (VecVT != MVT::Other)
1631 RegParmTypes.push_back(Elt: VecVT);
1632
1633 // Compute the set of forwarded registers. The rest are scratch.
1634 SmallVectorImpl<ForwardedRegister> &Forwards =
1635 FuncInfo->getForwardedMustTailRegParms();
1636 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: CC_X86);
1637
1638 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1639 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1640 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1641 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1642 }
1643
1644 // Copy all forwards from physical to virtual registers.
1645 for (ForwardedRegister &FR : Forwards) {
1646 // FIXME: Can we use a less constrained schedule?
1647 SDValue RegVal = DAG.getCopyFromReg(Chain, dl: DL, Reg: FR.VReg, VT: FR.VT);
1648 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1649 RegClass: TargLowering.getRegClassFor(VT: FR.VT));
1650 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: FR.VReg, N: RegVal);
1651 }
1652}
1653
1654void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1655 unsigned StackSize) {
  // Set the frame indices to the sentinel value 0xAAAAAAA to mark them as
  // unset. If necessary, they will be set to the correct values later.
1658 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1659 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1660
1661 if (FrameInfo.hasVAStart())
1662 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1663
1664 if (FrameInfo.hasMustTailInVarArgFunc())
1665 forwardMustTailParameters(Chain);
1666}
1667
1668SDValue X86TargetLowering::LowerFormalArguments(
1669 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1670 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1671 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1672 MachineFunction &MF = DAG.getMachineFunction();
1673 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1674
1675 const Function &F = MF.getFunction();
1676 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1677 F.getName() == "main")
1678 FuncInfo->setForceFramePointer(true);
1679
1680 MachineFrameInfo &MFI = MF.getFrameInfo();
1681 bool Is64Bit = Subtarget.is64Bit();
1682 bool IsWin64 = Subtarget.isCallingConvWin64(CC: CallConv);
1683
  assert(
      !(IsVarArg && canGuaranteeTCO(CallConv)) &&
      "Var args not supported with calling conventions regcall, fastcc, ghc "
      "or hipe");
1687
1688 // Assign locations to all of the incoming arguments.
1689 SmallVector<CCValAssign, 16> ArgLocs;
1690 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1691
1692 // Allocate shadow area for Win64.
1693 if (IsWin64)
1694 CCInfo.AllocateStack(Size: 32, Alignment: Align(8));
1695
1696 CCInfo.AnalyzeArguments(Ins, Fn: CC_X86);
1697
1698 // In vectorcall calling convention a second pass is required for the HVA
1699 // types.
1700 if (CallingConv::X86_VectorCall == CallConv) {
1701 CCInfo.AnalyzeArgumentsSecondPass(Args: Ins, Fn: CC_X86);
1702 }
1703
  // The next loop assumes that the locations are in the same order as the
  // input arguments.
1706 assert(isSortedByValueNo(ArgLocs) &&
1707 "Argument Location list must be sorted before lowering");
1708
1709 SDValue ArgValue;
1710 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1711 ++I, ++InsIndex) {
1712 assert(InsIndex < Ins.size() && "Invalid Ins index");
1713 CCValAssign &VA = ArgLocs[I];
1714
1715 if (VA.isRegLoc()) {
1716 EVT RegVT = VA.getLocVT();
1717 if (VA.needsCustom()) {
1718 assert(
1719 VA.getValVT() == MVT::v64i1 &&
1720 "Currently the only custom case is when we split v64i1 to 2 regs");
1721
1722 // v64i1 values, in regcall calling convention, that are
1723 // compiled to 32 bit arch, are split up into two registers.
1724 ArgValue =
1725 getv64i1Argument(VA, NextVA&: ArgLocs[++I], Root&: Chain, DAG, DL: dl, Subtarget);
1726 } else {
1727 const TargetRegisterClass *RC;
1728 if (RegVT == MVT::i8)
1729 RC = &X86::GR8RegClass;
1730 else if (RegVT == MVT::i16)
1731 RC = &X86::GR16RegClass;
1732 else if (RegVT == MVT::i32)
1733 RC = &X86::GR32RegClass;
1734 else if (Is64Bit && RegVT == MVT::i64)
1735 RC = &X86::GR64RegClass;
1736 else if (RegVT == MVT::f16)
1737 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1738 else if (RegVT == MVT::f32)
1739 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1740 else if (RegVT == MVT::f64)
1741 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1742 else if (RegVT == MVT::f80)
1743 RC = &X86::RFP80RegClass;
1744 else if (RegVT == MVT::f128)
1745 RC = &X86::VR128RegClass;
1746 else if (RegVT.is512BitVector())
1747 RC = &X86::VR512RegClass;
1748 else if (RegVT.is256BitVector())
1749 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1750 else if (RegVT.is128BitVector())
1751 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1752 else if (RegVT == MVT::x86mmx)
1753 RC = &X86::VR64RegClass;
1754 else if (RegVT == MVT::v1i1)
1755 RC = &X86::VK1RegClass;
1756 else if (RegVT == MVT::v8i1)
1757 RC = &X86::VK8RegClass;
1758 else if (RegVT == MVT::v16i1)
1759 RC = &X86::VK16RegClass;
1760 else if (RegVT == MVT::v32i1)
1761 RC = &X86::VK32RegClass;
1762 else if (RegVT == MVT::v64i1)
1763 RC = &X86::VK64RegClass;
1764 else
1765 llvm_unreachable("Unknown argument type!");
1766
1767 Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
1768 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, VT: RegVT);
1769 }
1770
1771 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1772 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1773 // right size.
1774 if (VA.getLocInfo() == CCValAssign::SExt)
1775 ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: RegVT, N1: ArgValue,
1776 N2: DAG.getValueType(VA.getValVT()));
1777 else if (VA.getLocInfo() == CCValAssign::ZExt)
1778 ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: RegVT, N1: ArgValue,
1779 N2: DAG.getValueType(VA.getValVT()));
1780 else if (VA.getLocInfo() == CCValAssign::BCvt)
1781 ArgValue = DAG.getBitcast(VT: VA.getValVT(), V: ArgValue);
1782
1783 if (VA.isExtInLoc()) {
1784 // Handle MMX values passed in XMM regs.
1785 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1786 ArgValue = DAG.getNode(Opcode: X86ISD::MOVDQ2Q, DL: dl, VT: VA.getValVT(), Operand: ArgValue);
1787 else if (VA.getValVT().isVector() &&
1788 VA.getValVT().getScalarType() == MVT::i1 &&
1789 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1790 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1791 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1792 ArgValue = lowerRegToMasks(ValArg: ArgValue, ValVT: VA.getValVT(), ValLoc: RegVT, DL: dl, DAG);
1793 } else
1794 ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: ArgValue);
1795 }
1796 } else {
1797 assert(VA.isMemLoc());
1798 ArgValue =
1799 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i: InsIndex);
1800 }
1801
    // If the value is passed via a pointer, do a load.
1803 if (VA.getLocInfo() == CCValAssign::Indirect &&
1804 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1805 ArgValue =
1806 DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: ArgValue, PtrInfo: MachinePointerInfo());
1807 }
1808
1809 InVals.push_back(Elt: ArgValue);
1810 }
1811
1812 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1813 if (Ins[I].Flags.isSwiftAsync()) {
1814 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1815 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1816 X86FI->setHasSwiftAsyncContext(true);
1817 else {
1818 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1819 int FI =
1820 MF.getFrameInfo().CreateStackObject(Size: PtrSize, Alignment: Align(PtrSize), isSpillSlot: false);
1821 X86FI->setSwiftAsyncContextFrameIdx(FI);
1822 SDValue St = DAG.getStore(
1823 DAG.getEntryNode(), dl, InVals[I],
1824 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1825 MachinePointerInfo::getFixedStack(MF, FI));
1826 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1827 }
1828 }
1829
    // The Swift calling convention does not require us to copy the sret
    // argument into %rax/%eax for the return, so we don't set SRetReturnReg
    // for Swift.
1832 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1833 continue;
1834
1835 // All x86 ABIs require that for returning structs by value we copy the
1836 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1837 // the argument into a virtual register so that we can access it from the
1838 // return points.
1839 if (Ins[I].Flags.isSRet()) {
1840 assert(!FuncInfo->getSRetReturnReg() &&
1841 "SRet return has already been set");
1842 MVT PtrTy = getPointerTy(DL: DAG.getDataLayout());
1843 Register Reg =
1844 MF.getRegInfo().createVirtualRegister(RegClass: getRegClassFor(VT: PtrTy));
1845 FuncInfo->setSRetReturnReg(Reg);
1846 SDValue Copy = DAG.getCopyToReg(Chain: DAG.getEntryNode(), dl, Reg, N: InVals[I]);
1847 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1848 break;
1849 }
1850 }
1851
1852 unsigned StackSize = CCInfo.getStackSize();
1853 // Align stack specially for tail calls.
1854 if (shouldGuaranteeTCO(CC: CallConv,
1855 GuaranteedTailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt))
1856 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1857
1858 if (IsVarArg)
1859 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1860 .lowerVarArgsParameters(Chain, StackSize);
1861
1862 // Some CCs need callee pop.
1863 if (X86::isCalleePop(CallingConv: CallConv, is64Bit: Is64Bit, IsVarArg,
1864 GuaranteeTCO: MF.getTarget().Options.GuaranteedTailCallOpt)) {
1865 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1866 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1867 // X86 interrupts must pop the error code (and the alignment padding) if
1868 // present.
1869 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1870 } else {
1871 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1872 // If this is an sret function, the return should pop the hidden pointer.
1873 if (!canGuaranteeTCO(CC: CallConv) && hasCalleePopSRet(Args: Ins, Subtarget))
1874 FuncInfo->setBytesToPopOnReturn(4);
1875 }
1876
1877 if (!Is64Bit) {
1878 // RegSaveFrameIndex is X86-64 only.
1879 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1880 }
1881
1882 FuncInfo->setArgumentStackSize(StackSize);
1883
1884 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1885 EHPersonality Personality = classifyEHPersonality(Pers: F.getPersonalityFn());
1886 if (Personality == EHPersonality::CoreCLR) {
1887 assert(Is64Bit);
1888 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1889 // that we'd prefer this slot be allocated towards the bottom of the frame
1890 // (i.e. near the stack pointer after allocating the frame). Every
1891 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1892 // offset from the bottom of this and each funclet's frame must be the
1893 // same, so the size of funclets' (mostly empty) frames is dictated by
1894 // how far this slot is from the bottom (since they allocate just enough
1895 // space to accommodate holding this slot at the correct offset).
1896 int PSPSymFI = MFI.CreateStackObject(Size: 8, Alignment: Align(8), /*isSpillSlot=*/false);
1897 EHInfo->PSPSymFrameIdx = PSPSymFI;
1898 }
1899 }
1900
1901 if (shouldDisableArgRegFromCSR(CC: CallConv) ||
1902 F.hasFnAttribute(Kind: "no_caller_saved_registers")) {
1903 MachineRegisterInfo &MRI = MF.getRegInfo();
1904 for (std::pair<Register, Register> Pair : MRI.liveins())
1905 MRI.disableCalleeSavedRegister(Reg: Pair.first);
1906 }
1907
1908 if (CallingConv::PreserveNone == CallConv)
1909 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1910 if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1911 Ins[I].Flags.isSwiftError()) {
1912 errorUnsupported(DAG, dl,
1913 Msg: "Swift attributes can't be used with preserve_none");
1914 break;
1915 }
1916 }
1917
1918 return Chain;
1919}
1920
1921SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1922 SDValue Arg, const SDLoc &dl,
1923 SelectionDAG &DAG,
1924 const CCValAssign &VA,
1925 ISD::ArgFlagsTy Flags,
1926 bool isByVal) const {
1927 unsigned LocMemOffset = VA.getLocMemOffset();
1928 SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
1929 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
1930 N1: StackPtr, N2: PtrOff);
1931 if (isByVal)
1932 return CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff, Chain, Flags, DAG, dl);
1933
1934 MaybeAlign Alignment;
1935 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1936 Arg.getSimpleValueType() != MVT::f80)
1937 Alignment = MaybeAlign(4);
1938 return DAG.getStore(
1939 Chain, dl, Val: Arg, Ptr: PtrOff,
1940 PtrInfo: MachinePointerInfo::getStack(MF&: DAG.getMachineFunction(), Offset: LocMemOffset),
1941 Alignment);
1942}
1943
/// Emit a load of the return address if tail call
/// optimization is performed and it is required.
1946SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1947 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1948 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1949 // Adjust the Return address stack slot.
1950 EVT VT = getPointerTy(DL: DAG.getDataLayout());
1951 OutRetAddr = getReturnAddressFrameIndex(DAG);
1952
1953 // Load the "old" Return address.
1954 OutRetAddr = DAG.getLoad(VT, dl, Chain, Ptr: OutRetAddr, PtrInfo: MachinePointerInfo());
1955 return SDValue(OutRetAddr.getNode(), 1);
1956}
1957
1958/// Emit a store of the return address if tail call
1959/// optimization is performed and it is required (FPDiff!=0).
1960static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1961 SDValue Chain, SDValue RetAddrFrIdx,
1962 EVT PtrVT, unsigned SlotSize,
1963 int FPDiff, const SDLoc &dl) {
1964 // Store the return address to the appropriate stack slot.
1965 if (!FPDiff) return Chain;
1966 // Calculate the new stack slot for the return address.
1967 int NewReturnAddrFI =
1968 MF.getFrameInfo().CreateFixedObject(Size: SlotSize, SPOffset: (int64_t)FPDiff - SlotSize,
1969 IsImmutable: false);
1970 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(FI: NewReturnAddrFI, VT: PtrVT);
1971 Chain = DAG.getStore(Chain, dl, Val: RetAddrFrIdx, Ptr: NewRetAddrFrIdx,
1972 PtrInfo: MachinePointerInfo::getFixedStack(
1973 MF&: DAG.getMachineFunction(), FI: NewReturnAddrFI));
1974 return Chain;
1975}
1976
/// Returns a vector_shuffle mask for a movs{s|d} or movd
/// operation of the specified width.
1979SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1980 SDValue V1, SDValue V2) const {
1981 unsigned NumElems = VT.getVectorNumElements();
1982 SmallVector<int, 8> Mask;
1983 Mask.push_back(Elt: NumElems);
1984 for (unsigned i = 1; i != NumElems; ++i)
1985 Mask.push_back(Elt: i);
1986 return DAG.getVectorShuffle(VT, dl, N1: V1, N2: V2, Mask);
1987}
1988
1989SDValue
1990X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1991 SmallVectorImpl<SDValue> &InVals) const {
1992 SelectionDAG &DAG = CLI.DAG;
1993 SDLoc &dl = CLI.DL;
1994 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1995 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1996 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1997 SDValue Chain = CLI.Chain;
1998 SDValue Callee = CLI.Callee;
1999 CallingConv::ID CallConv = CLI.CallConv;
2000 bool &isTailCall = CLI.IsTailCall;
2001 bool isVarArg = CLI.IsVarArg;
2002 const auto *CB = CLI.CB;
2003
2004 MachineFunction &MF = DAG.getMachineFunction();
2005 bool Is64Bit = Subtarget.is64Bit();
2006 bool IsWin64 = Subtarget.isCallingConvWin64(CC: CallConv);
2007 bool IsSibcall = false;
2008 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2009 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2010 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Args: Outs, Subtarget);
2011 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2012 bool HasNCSR = (CB && isa<CallInst>(Val: CB) &&
2013 CB->hasFnAttr(Kind: "no_caller_saved_registers"));
2014 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2015 bool IsIndirectCall = (CB && isa<CallInst>(Val: CB) && CB->isIndirectCall());
2016 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2017 const Module *M = MF.getMMI().getModule();
2018 Metadata *IsCFProtectionSupported = M->getModuleFlag(Key: "cf-protection-branch");
2019
2020 MachineFunction::CallSiteInfo CSInfo;
2021 if (CallConv == CallingConv::X86_INTR)
2022 report_fatal_error(reason: "X86 interrupts may not be called directly");
2023
2024 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2025 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2026 // If we are using a GOT, disable tail calls to external symbols with
2027 // default visibility. Tail calling such a symbol requires using a GOT
2028 // relocation, which forces early binding of the symbol. This breaks code
    // that requires lazy function symbol resolution. Using musttail or
2030 // GuaranteedTailCallOpt will override this.
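    // Illustrative: in -fPIC code a tail call to an undefined external
    // function with default visibility is disabled here, while a callee with
    // local linkage or hidden visibility can still be tail called.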
2031 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
2032 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2033 G->getGlobal()->hasDefaultVisibility()))
2034 isTailCall = false;
2035 }
2036
2037 if (isTailCall && !IsMustTail) {
2038 // Check if it's really possible to do a tail call.
2039 isTailCall = IsEligibleForTailCallOptimization(
2040 Callee, CalleeCC: CallConv, IsCalleeStackStructRet: IsCalleePopSRet, isVarArg, RetTy: CLI.RetTy, Outs, OutVals,
2041 Ins, DAG);
2042
2043 // Sibcalls are automatically detected tailcalls which do not require
2044 // ABI changes.
2045 if (!IsGuaranteeTCO && isTailCall)
2046 IsSibcall = true;
2047
2048 if (isTailCall)
2049 ++NumTailCalls;
2050 }
2051
2052 if (IsMustTail && !isTailCall)
2053 report_fatal_error(reason: "failed to perform tail call elimination on a call "
2054 "site marked musttail");
2055
  assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
         "Var args not supported with calling conventions fastcc, ghc or "
         "hipe");
2058
2059 // Analyze operands of the call, assigning locations to each operand.
2060 SmallVector<CCValAssign, 16> ArgLocs;
2061 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2062
2063 // Allocate shadow area for Win64.
2064 if (IsWin64)
2065 CCInfo.AllocateStack(Size: 32, Alignment: Align(8));
2066
2067 CCInfo.AnalyzeArguments(Outs, Fn: CC_X86);
2068
2069 // In vectorcall calling convention a second pass is required for the HVA
2070 // types.
2071 if (CallingConv::X86_VectorCall == CallConv) {
2072 CCInfo.AnalyzeArgumentsSecondPass(Args: Outs, Fn: CC_X86);
2073 }
2074
2075 // Get a count of how many bytes are to be pushed on the stack.
2076 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2077 if (IsSibcall)
    // This is a sibcall. The memory operands are already available in the
    // caller's own incoming argument area (allocated by its caller), so we
    // do not need to push anything.
2080 NumBytes = 0;
2081 else if (IsGuaranteeTCO && canGuaranteeTCO(CC: CallConv))
2082 NumBytes = GetAlignedArgumentStackSize(StackSize: NumBytes, DAG);
2083
2084 int FPDiff = 0;
2085 if (isTailCall &&
2086 shouldGuaranteeTCO(CC: CallConv,
2087 GuaranteedTailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) {
2088 // Lower arguments at fp - stackoffset + fpdiff.
2089 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2090
2091 FPDiff = NumBytesCallerPushed - NumBytes;
2092
    // Record the delta by which the return address stack slot moves, but only
    // if this call needs a larger adjustment (a more negative delta) than any
    // recorded so far.
2095 if (FPDiff < X86Info->getTCReturnAddrDelta())
2096 X86Info->setTCReturnAddrDelta(FPDiff);
2097 }
2098
2099 unsigned NumBytesToPush = NumBytes;
2100 unsigned NumBytesToPop = NumBytes;
2101
  // If we have an inalloca argument, all stack space has already been
  // allocated for us and will be right at the top of the stack. We don't
  // support multiple arguments passed in memory when using inalloca.
2105 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2106 NumBytesToPush = 0;
2107 if (!ArgLocs.back().isMemLoc())
2108 report_fatal_error(reason: "cannot use inalloca attribute on a register "
2109 "parameter");
2110 if (ArgLocs.back().getLocMemOffset() != 0)
2111 report_fatal_error(reason: "any parameter with the inalloca attribute must be "
2112 "the only memory argument");
2113 } else if (CLI.IsPreallocated) {
2114 assert(ArgLocs.back().isMemLoc() &&
2115 "cannot use preallocated attribute on a register "
2116 "parameter");
2117 SmallVector<size_t, 4> PreallocatedOffsets;
2118 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2119 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2120 PreallocatedOffsets.push_back(Elt: ArgLocs[i].getLocMemOffset());
2121 }
2122 }
2123 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2124 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CS: CLI.CB);
2125 MFI->setPreallocatedStackSize(Id: PreallocatedId, StackSize: NumBytes);
2126 MFI->setPreallocatedArgOffsets(Id: PreallocatedId, AO: PreallocatedOffsets);
2127 NumBytesToPush = 0;
2128 }
2129
2130 if (!IsSibcall && !IsMustTail)
2131 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytesToPush,
2132 OutSize: NumBytes - NumBytesToPush, DL: dl);
2133
2134 SDValue RetAddrFrIdx;
2135 // Load return address for tail calls.
2136 if (isTailCall && FPDiff)
2137 Chain = EmitTailCallLoadRetAddr(DAG, OutRetAddr&: RetAddrFrIdx, Chain, IsTailCall: isTailCall,
2138 Is64Bit, FPDiff, dl);
2139
2140 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2141 SmallVector<SDValue, 8> MemOpChains;
2142 SDValue StackPtr;
2143
  // The next loop assumes that the locations are in the same order as the
  // input arguments.
2146 assert(isSortedByValueNo(ArgLocs) &&
2147 "Argument Location list must be sorted before lowering");
2148
  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
2151 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2152 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2153 ++I, ++OutIndex) {
2154 assert(OutIndex < Outs.size() && "Invalid Out index");
2155 // Skip inalloca/preallocated arguments, they have already been written.
2156 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2157 if (Flags.isInAlloca() || Flags.isPreallocated())
2158 continue;
2159
2160 CCValAssign &VA = ArgLocs[I];
2161 EVT RegVT = VA.getLocVT();
2162 SDValue Arg = OutVals[OutIndex];
2163 bool isByVal = Flags.isByVal();
2164
2165 // Promote the value if needed.
2166 switch (VA.getLocInfo()) {
2167 default: llvm_unreachable("Unknown loc info!");
2168 case CCValAssign::Full: break;
2169 case CCValAssign::SExt:
2170 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: RegVT, Operand: Arg);
2171 break;
2172 case CCValAssign::ZExt:
2173 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: RegVT, Operand: Arg);
2174 break;
2175 case CCValAssign::AExt:
2176 if (Arg.getValueType().isVector() &&
2177 Arg.getValueType().getVectorElementType() == MVT::i1)
2178 Arg = lowerMasksToReg(ValArg: Arg, ValLoc: RegVT, DL: dl, DAG);
2179 else if (RegVT.is128BitVector()) {
2180 // Special case: passing MMX values in XMM registers.
2181 Arg = DAG.getBitcast(MVT::i64, Arg);
2182 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2183 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2184 } else
2185 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: RegVT, Operand: Arg);
2186 break;
2187 case CCValAssign::BCvt:
2188 Arg = DAG.getBitcast(VT: RegVT, V: Arg);
2189 break;
2190 case CCValAssign::Indirect: {
2191 if (isByVal) {
2192 // Memcpy the argument to a temporary stack slot to prevent
2193 // the caller from seeing any modifications the callee may make
2194 // as guaranteed by the `byval` attribute.
2195 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2196 Size: Flags.getByValSize(),
2197 Alignment: std::max(a: Align(16), b: Flags.getNonZeroByValAlign()), isSpillSlot: false);
2198 SDValue StackSlot =
2199 DAG.getFrameIndex(FI: FrameIdx, VT: getPointerTy(DL: DAG.getDataLayout()));
2200 Chain =
2201 CreateCopyOfByValArgument(Src: Arg, Dst: StackSlot, Chain, Flags, DAG, dl);
2202 // From now on treat this as a regular pointer
2203 Arg = StackSlot;
2204 isByVal = false;
2205 } else {
2206 // Store the argument.
2207 SDValue SpillSlot = DAG.CreateStackTemporary(VT: VA.getValVT());
2208 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
2209 Chain = DAG.getStore(
2210 Chain, dl, Val: Arg, Ptr: SpillSlot,
2211 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI));
2212 Arg = SpillSlot;
2213 }
2214 break;
2215 }
2216 }
2217
2218 if (VA.needsCustom()) {
2219 assert(VA.getValVT() == MVT::v64i1 &&
2220 "Currently the only custom case is when we split v64i1 to 2 regs");
2221 // Split v64i1 value into two registers
2222 Passv64i1ArgInRegs(DL: dl, DAG, Arg, RegsToPass, VA, NextVA&: ArgLocs[++I], Subtarget);
2223 } else if (VA.isRegLoc()) {
2224 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
2225 const TargetOptions &Options = DAG.getTarget().Options;
2226 if (Options.EmitCallSiteInfo)
2227 CSInfo.ArgRegPairs.emplace_back(Args: VA.getLocReg(), Args&: I);
2228 if (isVarArg && IsWin64) {
        // The Win64 ABI requires an argument passed in an XMM register to also
        // be copied to the corresponding shadow GPR if the callee is a varargs
        // function.
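        // For example, a double passed in XMM1 to a varargs callee is also
        // copied into RDX so the callee can spill it together with the GPR
        // home area.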
2231 Register ShadowReg;
2232 switch (VA.getLocReg()) {
2233 case X86::XMM0: ShadowReg = X86::RCX; break;
2234 case X86::XMM1: ShadowReg = X86::RDX; break;
2235 case X86::XMM2: ShadowReg = X86::R8; break;
2236 case X86::XMM3: ShadowReg = X86::R9; break;
2237 }
2238 if (ShadowReg)
2239 RegsToPass.push_back(Elt: std::make_pair(x&: ShadowReg, y&: Arg));
2240 }
2241 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2242 assert(VA.isMemLoc());
2243 if (!StackPtr.getNode())
2244 StackPtr = DAG.getCopyFromReg(Chain, dl, Reg: RegInfo->getStackRegister(),
2245 VT: getPointerTy(DL: DAG.getDataLayout()));
2246 MemOpChains.push_back(Elt: LowerMemOpCallTo(Chain, StackPtr, Arg,
2247 dl, DAG, VA, Flags, isByVal));
2248 }
2249 }
2250
2251 if (!MemOpChains.empty())
2252 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2253
2254 if (Subtarget.isPICStyleGOT()) {
    // ELF / PIC requires the GOT pointer to be in the EBX register before
    // function calls via the PLT (except for regcall).
2257 if (!isTailCall) {
      // An indirect call with the RegCall calling convention may use up all
      // the general-purpose registers, so it is not suitable to reserve EBX
      // for the GOT address; just let the register allocator handle it.
2261 if (CallConv != CallingConv::X86_RegCall)
2262 RegsToPass.push_back(std::make_pair(
2263 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2264 getPointerTy(DAG.getDataLayout()))));
2265 } else {
      // If we are tail calling and generating PIC/GOT style code, load the
      // address of the callee into ECX. The value in ECX is used as the target
      // of the tail jump. This is done to circumvent the ebx/callee-saved
      // problem for tail calls on PIC/GOT targets. Normally we would just put
      // the address of the GOT into ebx and then call target@PLT. But for tail
      // calls ebx would be restored (since ebx is callee saved) before jumping
      // to the target@PLT.
2273
2274 // Note: The actual moving to ECX is done further down.
2275 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
2276 if (G && !G->getGlobal()->hasLocalLinkage() &&
2277 G->getGlobal()->hasDefaultVisibility())
2278 Callee = LowerGlobalAddress(Op: Callee, DAG);
2279 else if (isa<ExternalSymbolSDNode>(Val: Callee))
2280 Callee = LowerExternalSymbol(Op: Callee, DAG);
2281 }
2282 }
2283
2284 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2285 (Subtarget.hasSSE1() || !M->getModuleFlag(Key: "SkipRaxSetup"))) {
2286 // From AMD64 ABI document:
2287 // For calls that may call functions that use varargs or stdargs
2288 // (prototype-less calls or calls to functions containing ellipsis (...) in
2289 // the declaration) %al is used as hidden argument to specify the number
2290 // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
2292 // registers used and is in the range 0 - 8 inclusive.
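    // For example (illustrative), a call like printf("%f\n", x) that passes
    // one double in XMM0 ends up with %al set to 1 here.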
2293
2294 // Count the number of XMM registers allocated.
2295 static const MCPhysReg XMMArgRegs[] = {
2296 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2297 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2298 };
2299 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2300 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2301 && "SSE registers cannot be used when SSE is disabled");
2302 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2303 DAG.getConstant(NumXMMRegs, dl,
2304 MVT::i8)));
2305 }
2306
2307 if (isVarArg && IsMustTail) {
2308 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2309 for (const auto &F : Forwards) {
2310 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: F.VReg, VT: F.VT);
2311 RegsToPass.push_back(Elt: std::make_pair(x: F.PReg, y&: Val));
2312 }
2313 }
2314
2315 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2316 // don't need this because the eligibility check rejects calls that require
2317 // shuffling arguments passed in memory.
2318 if (!IsSibcall && isTailCall) {
2319 // Force all the incoming stack arguments to be loaded from the stack
2320 // before any new outgoing arguments are stored to the stack, because the
2321 // outgoing stack slots may alias the incoming argument stack slots, and
2322 // the alias isn't otherwise explicit. This is slightly more conservative
2323 // than necessary, because it means that each store effectively depends
2324 // on every argument instead of just those arguments it would clobber.
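    // Illustrative: if the caller's incoming argument slot is reused as the
    // callee's outgoing slot for a different argument, its old value must be
    // loaded before any outgoing store can clobber it.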
2325 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2326
2327 SmallVector<SDValue, 8> MemOpChains2;
2328 SDValue FIN;
2329 int FI = 0;
2330 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2331 ++I, ++OutsIndex) {
2332 CCValAssign &VA = ArgLocs[I];
2333
2334 if (VA.isRegLoc()) {
2335 if (VA.needsCustom()) {
2336 assert((CallConv == CallingConv::X86_RegCall) &&
2337 "Expecting custom case only in regcall calling convention");
          // This means that we are in the special case where one argument was
          // passed through two register locations - skip the next location.
2340 ++I;
2341 }
2342
2343 continue;
2344 }
2345
2346 assert(VA.isMemLoc());
2347 SDValue Arg = OutVals[OutsIndex];
2348 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2349 // Skip inalloca/preallocated arguments. They don't require any work.
2350 if (Flags.isInAlloca() || Flags.isPreallocated())
2351 continue;
2352 // Create frame index.
2353 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2354 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2355 FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
2356 FIN = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
2357
2358 if (Flags.isByVal()) {
2359 // Copy relative to framepointer.
2360 SDValue Source = DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL: dl);
2361 if (!StackPtr.getNode())
2362 StackPtr = DAG.getCopyFromReg(Chain, dl, Reg: RegInfo->getStackRegister(),
2363 VT: getPointerTy(DL: DAG.getDataLayout()));
2364 Source = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
2365 N1: StackPtr, N2: Source);
2366
2367 MemOpChains2.push_back(Elt: CreateCopyOfByValArgument(Src: Source, Dst: FIN,
2368 Chain: ArgChain,
2369 Flags, DAG, dl));
2370 } else {
2371 // Store relative to framepointer.
2372 MemOpChains2.push_back(Elt: DAG.getStore(
2373 Chain: ArgChain, dl, Val: Arg, Ptr: FIN,
2374 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)));
2375 }
2376 }
2377
2378 if (!MemOpChains2.empty())
2379 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2380
2381 // Store the return address to the appropriate stack slot.
2382 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2383 PtrVT: getPointerTy(DL: DAG.getDataLayout()),
2384 SlotSize: RegInfo->getSlotSize(), FPDiff, dl);
2385 }
2386
2387 // Build a sequence of copy-to-reg nodes chained together with token chain
2388 // and glue operands which copy the outgoing args into registers.
2389 SDValue InGlue;
2390 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2391 Chain = DAG.getCopyToReg(Chain, dl, Reg: RegsToPass[i].first,
2392 N: RegsToPass[i].second, Glue: InGlue);
2393 InGlue = Chain.getValue(R: 1);
2394 }
2395
2396 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2397 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2398 // In the 64-bit large code model, we have to make all calls
2399 // through a register, since the call instruction's 32-bit
2400 // pc-relative offset may not be large enough to hold the whole
2401 // address.
2402 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2403 Callee->getOpcode() == ISD::ExternalSymbol) {
2404 // Lower direct calls to global addresses and external symbols. Setting
2405 // ForCall to true here has the effect of removing WrapperRIP when possible
2406 // to allow direct calls to be selected without first materializing the
2407 // address into a register.
2408 Callee = LowerGlobalOrExternal(Op: Callee, DAG, /*ForCall=*/true);
2409 } else if (Subtarget.isTarget64BitILP32() &&
2410 Callee.getValueType() == MVT::i32) {
    // Zero-extend the 32-bit Callee address into a 64-bit value, as required
    // by the x32 ABI.
2412 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2413 }
2414
2415 // Returns a chain & a glue for retval copy to use.
2416 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2417 SmallVector<SDValue, 8> Ops;
2418
2419 if (!IsSibcall && isTailCall && !IsMustTail) {
2420 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytesToPop, Size2: 0, Glue: InGlue, DL: dl);
2421 InGlue = Chain.getValue(R: 1);
2422 }
2423
2424 Ops.push_back(Elt: Chain);
2425 Ops.push_back(Elt: Callee);
2426
2427 if (isTailCall)
2428 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
2429
2430 // Add argument registers to the end of the list so that they are known live
2431 // into the call.
2432 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2433 Ops.push_back(Elt: DAG.getRegister(Reg: RegsToPass[i].first,
2434 VT: RegsToPass[i].second.getValueType()));
2435
2436 // Add a register mask operand representing the call-preserved registers.
2437 const uint32_t *Mask = [&]() {
2438 auto AdaptedCC = CallConv;
2439 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2440 // use X86_INTR calling convention because it has the same CSR mask
2441 // (same preserved registers).
2442 if (HasNCSR)
2443 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
    // If NoCalleeSavedRegisters is requested, then use GHC since it happens
    // to use the CSR_NoRegs_RegMask.
2446 if (CB && CB->hasFnAttr(Kind: "no_callee_saved_registers"))
2447 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2448 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2449 }();
2450 assert(Mask && "Missing call preserved mask for calling convention");
2451
2452 // If this is an invoke in a 32-bit function using a funclet-based
2453 // personality, assume the function clobbers all registers. If an exception
2454 // is thrown, the runtime will not restore CSRs.
2455 // FIXME: Model this more precisely so that we can register allocate across
2456 // the normal edge and spill and fill across the exceptional edge.
2457 if (!Is64Bit && CLI.CB && isa<InvokeInst>(Val: CLI.CB)) {
2458 const Function &CallerFn = MF.getFunction();
2459 EHPersonality Pers =
2460 CallerFn.hasPersonalityFn()
2461 ? classifyEHPersonality(Pers: CallerFn.getPersonalityFn())
2462 : EHPersonality::Unknown;
2463 if (isFuncletEHPersonality(Pers))
2464 Mask = RegInfo->getNoPreservedMask();
2465 }
2466
2467 // Define a new register mask from the existing mask.
2468 uint32_t *RegMask = nullptr;
2469
2470 // In some calling conventions we need to remove the used physical registers
2471 // from the reg mask. Create a new RegMask for such calling conventions.
2472 // RegMask for calling conventions that disable only return registers (e.g.
2473 // preserve_most) will be modified later in LowerCallResult.
2474 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CC: CallConv) || HasNCSR;
2475 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CC: CallConv)) {
2476 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2477
2478 // Allocate a new Reg Mask and copy Mask.
2479 RegMask = MF.allocateRegMask();
2480 unsigned RegMaskSize = MachineOperand::getRegMaskSize(NumRegs: TRI->getNumRegs());
2481 memcpy(dest: RegMask, src: Mask, n: sizeof(RegMask[0]) * RegMaskSize);
2482
2483 // Make sure all sub registers of the argument registers are reset
2484 // in the RegMask.
2485 if (ShouldDisableArgRegs) {
2486 for (auto const &RegPair : RegsToPass)
2487 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2488 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2489 }
2490
2491 // Create the RegMask Operand according to our updated mask.
2492 Ops.push_back(Elt: DAG.getRegisterMask(RegMask));
2493 } else {
2494 // Create the RegMask Operand according to the static mask.
2495 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
2496 }
2497
2498 if (InGlue.getNode())
2499 Ops.push_back(Elt: InGlue);
2500
2501 if (isTailCall) {
2502 // We used to do:
2503 //// If this is the first return lowered for this function, add the regs
2504 //// to the liveout set for the function.
2505 // This isn't right, although it's probably harmless on x86; liveouts
2506 // should be computed from returns not tail calls. Consider a void
2507 // function making a tail call to a function returning int.
2508 MF.getFrameInfo().setHasTailCall();
2509 SDValue Ret = DAG.getNode(Opcode: X86ISD::TC_RETURN, DL: dl, VTList: NodeTys, Ops);
2510
2511 if (IsCFICall)
2512 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2513
2514 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
2515 DAG.addCallSiteInfo(Node: Ret.getNode(), CallInfo: std::move(CSInfo));
2516 return Ret;
2517 }
2518
2519 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2520 Chain = DAG.getNode(Opcode: X86ISD::NT_CALL, DL: dl, VTList: NodeTys, Ops);
2521 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CB: CLI.CB)) {
2522 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2523 // expanded to the call, directly followed by a special marker sequence and
    // a call to an ObjC library function. Use CALL_RVMARKER to do that.
2525 assert(!isTailCall &&
2526 "tail calls cannot be marked with clang.arc.attachedcall");
2527 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2528
2529 // Add a target global address for the retainRV/claimRV runtime function
2530 // just before the call target.
2531 Function *ARCFn = *objcarc::getAttachedARCFunction(CB: CLI.CB);
2532 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
2533 auto GA = DAG.getTargetGlobalAddress(GV: ARCFn, DL: dl, VT: PtrVT);
2534 Ops.insert(I: Ops.begin() + 1, Elt: GA);
2535 Chain = DAG.getNode(Opcode: X86ISD::CALL_RVMARKER, DL: dl, VTList: NodeTys, Ops);
2536 } else {
2537 Chain = DAG.getNode(Opcode: X86ISD::CALL, DL: dl, VTList: NodeTys, Ops);
2538 }
2539
2540 if (IsCFICall)
2541 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2542
2543 InGlue = Chain.getValue(R: 1);
2544 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
2545 DAG.addCallSiteInfo(Node: Chain.getNode(), CallInfo: std::move(CSInfo));
2546
2547 // Save heapallocsite metadata.
2548 if (CLI.CB)
2549 if (MDNode *HeapAlloc = CLI.CB->getMetadata(Kind: "heapallocsite"))
2550 DAG.addHeapAllocSite(Node: Chain.getNode(), MD: HeapAlloc);
2551
2552 // Create the CALLSEQ_END node.
2553 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2554 if (X86::isCalleePop(CallingConv: CallConv, is64Bit: Is64Bit, IsVarArg: isVarArg,
2555 GuaranteeTCO: DAG.getTarget().Options.GuaranteedTailCallOpt))
2556 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2557 else if (!canGuaranteeTCO(CC: CallConv) && IsCalleePopSRet)
2558 // If this call passes a struct-return pointer, the callee
2559 // pops that struct pointer.
2560 NumBytesForCalleeToPop = 4;
2561
2562 // Returns a glue for retval copy to use.
2563 if (!IsSibcall) {
2564 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytesToPop, Size2: NumBytesForCalleeToPop,
2565 Glue: InGlue, DL: dl);
2566 InGlue = Chain.getValue(R: 1);
2567 }
2568
2569 if (CallingConv::PreserveNone == CallConv)
2570 for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
2571 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
2572 Outs[I].Flags.isSwiftError()) {
2573 errorUnsupported(DAG, dl,
2574 Msg: "Swift attributes can't be used with preserve_none");
2575 break;
2576 }
2577 }
2578
2579 // Handle result values, copying them out of physregs into vregs that we
2580 // return.
2581 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2582 InVals, RegMask);
2583}
2584
2585//===----------------------------------------------------------------------===//
2586// Fast Calling Convention (tail call) implementation
2587//===----------------------------------------------------------------------===//
2588
// Like stdcall, the callee cleans up the arguments, except that ECX is
// reserved for storing the address of the tail-called function. Only 2
// registers are free for argument passing (inreg). Tail call optimization is
// performed provided:
//   * tailcallopt is enabled
//   * caller/callee are fastcc
// On the X86_64 architecture with GOT-style position-independent code, only
// local (within-module) calls are supported at the moment.
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld
// for example.)
// If a tail-called callee has more arguments than the caller, the caller needs
// to make sure that there is room to move the RETADDR to. This is achieved by
// reserving an area the size of the argument delta right after the original
// RETADDR, but before the saved frame pointer or the spilled registers,
// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
2605// stack layout:
2606// arg1
2607// arg2
2608// RETADDR
2609// [ new RETADDR
2610// move area ]
2611// (possible EBP)
2612// ESI
2613// EDI
2614// local1 ..
2615
/// Round the stack size up so that, together with the return-address slot, it
/// satisfies the stack alignment requirement (e.g. sizes of the form 16n + 12
/// for a 16-byte alignment requirement and a 4-byte slot).
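///
/// For example, with a 16-byte stack alignment and an 8-byte slot, a StackSize
/// of 16 is rounded up to 24, so that 24 plus the 8-byte return address is a
/// multiple of 16.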
2618unsigned
2619X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2620 SelectionDAG &DAG) const {
2621 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2622 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2623 assert(StackSize % SlotSize == 0 &&
2624 "StackSize must be a multiple of SlotSize");
2625 return alignTo(Size: StackSize + SlotSize, A: StackAlignment) - SlotSize;
2626}
2627
/// Return true if the given stack call argument is already available at the
/// same relative position in the caller's incoming argument stack.
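///
/// Illustrative example: if f(int x) tail-calls g(x) and x is still sitting in
/// its original incoming argument slot, the outgoing argument already matches
/// and no store is needed for it.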
2630static
2631bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2632 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2633 const X86InstrInfo *TII, const CCValAssign &VA) {
2634 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2635
2636 for (;;) {
2637 // Look through nodes that don't alter the bits of the incoming value.
2638 unsigned Op = Arg.getOpcode();
2639 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2640 Op == ISD::AssertZext) {
2641 Arg = Arg.getOperand(i: 0);
2642 continue;
2643 }
2644 if (Op == ISD::TRUNCATE) {
2645 const SDValue &TruncInput = Arg.getOperand(i: 0);
2646 if (TruncInput.getOpcode() == ISD::AssertZext &&
2647 cast<VTSDNode>(Val: TruncInput.getOperand(i: 1))->getVT() ==
2648 Arg.getValueType()) {
2649 Arg = TruncInput.getOperand(i: 0);
2650 continue;
2651 }
2652 }
2653 break;
2654 }
2655
2656 int FI = INT_MAX;
2657 if (Arg.getOpcode() == ISD::CopyFromReg) {
2658 Register VR = cast<RegisterSDNode>(Val: Arg.getOperand(i: 1))->getReg();
2659 if (!VR.isVirtual())
2660 return false;
2661 MachineInstr *Def = MRI->getVRegDef(Reg: VR);
2662 if (!Def)
2663 return false;
2664 if (!Flags.isByVal()) {
2665 if (!TII->isLoadFromStackSlot(MI: *Def, FrameIndex&: FI))
2666 return false;
2667 } else {
2668 unsigned Opcode = Def->getOpcode();
2669 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2670 Opcode == X86::LEA64_32r) &&
2671 Def->getOperand(1).isFI()) {
2672 FI = Def->getOperand(i: 1).getIndex();
2673 Bytes = Flags.getByValSize();
2674 } else
2675 return false;
2676 }
2677 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val&: Arg)) {
2678 if (Flags.isByVal())
2679 // ByVal argument is passed in as a pointer but it's now being
2680 // dereferenced. e.g.
2681 // define @foo(%struct.X* %A) {
2682 // tail call @bar(%struct.X* byval %A)
2683 // }
2684 return false;
2685 SDValue Ptr = Ld->getBasePtr();
2686 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Val&: Ptr);
2687 if (!FINode)
2688 return false;
2689 FI = FINode->getIndex();
2690 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2691 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Val&: Arg);
2692 FI = FINode->getIndex();
2693 Bytes = Flags.getByValSize();
2694 } else
2695 return false;
2696
2697 assert(FI != INT_MAX);
2698 if (!MFI.isFixedObjectIndex(ObjectIdx: FI))
2699 return false;
2700
2701 if (Offset != MFI.getObjectOffset(ObjectIdx: FI))
2702 return false;
2703
2704 // If this is not byval, check that the argument stack object is immutable.
2705 // inalloca and argument copy elision can create mutable argument stack
2706 // objects. Byval objects can be mutated, but a byval call intends to pass the
2707 // mutated memory.
2708 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(ObjectIdx: FI))
2709 return false;
2710
2711 if (VA.getLocVT().getFixedSizeInBits() >
2712 Arg.getValueSizeInBits().getFixedValue()) {
2713 // If the argument location is wider than the argument type, check that any
2714 // extension flags match.
2715 if (Flags.isZExt() != MFI.isObjectZExt(ObjectIdx: FI) ||
2716 Flags.isSExt() != MFI.isObjectSExt(ObjectIdx: FI)) {
2717 return false;
2718 }
2719 }
2720
  return Bytes == MFI.getObjectSize(FI);
}

/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
    SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
    bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
  if (!mayTailCallThisCC(CalleeCC))
    return false;

  // If -tailcallopt is specified, make fastcc functions tail-callable.
  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();

  // If the function return type is x86_fp80 and the callee return type is not,
  // then the FP_EXTEND of the call result is not a nop. It's not safe to
  // perform a tailcall optimization here.
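  // e.g. the following cannot be lowered as a sibcall because the fpext after
  // the call does real work (illustrative IR):
  //   define x86_fp80 @caller() {
  //     %r = tail call double @callee()
  //     %e = fpext double %r to x86_fp80
  //     ret x86_fp80 %e
  //   }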
  if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
    return false;

  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;
  bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
  bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
  bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
      CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;

  // Win64 functions have extra shadow space for argument homing. Don't do the
  // sibcall if the caller and callee have mismatched expectations for this
  // space.
  if (IsCalleeWin64 != IsCallerWin64)
    return false;

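  // Under guaranteed TCO (-tailcallopt, tailcc, or swifttailcc) the ABI is
  // adjusted so that a call whose convention supports guaranteed TCO and
  // matches the caller's is always tail-called, e.g. with -tailcallopt
  // (illustrative IR):
  //   define fastcc i32 @caller(i32 %x) {
  //     %r = tail call fastcc i32 @callee(i32 %x)
  //     ret i32 %r
  //   }
  // None of the sibcall checks below apply in that mode.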
  if (IsGuaranteeTCO) {
    if (canGuaranteeTCO(CalleeCC) && CCMatch)
      return true;
    return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
  // emit a special epilogue.
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasStackRealignment(MF))
    return false;

  // Also avoid sibcall optimization if we're an sret return fn and the callee
  // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
  // insufficient.
  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
    // For a compatible tail call the callee must return our sret pointer. So it
    // needs to be (a) an sret function itself and (b) we pass our sret as its
    // sret. Condition #b is harder to determine.
    return false;
  } else if (IsCalleePopSRet)
    // The callee pops an sret, so we cannot tail-call, as our caller doesn't
    // expect that.
    return false;

  // Do not sibcall optimize vararg calls unless all arguments are passed via
  // registers.
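  // e.g. on 64-bit SysV a variadic call whose arguments all fit in registers
  // is still eligible here; any argument forced onto the stack disqualifies it.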
  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // Optimizing for varargs on Win64 is unlikely to be safe without
    // additional testing.
    if (IsCalleeWin64 || IsCallerWin64)
      return false;

    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
    for (const auto &VA : ArgLocs)
      if (!VA.isRegLoc())
        return false;
  }

  // If the call result is in ST0 / ST1, it needs to be popped off the x87
  // stack. Therefore, if it's not used by the call it is not safe to optimize
  // this into a sibcall.
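  // e.g. on 32-bit, an ignored 'double' result comes back in ST0 and the
  // caller must pop it with fstp, which a sibcall cannot do.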
  bool Unused = false;
  for (const auto &In : Ins) {
    if (!In.Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
    CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (const auto &VA : RVLocs) {
      if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
        return false;
    }
  }

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  RetCC_X86, RetCC_X86))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
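  // e.g. a preserve_mostcc caller cannot sibcall a plain ccc callee, since the
  // callee would be free to clobber registers the caller promised to keep.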
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  unsigned StackArgsSize = 0;

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);

    // Allocate shadow area for Win64
    if (IsCalleeWin64)
      CCInfo.AllocateStack(32, Align(8));

    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
    StackArgsSize = CCInfo.getStackSize();

    if (CCInfo.getStackSize()) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
        const CCValAssign &VA = ArgLocs[I];
        SDValue Arg = OutVals[I];
        ISD::ArgFlagsTy Flags = Outs[I].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
                                   TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
    // callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments so watch out for those.
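    // e.g. with PIC at most one of these registers may carry an argument (two
    // without PIC); otherwise nothing is left to hold the call target.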
    if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
                                  !isa<ExternalSymbolSDNode>(Callee)) ||
                                 PositionIndependent)) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
      unsigned MaxInRegs = PositionIndependent ? 2 : 3;

      for (const auto &VA : ArgLocs) {
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop them.
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}

/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
  // If GuaranteeTCO is true, we force some calls to be callee pop so that we
  // can guarantee TCO.
  if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
    return true;

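  // The conventions below are callee-pop in 32-bit code: the callee removes
  // its own stack arguments with 'ret imm16', e.g. a stdcall 'void f(int)'
  // returns with 'ret 4'.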
  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
    return !is64Bit;
  }
}
