X86.cpp source code [clang/lib/CodeGen/Targets/X86.cpp]

1	//===- X86.cpp ------------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "ABIInfoImpl.h"
10	#include "TargetInfo.h"
11	#include "clang/Basic/DiagnosticFrontend.h"
12	#include "llvm/ADT/SmallBitVector.h"
13
14	using namespace clang;
15	using namespace clang::CodeGen;
16
17	namespace {
18
19	/// IsX86_MMXType - Return true if this is an MMX type.
20	bool IsX86_MMXType(llvm::Type *IRType) {
21	// Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
22	return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == `64` &&
23	cast<llvm::VectorType>(Val: IRType)->getElementType()->isIntegerTy() &&
24	IRType->getScalarSizeInBits() != `64`;
25	}
26
27	static llvm::Type *X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
28	StringRef Constraint,
29	llvm::Type *Ty) {
30	if (Constraint == "k") {
31	llvm::Type *Int1Ty = llvm::Type::getInt1Ty(C&: CGF.getLLVMContext());
32	return llvm::FixedVectorType::get(ElementType: Int1Ty, NumElts: Ty->getScalarSizeInBits());
33	}
34
35	// No operation needed
36	return Ty;
37	}
38
39	/// Returns true if this type can be passed in SSE registers with the
40	/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
41	static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
42	if (const BuiltinType *BT = Ty ->getAs<BuiltinType>()) {
43	if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
44	if (BT->getKind() == BuiltinType::LongDouble) {
45	if (&Context.getTargetInfo().getLongDoubleFormat() ==
46	&llvm::APFloat::x87DoubleExtended())
47	return false;
48	}
49	return true;
50	}
51	} else if (const VectorType *VT = Ty ->getAs<VectorType>()) {
52	// vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
53	// registers specially.
54	unsigned VecSize = Context.getTypeSize(T: VT);
55	if (VecSize == `128` \|\| VecSize == `256` \|\| VecSize == `512`)
56	return true;
57	}
58	return false;
59	}
60
/// Returns true if an aggregate with \p NumMembers members is small enough to
/// be passed in SSE registers in the X86_VectorCall calling convention, i.e.
/// it has at most four members. Shared between x86_32 and x86_64.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
  constexpr uint64_t MaxMembers = 4;
  return !(NumMembers > MaxMembers);
}
66
67	/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
68	static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {
69	auto AI = ABIArgInfo::getDirect(T);
70	AI.setInReg(true);
71	AI.setCanBeFlattened(false);
72	return AI;
73	}
74
75	//===----------------------------------------------------------------------===//
76	// X86-32 ABI Implementation
77	//===----------------------------------------------------------------------===//
78
79	/// Similar to llvm::CCState, but for Clang.
80	struct CCState {
81	CCState(CGFunctionInfo &FI)
82	: IsPreassigned (FI.arg_size()), CC(FI.getCallingConvention()),
83	Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {}
84
85	llvm::SmallBitVector IsPreassigned;
86	unsigned CC = CallingConv::CC_C;
87	unsigned FreeRegs = `0`;
88	unsigned FreeSSERegs = `0`;
89	RequiredArgs Required;
90	bool IsDelegateCall = false;
91	};
92
93	/// X86_32ABIInfo - The X86-32 ABI information.
94	class X86_32ABIInfo : public ABIInfo {
95	enum Class {
96	Integer,
97	Float
98	};
99
100	static const unsigned MinABIStackAlignInBytes = `4`;
101
102	bool IsDarwinVectorABI;
103	bool IsRetSmallStructInRegABI;
104	bool IsWin32StructABI;
105	bool IsSoftFloatABI;
106	bool IsMCUABI;
107	bool IsLinuxABI;
108	unsigned DefaultNumRegisterParameters;
109
110	static bool isRegisterSize(unsigned Size) {
111	return (Size == `8` \|\| Size == `16` \|\| Size == `32` \|\| Size == `64`);
112	}
113
114	bool isHomogeneousAggregateBaseType(QualType Ty) const override {
115	// FIXME: Assumes vectorcall is in use.
116	return isX86VectorTypeForVectorCall(Context&: getContext(), Ty);
117	}
118
119	bool isHomogeneousAggregateSmallEnough(const Type *Ty,
120	uint64_t NumMembers) const override {
121	// FIXME: Assumes vectorcall is in use.
122	return isX86VectorCallAggregateSmallEnough(NumMembers);
123	}
124
125	bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;
126
127	/// getIndirectResult - Give a source type \arg Ty, return a suitable result
128	/// such that the argument will be passed in memory.
129	ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
130
131	ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
132
133	/// Return the alignment to use for the given type on the stack.
134	unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
135
136	Class classify(QualType Ty) const;
137	ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
138	ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State,
139	unsigned ArgIndex) const;
140
141	/// Updates the number of available free registers, returns
142	/// true if any registers were allocated.
143	bool updateFreeRegs(QualType Ty, CCState &State) const;
144
145	bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
146	bool &NeedsPadding) const;
147	bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;
148
149	bool canExpandIndirectArgument(QualType Ty) const;
150
151	/// Rewrite the function info so that all memory arguments use
152	/// inalloca.
153	void rewriteWithInAlloca(CGFunctionInfo &FI) const;
154
155	void addFieldToArgStruct(SmallVector<llvm::Type *, `6`> &FrameFields,
156	CharUnits &StackOffset, ABIArgInfo &Info,
157	QualType Type) const;
158	void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;
159
160	public:
161
162	void computeInfo(CGFunctionInfo &FI) const override;
163	RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
164	AggValueSlot Slot) const override;
165
166	X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
167	bool RetSmallStructInRegABI, bool Win32StructABI,
168	unsigned NumRegisterParameters, bool SoftFloatABI)
169	: ABIInfo (CGT), IsDarwinVectorABI(DarwinVectorABI),
170	IsRetSmallStructInRegABI(RetSmallStructInRegABI),
171	IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
172	IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
173	IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() \|\|
174	CGT.getTarget().getTriple().isOSCygMing()),
175	DefaultNumRegisterParameters(NumRegisterParameters) {}
176	};
177
178	class X86_32SwiftABIInfo : public SwiftABIInfo {
179	public:
180	explicit X86_32SwiftABIInfo(CodeGenTypes &CGT)
181	: SwiftABIInfo (CGT, /SwiftErrorInRegister=/false) {}
182
183	bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
184	bool AsReturnValue) const override {
185	// LLVM's x86-32 lowering currently only assigns up to three
186	// integer registers and three fp registers. Oddly, it'll use up to
187	// four vector registers for vectors, but those can overlap with the
188	// scalar registers.
189	return occupiesMoreThan(scalarTypes: ComponentTys, /total=/maxAllRegisters: `3`);
190	}
191	};
192
193	class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
194	public:
195	X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
196	bool RetSmallStructInRegABI, bool Win32StructABI,
197	unsigned NumRegisterParameters, bool SoftFloatABI)
198	: TargetCodeGenInfo (std::make_unique<X86_32ABIInfo>(
199	args&: CGT, args&: DarwinVectorABI, args&: RetSmallStructInRegABI, args&: Win32StructABI,
200	args&: NumRegisterParameters, args&: SoftFloatABI)) {
201	SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(args&: CGT);
202	}
203
204	static bool isStructReturnInRegABI(
205	const llvm::Triple &Triple, const CodeGenOptions &Opts);
206
207	void setTargetAttributes(const Decl D, llvm::GlobalValue GV,
208	CodeGen::CodeGenModule &CGM) const override;
209
210	int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
211	// Darwin uses different dwarf register numbers for EH.
212	if (CGM.getTarget().getTriple().isOSDarwin()) return `5`;
213	return `4`;
214	}
215
216	bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
217	llvm::Value Address) const* override;
218
219	llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
220	StringRef Constraint,
221	llvm::Type* Ty) const override {
222	return X86AdjustInlineAsmType(CGF, Constraint, Ty);
223	}
224
225	void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
226	std::string &Constraints,
227	std::vector<llvm::Type *> &ResultRegTypes,
228	std::vector<llvm::Type *> &ResultTruncRegTypes,
229	std::vector<LValue> &ResultRegDests,
230	std::string &AsmString,
231	unsigned NumOutputs) const override;
232
233	StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
234	return "movl\t%ebp, %ebp"
235	"\t\t// marker for objc_retainAutoreleaseReturnValue";
236	}
237	};
238
239	}
240
241	/// Rewrite input constraint references after adding some output constraints.
242	/// In the case where there is one output and one input and we add one output,
243	/// we need to replace all operand references greater than or equal to 1:
244	/// mov $0, $1
245	/// mov eax, $1
246	/// The result will be:
247	/// mov $0, $2
248	/// mov eax, $2
249	static void rewriteInputConstraintReferences(unsigned FirstIn,
250	unsigned NumNewOuts,
251	std::string &AsmString) {
252	std::string Buf;
253	llvm::raw_string_ostream OS(Buf);
254	size_t Pos = `0`;
255	while (Pos < AsmString.size()) {
256	size_t DollarStart = AsmString.find(c: `'$'`, pos: Pos);
257	if (DollarStart == std::string::npos)
258	DollarStart = AsmString.size();
259	size_t DollarEnd = AsmString.find_first_not_of(c: `'$'`, pos: DollarStart);
260	if (DollarEnd == std::string::npos)
261	DollarEnd = AsmString.size();
262	OS << StringRef(&AsmString [Pos], DollarEnd - Pos);
263	Pos = DollarEnd;
264	size_t NumDollars = DollarEnd - DollarStart;
265	if (NumDollars % `2` != `0` && Pos < AsmString.size()) {
266	// We have an operand reference.
267	size_t DigitStart = Pos;
268	if (AsmString [DigitStart] == `'{'`) {
269	OS << `'{'`;
270	++DigitStart;
271	}
272	size_t DigitEnd = AsmString.find_first_not_of(s: "0123456789", pos: DigitStart);
273	if (DigitEnd == std::string::npos)
274	DigitEnd = AsmString.size();
275	StringRef OperandStr(&AsmString [DigitStart], DigitEnd - DigitStart);
276	unsigned OperandIndex;
277	if (!OperandStr.getAsInteger(Radix: `10`, Result&: OperandIndex)) {
278	if (OperandIndex >= FirstIn)
279	OperandIndex += NumNewOuts;
280	OS << OperandIndex;
281	} else {
282	OS << OperandStr;
283	}
284	Pos = DigitEnd;
285	}
286	}
287	AsmString = std::move(Buf);
288	}
289
290	/// Add output constraints for EAX:EDX because they are return registers.
291	void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
292	CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
293	std::vector<llvm::Type *> &ResultRegTypes,
294	std::vector<llvm::Type *> &ResultTruncRegTypes,
295	std::vector<LValue> &ResultRegDests, std::string &AsmString,
296	unsigned NumOutputs) const {
297	uint64_t RetWidth = CGF.getContext().getTypeSize(T: ReturnSlot.getType());
298
299	// Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
300	// larger.
301	if (!Constraints.empty())
302	Constraints += `','`;
303	if (RetWidth <= `32`) {
304	Constraints += "={eax}";
305	ResultRegTypes.push_back(x: CGF.Int32Ty);
306	} else {
307	// Use the 'A' constraint for EAX:EDX.
308	Constraints += "=A";
309	ResultRegTypes.push_back(x: CGF.Int64Ty);
310	}
311
312	// Truncate EAX or EAX:EDX to an integer of the appropriate size.
313	llvm::Type *CoerceTy = llvm::IntegerType::get(C&: CGF.getLLVMContext(), NumBits: RetWidth);
314	ResultTruncRegTypes.push_back(x: CoerceTy);
315
316	// Coerce the integer by bitcasting the return slot pointer.
317	ReturnSlot.setAddress(ReturnSlot.getAddress().withElementType(ElemTy: CoerceTy));
318	ResultRegDests.push_back(x: ReturnSlot);
319
320	rewriteInputConstraintReferences(FirstIn: NumOutputs, NumNewOuts: `1`, AsmString);
321	}
322
323	/// shouldReturnTypeInRegister - Determine if the given type should be
324	/// returned in a register (for the Darwin and MCU ABI).
325	bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
326	ASTContext &Context) const {
327	uint64_t Size = Context.getTypeSize(T: Ty);
328
329	// For i386, type must be register sized.
330	// For the MCU ABI, it only needs to be <= 8-byte
331	if ((IsMCUABI && Size > `64`) \|\| (!IsMCUABI && !isRegisterSize(Size)))
332	return false;
333
334	if (Ty ->isVectorType()) {
335	// 64- and 128- bit vectors inside structures are not returned in
336	// registers.
337	if (Size == `64` \|\| Size == `128`)
338	return false;
339
340	return true;
341	}
342
343	// If this is a builtin, pointer, enum, complex type, member pointer, or
344	// member function pointer it is ok.
345	if (Ty ->getAs<BuiltinType>() \|\| Ty ->hasPointerRepresentation() \|\|
346	Ty ->isAnyComplexType() \|\| Ty ->isEnumeralType() \|\|
347	Ty ->isBlockPointerType() \|\| Ty ->isMemberPointerType())
348	return true;
349
350	// Arrays are treated like records.
351	if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T: Ty))
352	return shouldReturnTypeInRegister(Ty: AT->getElementType(), Context);
353
354	// Otherwise, it must be a record type.
355	const RecordType *RT = Ty ->getAs<RecordType>();
356	if (!RT) return false;
357
358	// FIXME: Traverse bases here too.
359
360	// Structure types are passed in register if all fields would be
361	// passed in a register.
362	for (const auto *FD : RT->getDecl()->fields()) {
363	// Empty fields are ignored.
364	if (isEmptyField(Context, FD, AllowArrays: true))
365	continue;
366
367	// Check fields recursively.
368	if (!shouldReturnTypeInRegister(Ty: FD->getType(), Context))
369	return false;
370	}
371	return true;
372	}
373
374	static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
375	// Treat complex types as the element type.
376	if (const ComplexType *CTy = Ty ->getAs<ComplexType>())
377	Ty = CTy->getElementType();
378
379	// Check for a type which we know has a simple scalar argument-passing
380	// convention without any padding. (We're specifically looking for 32
381	// and 64-bit integer and integer-equivalents, float, and double.)
382	if (!Ty ->getAs<BuiltinType>() && !Ty ->hasPointerRepresentation() &&
383	!Ty ->isEnumeralType() && !Ty ->isBlockPointerType())
384	return false;
385
386	uint64_t Size = Context.getTypeSize(T: Ty);
387	return Size == `32` \|\| Size == `64`;
388	}
389
390	static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
391	uint64_t &Size) {
392	for (const auto *FD : RD->fields()) {
393	// Scalar arguments on the stack get 4 byte alignment on x86. If the
394	// argument is smaller than 32-bits, expanding the struct will create
395	// alignment padding.
396	if (!is32Or64BitBasicType(Ty: FD->getType(), Context))
397	return false;
398
399	// FIXME: Reject bit-fields wholesale; there are two problems, we don't know
400	// how to expand them yet, and the predicate for telling if a bitfield still
401	// counts as "basic" is more complicated than what we were doing previously.
402	if (FD->isBitField())
403	return false;
404
405	Size += Context.getTypeSize(T: FD->getType());
406	}
407	return true;
408	}
409
410	static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
411	uint64_t &Size) {
412	// Don't do this if there are any non-empty bases.
413	for (const CXXBaseSpecifier &Base : RD->bases()) {
414	if (!addBaseAndFieldSizes(Context, RD: Base.getType()->getAsCXXRecordDecl(),
415	Size))
416	return false;
417	}
418	if (!addFieldSizes(Context, RD, Size))
419	return false;
420	return true;
421	}
422
423	/// Test whether an argument type which is to be passed indirectly (on the
424	/// stack) would have the equivalent layout if it was expanded into separate
425	/// arguments. If so, we prefer to do the latter to avoid inhibiting
426	/// optimizations.
427	bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
428	// We can only expand structure types.
429	const RecordType *RT = Ty ->getAs<RecordType>();
430	if (!RT)
431	return false;
432	const RecordDecl *RD = RT->getDecl();
433	uint64_t Size = `0`;
434	if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) {
435	if (!IsWin32StructABI) {
436	// On non-Windows, we have to conservatively match our old bitcode
437	// prototypes in order to be ABI-compatible at the bitcode level.
438	if (!CXXRD->isCLike())
439	return false;
440	} else {
441	// Don't do this for dynamic classes.
442	if (CXXRD->isDynamicClass())
443	return false;
444	}
445	if (!addBaseAndFieldSizes(Context&: getContext(), RD: CXXRD, Size))
446	return false;
447	} else {
448	if (!addFieldSizes(Context&: getContext(), RD, Size))
449	return false;
450	}
451
452	// We can do this if there was no alignment padding.
453	return Size == getContext().getTypeSize(T: Ty);
454	}
455
456	ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const {
457	// If the return value is indirect, then the hidden argument is consuming one
458	// integer register.
459	if (State.CC != llvm::CallingConv::X86_FastCall &&
460	State.CC != llvm::CallingConv::X86_VectorCall && State.FreeRegs) {
461	--State.FreeRegs;
462	if (!IsMCUABI)
463	return getNaturalAlignIndirectInReg(Ty: RetTy);
464	}
465	return getNaturalAlignIndirect(
466	Ty: RetTy, /AddrSpace=/getDataLayout().getAllocaAddrSpace(),
467	/ByVal=/false);
468	}
469
470	ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
471	CCState &State) const {
472	if (RetTy ->isVoidType())
473	return ABIArgInfo::getIgnore();
474
475	const Type Base = nullptr*;
476	uint64_t NumElts = `0`;
477	if ((State.CC == llvm::CallingConv::X86_VectorCall \|\|
478	State.CC == llvm::CallingConv::X86_RegCall) &&
479	isHomogeneousAggregate(Ty: RetTy, Base, Members&: NumElts)) {
480	// The LLVM struct type for such an aggregate should lower properly.
481	return ABIArgInfo::getDirect();
482	}
483
484	if (const VectorType *VT = RetTy ->getAs<VectorType>()) {
485	// On Darwin, some vectors are returned in registers.
486	if (IsDarwinVectorABI) {
487	uint64_t Size = getContext().getTypeSize(T: RetTy);
488
489	// 128-bit vectors are a special case; they are returned in
490	// registers and we need to make sure to pick a type the LLVM
491	// backend will like.
492	if (Size == `128`)
493	return ABIArgInfo::getDirect(T: llvm::FixedVectorType::get(
494	ElementType: llvm::Type::getInt64Ty(C&: getVMContext()), NumElts: `2`));
495
496	// Always return in register if it fits in a general purpose
497	// register, or if it is 64 bits and has a single element.
498	if ((Size == `8` \|\| Size == `16` \|\| Size == `32`) \|\|
499	(Size == `64` && VT->getNumElements() == `1`))
500	return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(),
501	NumBits: Size));
502
503	return getIndirectReturnResult(RetTy, State);
504	}
505
506	return ABIArgInfo::getDirect();
507	}
508
509	if (isAggregateTypeForABI(T: RetTy)) {
510	if (const RecordType *RT = RetTy ->getAs<RecordType>()) {
511	// Structures with flexible arrays are always indirect.
512	if (RT->getDecl()->hasFlexibleArrayMember())
513	return getIndirectReturnResult(RetTy, State);
514	}
515
516	// If specified, structs and unions are always indirect.
517	if (!IsRetSmallStructInRegABI && !RetTy ->isAnyComplexType())
518	return getIndirectReturnResult(RetTy, State);
519
520	// Ignore empty structs/unions.
521	if (isEmptyRecord(Context&: getContext(), T: RetTy, AllowArrays: true))
522	return ABIArgInfo::getIgnore();
523
524	// Return complex of _Float16 as <2 x half> so the backend will use xmm0.
525	if (const ComplexType *CT = RetTy ->getAs<ComplexType>()) {
526	QualType ET = getContext().getCanonicalType(T: CT->getElementType());
527	if (ET ->isFloat16Type())
528	return ABIArgInfo::getDirect(T: llvm::FixedVectorType::get(
529	ElementType: llvm::Type::getHalfTy(C&: getVMContext()), NumElts: `2`));
530	}
531
532	// Small structures which are register sized are generally returned
533	// in a register.
534	if (shouldReturnTypeInRegister(Ty: RetTy, Context&: getContext())) {
535	uint64_t Size = getContext().getTypeSize(T: RetTy);
536
537	// As a special-case, if the struct is a "single-element" struct, and
538	// the field is of type "float" or "double", return it in a
539	// floating-point register. (MSVC does not apply this special case.)
540	// We apply a similar transformation for pointer types to improve the
541	// quality of the generated IR.
542	if (const Type *SeltTy = isSingleElementStruct(T: RetTy, Context&: getContext()))
543	if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
544	\|\| SeltTy->hasPointerRepresentation())
545	return ABIArgInfo::getDirect(T: CGT.ConvertType(T: QualType (SeltTy, `0`)));
546
547	// FIXME: We should be able to narrow this integer in cases with dead
548	// padding.
549	return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(),NumBits: Size));
550	}
551
552	return getIndirectReturnResult(RetTy, State);
553	}
554
555	// Treat an enum type as its underlying type.
556	if (const EnumType *EnumTy = RetTy ->getAs<EnumType>())
557	RetTy = EnumTy->getDecl()->getIntegerType();
558
559	if (const auto *EIT = RetTy ->getAs<BitIntType>())
560	if (EIT->getNumBits() > `64`)
561	return getIndirectReturnResult(RetTy, State);
562
563	return (isPromotableIntegerTypeForABI(Ty: RetTy) ? ABIArgInfo::getExtend(Ty: RetTy)
564	: ABIArgInfo::getDirect());
565	}
566
567	unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
568	unsigned Align) const {
569	// Otherwise, if the alignment is less than or equal to the minimum ABI
570	// alignment, just use the default; the backend will handle this.
571	if (Align <= MinABIStackAlignInBytes)
572	return `0`; // Use default alignment.
573
574	if (IsLinuxABI) {
575	// Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't
576	// want to spend any effort dealing with the ramifications of ABI breaks.
577	//
578	// If the vector type is __m128/__m256/__m512, return the default alignment.
579	if (Ty ->isVectorType() && (Align == `16` \|\| Align == `32` \|\| Align == `64`))
580	return Align;
581	}
582	// On non-Darwin, the stack type alignment is always 4.
583	if (!IsDarwinVectorABI) {
584	// Set explicit alignment, since we may need to realign the top.
585	return MinABIStackAlignInBytes;
586	}
587
588	// Otherwise, if the type contains an SSE vector type, the alignment is 16.
589	if (Align >= `16` && (isSIMDVectorType(Context&: getContext(), Ty) \|\|
590	isRecordWithSIMDVectorType(Context&: getContext(), Ty)))
591	return `16`;
592
593	return MinABIStackAlignInBytes;
594	}
595
596	ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
597	CCState &State) const {
598	if (!ByVal) {
599	if (State.FreeRegs) {
600	--State.FreeRegs; // Non-byval indirects just use one pointer.
601	if (!IsMCUABI)
602	return getNaturalAlignIndirectInReg(Ty);
603	}
604	return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace(),
605	ByVal: false);
606	}
607
608	// Compute the byval alignment.
609	unsigned TypeAlign = getContext().getTypeAlign(T: Ty) / `8`;
610	unsigned StackAlign = getTypeStackAlignInBytes(Ty, Align: TypeAlign);
611	if (StackAlign == `0`)
612	return ABIArgInfo::getIndirect(
613	Alignment: CharUnits::fromQuantity(Quantity: `4`),
614	/AddrSpace=/getDataLayout().getAllocaAddrSpace(),
615	/ByVal=/true);
616
617	// If the stack alignment is less than the type alignment, realign the
618	// argument.
619	bool Realign = TypeAlign > StackAlign;
620	return ABIArgInfo::getIndirect(
621	Alignment: CharUnits::fromQuantity(Quantity: StackAlign),
622	/AddrSpace=/getDataLayout().getAllocaAddrSpace(), /ByVal=/true,
623	Realign);
624	}
625
626	X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
627	const Type *T = isSingleElementStruct(T: Ty, Context&: getContext());
628	if (!T)
629	T = Ty.getTypePtr();
630
631	if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
632	BuiltinType::Kind K = BT->getKind();
633	if (K == BuiltinType::Float \|\| K == BuiltinType::Double)
634	return Float;
635	}
636	return Integer;
637	}
638
639	bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
640	if (!IsSoftFloatABI) {
641	Class C = classify(Ty);
642	if (C == Float)
643	return false;
644	}
645
646	unsigned Size = getContext().getTypeSize(T: Ty);
647	unsigned SizeInRegs = (Size + `31`) / `32`;
648
649	if (SizeInRegs == `0`)
650	return false;
651
652	if (!IsMCUABI) {
653	if (SizeInRegs > State.FreeRegs) {
654	State.FreeRegs = `0`;
655	return false;
656	}
657	} else {
658	// The MCU psABI allows passing parameters in-reg even if there are
659	// earlier parameters that are passed on the stack. Also,
660	// it does not allow passing >8-byte structs in-register,
661	// even if there are 3 free registers available.
662	if (SizeInRegs > State.FreeRegs \|\| SizeInRegs > `2`)
663	return false;
664	}
665
666	State.FreeRegs -= SizeInRegs;
667	return true;
668	}
669
670	bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
671	bool &InReg,
672	bool &NeedsPadding) const {
673	// On Windows, aggregates other than HFAs are never passed in registers, and
674	// they do not consume register slots. Homogenous floating-point aggregates
675	// (HFAs) have already been dealt with at this point.
676	if (IsWin32StructABI && isAggregateTypeForABI(T: Ty))
677	return false;
678
679	NeedsPadding = false;
680	InReg = !IsMCUABI;
681
682	if (!updateFreeRegs(Ty, State))
683	return false;
684
685	if (IsMCUABI)
686	return true;
687
688	if (State.CC == llvm::CallingConv::X86_FastCall \|\|
689	State.CC == llvm::CallingConv::X86_VectorCall \|\|
690	State.CC == llvm::CallingConv::X86_RegCall) {
691	if (getContext().getTypeSize(T: Ty) <= `32` && State.FreeRegs)
692	NeedsPadding = true;
693
694	return false;
695	}
696
697	return true;
698	}
699
700	bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
701	bool IsPtrOrInt = (getContext().getTypeSize(T: Ty) <= `32`) &&
702	(Ty ->isIntegralOrEnumerationType() \|\| Ty ->isPointerType() \|\|
703	Ty ->isReferenceType());
704
705	if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall \|\|
706	State.CC == llvm::CallingConv::X86_VectorCall))
707	return false;
708
709	if (!updateFreeRegs(Ty, State))
710	return false;
711
712	if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
713	return false;
714
715	// Return true to apply inreg to all legal parameters except for MCU targets.
716	return !IsMCUABI;
717	}
718
719	void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
720	// Vectorcall x86 works subtly different than in x64, so the format is
721	// a bit different than the x64 version. First, all vector types (not HVAs)
722	// are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
723	// This differs from the x64 implementation, where the first 6 by INDEX get
724	// registers.
725	// In the second pass over the arguments, HVAs are passed in the remaining
726	// vector registers if possible, or indirectly by address. The address will be
727	// passed in ECX/EDX if available. Any other arguments are passed according to
728	// the usual fastcall rules.
729	MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
730	for (int I = `0`, E = Args.size(); I < E; ++I) {
731	const Type Base = nullptr*;
732	uint64_t NumElts = `0`;
733	const QualType &Ty = Args [I].type;
734	if ((Ty ->isVectorType() \|\| Ty ->isBuiltinType()) &&
735	isHomogeneousAggregate(Ty, Base, Members&: NumElts)) {
736	if (State.FreeSSERegs >= NumElts) {
737	State.FreeSSERegs -= NumElts;
738	Args [I].info = ABIArgInfo::getDirectInReg();
739	State.IsPreassigned.set(I);
740	}
741	}
742	}
743	}
744
745	ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
746	unsigned ArgIndex) const {
747	// FIXME: Set alignment on indirect arguments.
748	bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
749	bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
750	bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
751
752	Ty = useFirstFieldIfTransparentUnion(Ty);
753	TypeInfo TI = getContext().getTypeInfo(T: Ty);
754
755	// Check with the C++ ABI first.
756	const RecordType *RT = Ty ->getAs<RecordType>();
757	if (RT) {
758	CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, CXXABI&: getCXXABI());
759	if (RAA == CGCXXABI::RAA_Indirect) {
760	return getIndirectResult(Ty, ByVal: false, State);
761	} else if (State.IsDelegateCall) {
762	// Avoid having different alignments on delegate call args by always
763	// setting the alignment to 4, which is what we do for inallocas.
764	ABIArgInfo Res = getIndirectResult(Ty, ByVal: false, State);
765	Res.setIndirectAlign(CharUnits::fromQuantity(Quantity: `4`));
766	return Res;
767	} else if (RAA == CGCXXABI::RAA_DirectInMemory) {
768	// The field index doesn't matter, we'll fix it up later.
769	return ABIArgInfo::getInAlloca(/FieldIndex=/`0`);
770	}
771	}
772
773	// Regcall uses the concept of a homogenous vector aggregate, similar
774	// to other targets.
775	const Type Base = nullptr*;
776	uint64_t NumElts = `0`;
777	if ((IsRegCall \|\| IsVectorCall) &&
778	isHomogeneousAggregate(Ty, Base, Members&: NumElts)) {
779	if (State.FreeSSERegs >= NumElts) {
780	State.FreeSSERegs -= NumElts;
781
782	// Vectorcall passes HVAs directly and does not flatten them, but regcall
783	// does.
784	if (IsVectorCall)
785	return getDirectX86Hva();
786
787	if (Ty ->isBuiltinType() \|\| Ty ->isVectorType())
788	return ABIArgInfo::getDirect();
789	return ABIArgInfo::getExpand();
790	}
791	if (IsVectorCall && Ty ->isBuiltinType())
792	return ABIArgInfo::getDirect();
793	return getIndirectResult(Ty, /ByVal=/false, State);
794	}
795
796	if (isAggregateTypeForABI(T: Ty)) {
797	// Structures with flexible arrays are always indirect.
798	// FIXME: This should not be byval!
799	if (RT && RT->getDecl()->hasFlexibleArrayMember())
800	return getIndirectResult(Ty, ByVal: true, State);
801
802	// Ignore empty structs/unions on non-Windows.
803	if (!IsWin32StructABI && isEmptyRecord(Context&: getContext(), T: Ty, AllowArrays: true))
804	return ABIArgInfo::getIgnore();
805
806	// Ignore 0 sized structs.
807	if (TI.Width == `0`)
808	return ABIArgInfo::getIgnore();
809
810	llvm::LLVMContext &LLVMContext = getVMContext();
811	llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(C&: LLVMContext);
812	bool NeedsPadding = false;
813	bool InReg;
814	if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
815	unsigned SizeInRegs = (TI.Width + `31`) / `32`;
816	SmallVector<llvm::Type*, `3`> Elements(SizeInRegs, Int32);
817	llvm::Type *Result = llvm::StructType::get(Context&: LLVMContext, Elements);
818	if (InReg)
819	return ABIArgInfo::getDirectInReg(T: Result);
820	else
821	return ABIArgInfo::getDirect(T: Result);
822	}
823	llvm::IntegerType PaddingType = NeedsPadding ? Int32 : nullptr*;
824
825	// Pass over-aligned aggregates to non-variadic functions on Windows
826	// indirectly. This behavior was added in MSVC 2015. Use the required
827	// alignment from the record layout, since that may be less than the
828	// regular type alignment, and types with required alignment of less than 4
829	// bytes are not passed indirectly.
830	if (IsWin32StructABI && State.Required.isRequiredArg(argIdx: ArgIndex)) {
831	unsigned AlignInBits = `0`;
832	if (RT) {
833	const ASTRecordLayout &Layout =
834	getContext().getASTRecordLayout(D: RT->getDecl());
835	AlignInBits = getContext().toBits(CharSize: Layout.getRequiredAlignment());
836	} else if (TI.isAlignRequired()) {
837	AlignInBits = TI.Align;
838	}
839	if (AlignInBits > `32`)
840	return getIndirectResult(Ty, /ByVal=/false, State);
841	}
842
843	// Expand small (<= 128-bit) record types when we know that the stack layout
844	// of those arguments will match the struct. This is important because the
845	// LLVM backend isn't smart enough to remove byval, which inhibits many
846	// optimizations.
847	// Don't do this for the MCU if there are still free integer registers
848	// (see X86_64 ABI for full explanation).
849	if (TI.Width <= `4` * `32` && (!IsMCUABI \|\| State.FreeRegs == `0`) &&
850	canExpandIndirectArgument(Ty))
851	return ABIArgInfo::getExpandWithPadding(
852	PaddingInReg: IsFastCall \|\| IsVectorCall \|\| IsRegCall, Padding: PaddingType);
853
854	return getIndirectResult(Ty, ByVal: true, State);
855	}
856
857	if (const VectorType *VT = Ty ->getAs<VectorType>()) {
858	// On Windows, vectors are passed directly if registers are available, or
859	// indirectly if not. This avoids the need to align argument memory. Pass
860	// user-defined vector types larger than 512 bits indirectly for simplicity.
861	if (IsWin32StructABI) {
862	if (TI.Width <= `512` && State.FreeSSERegs > `0`) {
863	--State.FreeSSERegs;
864	return ABIArgInfo::getDirectInReg();
865	}
866	return getIndirectResult(Ty, /ByVal=/false, State);
867	}
868
869	// On Darwin, some vectors are passed in memory, we handle this by passing
870	// it as an i8/i16/i32/i64.
871	if (IsDarwinVectorABI) {
872	if ((TI.Width == `8` \|\| TI.Width == `16` \|\| TI.Width == `32`) \|\|
873	(TI.Width == `64` && VT->getNumElements() == `1`))
874	return ABIArgInfo::getDirect(
875	T: llvm::IntegerType::get(C&: getVMContext(), NumBits: TI.Width));
876	}
877
878	if (IsX86_MMXType(IRType: CGT.ConvertType(T: Ty)))
879	return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(), NumBits: `64`));
880
881	return ABIArgInfo::getDirect();
882	}
883
884
885	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
886	Ty = EnumTy->getDecl()->getIntegerType();
887
888	bool InReg = shouldPrimitiveUseInReg(Ty, State);
889
890	if (isPromotableIntegerTypeForABI(Ty)) {
891	if (InReg)
892	return ABIArgInfo::getExtendInReg(Ty, T: CGT.ConvertType(T: Ty));
893	return ABIArgInfo::getExtend(Ty, T: CGT.ConvertType(T: Ty));
894	}
895
896	if (const auto *EIT = Ty ->getAs<BitIntType>()) {
897	if (EIT->getNumBits() <= `64`) {
898	if (InReg)
899	return ABIArgInfo::getDirectInReg();
900	return ABIArgInfo::getDirect();
901	}
902	return getIndirectResult(Ty, /ByVal=/false, State);
903	}
904
905	if (InReg)
906	return ABIArgInfo::getDirectInReg();
907	return ABIArgInfo::getDirect();
908	}
909
910	void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
911	CCState State(FI);
912	if (IsMCUABI)
913	State.FreeRegs = `3`;
914	else if (State.CC == llvm::CallingConv::X86_FastCall) {
915	State.FreeRegs = `2`;
916	State.FreeSSERegs = `3`;
917	} else if (State.CC == llvm::CallingConv::X86_VectorCall) {
918	State.FreeRegs = `2`;
919	State.FreeSSERegs = `6`;
920	} else if (FI.getHasRegParm())
921	State.FreeRegs = FI.getRegParm();
922	else if (State.CC == llvm::CallingConv::X86_RegCall) {
923	State.FreeRegs = `5`;
924	State.FreeSSERegs = `8`;
925	} else if (IsWin32StructABI) {
926	// Since MSVC 2015, the first three SSE vectors have been passed in
927	// registers. The rest are passed indirectly.
928	State.FreeRegs = DefaultNumRegisterParameters;
929	State.FreeSSERegs = `3`;
930	} else
931	State.FreeRegs = DefaultNumRegisterParameters;
932
933	if (!::classifyReturnType(CXXABI: getCXXABI(), FI, Info: *this)) {
934	FI.getReturnInfo() = classifyReturnType(RetTy: FI.getReturnType(), State);
935	} else if (FI.getReturnInfo().isIndirect()) {
936	// The C++ ABI is not aware of register usage, so we have to check if the
937	// return value was sret and put it in a register ourselves if appropriate.
938	if (State.FreeRegs) {
939	--State.FreeRegs; // The sret parameter consumes a register.
940	if (!IsMCUABI)
941	FI.getReturnInfo().setInReg(true);
942	}
943	}
944
945	// The chain argument effectively gives us another free register.
946	if (FI.isChainCall())
947	++State.FreeRegs;
948
949	// For vectorcall, do a first pass over the arguments, assigning FP and vector
950	// arguments to XMM registers as available.
951	if (State.CC == llvm::CallingConv::X86_VectorCall)
952	runVectorCallFirstPass(FI, State);
953
954	bool UsedInAlloca = false;
955	MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
956	for (unsigned I = `0`, E = Args.size(); I < E; ++I) {
957	// Skip arguments that have already been assigned.
958	if (State.IsPreassigned.test(Idx: I))
959	continue;
960
961	Args [I].info =
962	classifyArgumentType(Ty: Args [I].type, State, ArgIndex: I);
963	UsedInAlloca \|= (Args [I].info.getKind() == ABIArgInfo::InAlloca);
964	}
965
966	// If we needed to use inalloca for any argument, do a second pass and rewrite
967	// all the memory arguments to use inalloca.
968	if (UsedInAlloca)
969	rewriteWithInAlloca(FI);
970	}
971
972	void
973	X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, `6`> &FrameFields,
974	CharUnits &StackOffset, ABIArgInfo &Info,
975	QualType Type) const {
976	// Arguments are always 4-byte-aligned.
977	CharUnits WordSize = CharUnits::fromQuantity(Quantity: `4`);
978	assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
979
980	// sret pointers and indirect things will require an extra pointer
981	// indirection, unless they are byval. Most things are byval, and will not
982	// require this indirection.
983	bool IsIndirect = false;
984	if (Info.isIndirect() && !Info.getIndirectByVal())
985	IsIndirect = true;
986	Info = ABIArgInfo::getInAlloca(FieldIndex: FrameFields.size(), Indirect: IsIndirect);
987	llvm::Type *LLTy = CGT.ConvertTypeForMem(T: Type);
988	if (IsIndirect)
989	LLTy = llvm::PointerType::getUnqual(C&: getVMContext());
990	FrameFields.push_back(Elt: LLTy);
991	StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(T: Type);
992
993	// Insert padding bytes to respect alignment.
994	CharUnits FieldEnd = StackOffset;
995	StackOffset = FieldEnd.alignTo(Align: WordSize);
996	if (StackOffset != FieldEnd) {
997	CharUnits NumBytes = StackOffset - FieldEnd;
998	llvm::Type *Ty = llvm::Type::getInt8Ty(C&: getVMContext());
999	Ty = llvm::ArrayType::get(ElementType: Ty, NumElements: NumBytes.getQuantity());
1000	FrameFields.push_back(Elt: Ty);
1001	}
1002	}
1003
1004	static bool isArgInAlloca(const ABIArgInfo &Info) {
1005	// Leave ignored and inreg arguments alone.
1006	switch (Info.getKind()) {
1007	case ABIArgInfo::InAlloca:
1008	return true;
1009	case ABIArgInfo::Ignore:
1010	case ABIArgInfo::IndirectAliased:
1011	return false;
1012	case ABIArgInfo::Indirect:
1013	case ABIArgInfo::Direct:
1014	case ABIArgInfo::Extend:
1015	return !Info.getInReg();
1016	case ABIArgInfo::Expand:
1017	case ABIArgInfo::CoerceAndExpand:
1018	// These are aggregate types which are never passed in registers when
1019	// inalloca is involved.
1020	return true;
1021	}
1022	llvm_unreachable("invalid enum");
1023	}
1024
1025	void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
1026	assert(IsWin32StructABI && "inalloca only supported on win32");
1027
1028	// Build a packed struct type for all of the arguments in memory.
1029	SmallVector<llvm::Type *, `6`> FrameFields;
1030
1031	// The stack alignment is always 4.
1032	CharUnits StackAlign = CharUnits::fromQuantity(Quantity: `4`);
1033
1034	CharUnits StackOffset;
1035	CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();
1036
1037	// Put 'this' into the struct before 'sret', if necessary.
1038	bool IsThisCall =
1039	FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
1040	ABIArgInfo &Ret = FI.getReturnInfo();
1041	if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
1042	isArgInAlloca(Info: I->info)) {
1043	addFieldToArgStruct(FrameFields, StackOffset, Info&: I->info, Type: I->type);
1044	++I;
1045	}
1046
1047	// Put the sret parameter into the inalloca struct if it's in memory.
1048	if (Ret.isIndirect() && !Ret.getInReg()) {
1049	addFieldToArgStruct(FrameFields, StackOffset, Info&: Ret, Type: FI.getReturnType());
1050	// On Windows, the hidden sret parameter is always returned in eax.
1051	Ret.setInAllocaSRet(IsWin32StructABI);
1052	}
1053
1054	// Skip the 'this' parameter in ecx.
1055	if (IsThisCall)
1056	++I;
1057
1058	// Put arguments passed in memory into the struct.
1059	for (; I != E; ++I) {
1060	if (isArgInAlloca(Info: I->info))
1061	addFieldToArgStruct(FrameFields, StackOffset, Info&: I->info, Type: I->type);
1062	}
1063
1064	FI.setArgStruct(Ty: llvm::StructType::get(Context&: getVMContext(), Elements: FrameFields,
1065	/isPacked=/true),
1066	Align: StackAlign);
1067	}
1068
1069	RValue X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
1070	QualType Ty, AggValueSlot Slot) const {
1071
1072	auto TypeInfo = getContext().getTypeInfoInChars(T: Ty);
1073
1074	CCState State(*const_cast<CGFunctionInfo *>(CGF.CurFnInfo));
1075	ABIArgInfo AI = classifyArgumentType(Ty, State, /ArgIndex/ `0`);
1076	// Empty records are ignored for parameter passing purposes.
1077	if (AI.isIgnore())
1078	return Slot.asRValue();
1079
1080	// x86-32 changes the alignment of certain arguments on the stack.
1081	//
1082	// Just messing with TypeInfo like this works because we never pass
1083	// anything indirectly.
1084	TypeInfo.Align = CharUnits::fromQuantity(
1085	Quantity: getTypeStackAlignInBytes(Ty, Align: TypeInfo.Align.getQuantity()));
1086
1087	return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, /Indirect/ IsIndirect: false, ValueInfo: TypeInfo,
1088	SlotSizeAndAlign: CharUnits::fromQuantity(Quantity: `4`),
1089	/AllowHigherAlign/ true, Slot);
1090	}
1091
1092	bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
1093	const llvm::Triple &Triple, const CodeGenOptions &Opts) {
1094	assert(Triple.getArch() == llvm::Triple::x86);
1095
1096	switch (Opts.getStructReturnConvention()) {
1097	case CodeGenOptions::SRCK_Default:
1098	break;
1099	case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return
1100	return false;
1101	case CodeGenOptions::SRCK_InRegs: // -freg-struct-return
1102	return true;
1103	}
1104
1105	if (Triple.isOSDarwin() \|\| Triple.isOSIAMCU())
1106	return true;
1107
1108	switch (Triple.getOS()) {
1109	case llvm::Triple::DragonFly:
1110	case llvm::Triple::FreeBSD:
1111	case llvm::Triple::OpenBSD:
1112	case llvm::Triple::Win32:
1113	return true;
1114	default:
1115	return false;
1116	}
1117	}
1118
1119	static void addX86InterruptAttrs(const FunctionDecl FD, llvm::GlobalValue GV,
1120	CodeGen::CodeGenModule &CGM) {
1121	if (!FD->hasAttr<AnyX86InterruptAttr>())
1122	return;
1123
1124	llvm::Function *Fn = cast<llvm::Function>(Val: GV);
1125	Fn->setCallingConv(llvm::CallingConv::X86_INTR);
1126	if (FD->getNumParams() == `0`)
1127	return;
1128
1129	auto PtrTy = cast<PointerType>(Val: FD->getParamDecl(i: `0`)->getType());
1130	llvm::Type *ByValTy = CGM.getTypes().ConvertType(T: PtrTy->getPointeeType());
1131	llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
1132	Context&: Fn->getContext(), Ty: ByValTy);
1133	Fn->addParamAttr(ArgNo: `0`, Attr: NewAttr);
1134	}
1135
1136	void X86_32TargetCodeGenInfo::setTargetAttributes(
1137	const Decl D, llvm::GlobalValue GV, CodeGen::CodeGenModule &CGM) const {
1138	if (GV->isDeclaration())
1139	return;
1140	if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D)) {
1141	if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
1142	llvm::Function *Fn = cast<llvm::Function>(Val: GV);
1143	Fn->addFnAttr(Kind: "stackrealign");
1144	}
1145
1146	addX86InterruptAttrs(FD, GV, CGM);
1147	}
1148	}
1149
1150	bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
1151	CodeGen::CodeGenFunction &CGF,
1152	llvm::Value Address) const* {
1153	CodeGen::CGBuilderTy &Builder = CGF.Builder;
1154
1155	llvm::Value *Four8 = llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: `4`);
1156
1157	// 0-7 are the eight integer registers; the order is different
1158	// on Darwin (for EH), but the range is the same.
1159	// 8 is %eip.
1160	AssignToArrayRange(Builder, Array: Address, Value: Four8, FirstIndex: `0`, LastIndex: `8`);
1161
1162	if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
1163	// 12-16 are st(0..4). Not sure why we stop at 4.
1164	// These have size 16, which is sizeof(long double) on
1165	// platforms with 8-byte alignment for that type.
1166	llvm::Value *Sixteen8 = llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: `16`);
1167	AssignToArrayRange(Builder, Array: Address, Value: Sixteen8, FirstIndex: `12`, LastIndex: `16`);
1168
1169	} else {
1170	// 9 is %eflags, which doesn't get a size on Darwin for some
1171	// reason.
1172	Builder.CreateAlignedStore(
1173	Val: Four8, Addr: Builder.CreateConstInBoundsGEP1_32(Ty: CGF.Int8Ty, Ptr: Address, Idx0: `9`),
1174	Align: CharUnits::One());
1175
1176	// 11-16 are st(0..5). Not sure why we stop at 5.
1177	// These have size 12, which is sizeof(long double) on
1178	// platforms with 4-byte alignment for that type.
1179	llvm::Value *Twelve8 = llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: `12`);
1180	AssignToArrayRange(Builder, Array: Address, Value: Twelve8, FirstIndex: `11`, LastIndex: `16`);
1181	}
1182
1183	return false;
1184	}
1185
1186	//===----------------------------------------------------------------------===//
1187	// X86-64 ABI Implementation
1188	//===----------------------------------------------------------------------===//
1189
1190
1191	namespace {
1192
1193	/// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
1194	static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
1195	switch (AVXLevel) {
1196	case X86AVXABILevel::AVX512:
1197	return `512`;
1198	case X86AVXABILevel::AVX:
1199	return `256`;
1200	case X86AVXABILevel::None:
1201	return `128`;
1202	}
1203	llvm_unreachable("Unknown AVXLevel");
1204	}
1205
1206	/// X86_64ABIInfo - The X86_64 ABI information.
1207	class X86_64ABIInfo : public ABIInfo {
1208	enum Class {
1209	Integer = `0`,
1210	SSE,
1211	SSEUp,
1212	X87,
1213	X87Up,
1214	ComplexX87,
1215	NoClass,
1216	Memory
1217	};
1218
1219	/// merge - Implement the X86_64 ABI merging algorithm.
1220	///
1221	/// Merge an accumulating classification \arg Accum with a field
1222	/// classification \arg Field.
1223	///
1224	/// \param Accum - The accumulating classification. This should
1225	/// always be either NoClass or the result of a previous merge
1226	/// call. In addition, this should never be Memory (the caller
1227	/// should just return Memory for the aggregate).
1228	static Class merge(Class Accum, Class Field);
1229
1230	/// postMerge - Implement the X86_64 ABI post merging algorithm.
1231	///
1232	/// Post merger cleanup, reduces a malformed Hi and Lo pair to
1233	/// final MEMORY or SSE classes when necessary.
1234	///
1235	/// \param AggregateSize - The size of the current aggregate in
1236	/// the classification process.
1237	///
1238	/// \param Lo - The classification for the parts of the type
1239	/// residing in the low word of the containing object.
1240	///
1241	/// \param Hi - The classification for the parts of the type
1242	/// residing in the higher words of the containing object.
1243	///
1244	void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;
1245
1246	/// classify - Determine the x86_64 register classes in which the
1247	/// given type T should be passed.
1248	///
1249	/// \param Lo - The classification for the parts of the type
1250	/// residing in the low word of the containing object.
1251	///
1252	/// \param Hi - The classification for the parts of the type
1253	/// residing in the high word of the containing object.
1254	///
1255	/// \param OffsetBase - The bit offset of this type in the
1256	/// containing object. Some parameters are classified different
1257	/// depending on whether they straddle an eightbyte boundary.
1258	///
1259	/// \param isNamedArg - Whether the argument in question is a "named"
1260	/// argument, as used in AMD64-ABI 3.5.7.
1261	///
1262	/// \param IsRegCall - Whether the calling conversion is regcall.
1263	///
1264	/// If a word is unused its result will be NoClass; if a type should
1265	/// be passed in Memory then at least the classification of \arg Lo
1266	/// will be Memory.
1267	///
1268	/// The \arg Lo class will be NoClass iff the argument is ignored.
1269	///
1270	/// If the \arg Lo class is ComplexX87, then the \arg Hi class will
1271	/// also be ComplexX87.
1272	void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
1273	bool isNamedArg, bool IsRegCall = false) const;
1274
1275	llvm::Type GetByteVectorType(QualType Ty) const*;
1276	llvm::Type GetSSETypeAtOffset(llvm::Type IRType,
1277	unsigned IROffset, QualType SourceTy,
1278	unsigned SourceOffset) const;
1279	llvm::Type GetINTEGERTypeAtOffset(llvm::Type IRType,
1280	unsigned IROffset, QualType SourceTy,
1281	unsigned SourceOffset) const;
1282
1283	/// getIndirectResult - Give a source type \arg Ty, return a suitable result
1284	/// such that the argument will be returned in memory.
1285	ABIArgInfo getIndirectReturnResult(QualType Ty) const;
1286
1287	/// getIndirectResult - Give a source type \arg Ty, return a suitable result
1288	/// such that the argument will be passed in memory.
1289	///
1290	/// \param freeIntRegs - The number of free integer registers remaining
1291	/// available.
1292	ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;
1293
1294	ABIArgInfo classifyReturnType(QualType RetTy) const;
1295
1296	ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
1297	unsigned &neededInt, unsigned &neededSSE,
1298	bool isNamedArg,
1299	bool IsRegCall = false) const;
1300
1301	ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
1302	unsigned &NeededSSE,
1303	unsigned &MaxVectorWidth) const;
1304
1305	ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
1306	unsigned &NeededSSE,
1307	unsigned &MaxVectorWidth) const;
1308
1309	bool IsIllegalVectorType(QualType Ty) const;
1310
1311	/// The 0.98 ABI revision clarified a lot of ambiguities,
1312	/// unfortunately in ways that were not always consistent with
1313	/// certain previous compilers. In particular, platforms which
1314	/// required strict binary compatibility with older versions of GCC
1315	/// may need to exempt themselves.
1316	bool honorsRevision0_98() const {
1317	return !getTarget().getTriple().isOSDarwin();
1318	}
1319
1320	/// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
1321	/// classify it as INTEGER (for compatibility with older clang compilers).
1322	bool classifyIntegerMMXAsSSE() const {
1323	// Clang <= 3.8 did not do this.
1324	if (getContext().getLangOpts().getClangABICompat() <=
1325	LangOptions::ClangABI::Ver3_8)
1326	return false;
1327
1328	const llvm::Triple &Triple = getTarget().getTriple();
1329	if (Triple.isOSDarwin() \|\| Triple.isPS() \|\| Triple.isOSFreeBSD())
1330	return false;
1331	return true;
1332	}
1333
1334	// GCC classifies vectors of __int128 as memory.
1335	bool passInt128VectorsInMem() const {
1336	// Clang <= 9.0 did not do this.
1337	if (getContext().getLangOpts().getClangABICompat() <=
1338	LangOptions::ClangABI::Ver9)
1339	return false;
1340
1341	const llvm::Triple &T = getTarget().getTriple();
1342	return T.isOSLinux() \|\| T.isOSNetBSD();
1343	}
1344
1345	bool returnCXXRecordGreaterThan128InMem() const {
1346	// Clang <= 20.0 did not do this.
1347	if (getContext().getLangOpts().getClangABICompat() <=
1348	LangOptions::ClangABI::Ver20)
1349	return false;
1350
1351	return true;
1352	}
1353
1354	X86AVXABILevel AVXLevel;
1355	// Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
1356	// 64-bit hardware.
1357	bool Has64BitPointers;
1358
1359	public:
1360	X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
1361	: ABIInfo (CGT), AVXLevel(AVXLevel),
1362	Has64BitPointers(CGT.getDataLayout().getPointerSize(AS: `0`) == `8`) {}
1363
1364	bool isPassedUsingAVXType(QualType type) const {
1365	unsigned neededInt, neededSSE;
1366	// The freeIntRegs argument doesn't matter here.
1367	ABIArgInfo info = classifyArgumentType(Ty: type, freeIntRegs: `0`, neededInt, neededSSE,
1368	/isNamedArg/true);
1369	if (info.isDirect()) {
1370	llvm::Type *ty = info.getCoerceToType();
1371	if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(Val: ty))
1372	return vectorTy->getPrimitiveSizeInBits().getFixedValue() > `128`;
1373	}
1374	return false;
1375	}
1376
1377	void computeInfo(CGFunctionInfo &FI) const override;
1378
1379	RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
1380	AggValueSlot Slot) const override;
1381	RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
1382	AggValueSlot Slot) const override;
1383
1384	bool has64BitPointers() const {
1385	return Has64BitPointers;
1386	}
1387	};
1388
1389	/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
1390	class WinX86_64ABIInfo : public ABIInfo {
1391	public:
1392	WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
1393	: ABIInfo (CGT), AVXLevel(AVXLevel),
1394	IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}
1395
1396	void computeInfo(CGFunctionInfo &FI) const override;
1397
1398	RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
1399	AggValueSlot Slot) const override;
1400
1401	bool isHomogeneousAggregateBaseType(QualType Ty) const override {
1402	// FIXME: Assumes vectorcall is in use.
1403	return isX86VectorTypeForVectorCall(Context&: getContext(), Ty);
1404	}
1405
1406	bool isHomogeneousAggregateSmallEnough(const Type *Ty,
1407	uint64_t NumMembers) const override {
1408	// FIXME: Assumes vectorcall is in use.
1409	return isX86VectorCallAggregateSmallEnough(NumMembers);
1410	}
1411
1412	private:
1413	ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
1414	bool IsVectorCall, bool IsRegCall) const;
1415	ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
1416	const ABIArgInfo &current) const;
1417
1418	X86AVXABILevel AVXLevel;
1419
1420	bool IsMingw64;
1421	};
1422
1423	class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
1424	public:
1425	X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
1426	: TargetCodeGenInfo (std::make_unique<X86_64ABIInfo>(args&: CGT, args&: AVXLevel)) {
1427	SwiftInfo =
1428	std::make_unique<SwiftABIInfo>(args&: CGT, /SwiftErrorInRegister=/args: true);
1429	}
1430
1431	/// Disable tail call on x86-64. The epilogue code before the tail jump blocks
1432	/// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
1433	bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }
1434
1435	int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
1436	return `7`;
1437	}
1438
1439	bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
1440	llvm::Value Address) const* override {
1441	llvm::Value *Eight8 = llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: `8`);
1442
1443	// 0-15 are the 16 integer registers.
1444	// 16 is %rip.
1445	AssignToArrayRange(Builder&: CGF.Builder, Array: Address, Value: Eight8, FirstIndex: `0`, LastIndex: `16`);
1446	return false;
1447	}
1448
1449	llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
1450	StringRef Constraint,
1451	llvm::Type* Ty) const override {
1452	return X86AdjustInlineAsmType(CGF, Constraint, Ty);
1453	}
1454
1455	bool isNoProtoCallVariadic(const CallArgList &args,
1456	const FunctionNoProtoType fnType) const* override {
1457	// The default CC on x86-64 sets %al to the number of SSA
1458	// registers used, and GCC sets this when calling an unprototyped
1459	// function, so we override the default behavior. However, don't do
1460	// that when AVX types are involved: the ABI explicitly states it is
1461	// undefined, and it doesn't work in practice because of how the ABI
1462	// defines varargs anyway.
1463	if (fnType->getCallConv() == CC_C) {
1464	bool HasAVXType = false;
1465	for (const CallArg &arg : args) {
1466	if (getABIInfo<X86_64ABIInfo>().isPassedUsingAVXType(type: arg.Ty)) {
1467	HasAVXType = true;
1468	break;
1469	}
1470	}
1471
1472	if (!HasAVXType)
1473	return true;
1474	}
1475
1476	return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
1477	}
1478
1479	void setTargetAttributes(const Decl D, llvm::GlobalValue GV,
1480	CodeGen::CodeGenModule &CGM) const override {
1481	if (GV->isDeclaration())
1482	return;
1483	if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D)) {
1484	if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
1485	llvm::Function *Fn = cast<llvm::Function>(Val: GV);
1486	Fn->addFnAttr(Kind: "stackrealign");
1487	}
1488
1489	addX86InterruptAttrs(FD, GV, CGM);
1490	}
1491	}
1492
1493	void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
1494	const FunctionDecl *Caller,
1495	const FunctionDecl Callee, const* CallArgList &Args,
1496	QualType ReturnType) const override;
1497	};
1498	} // namespace
1499
1500	static void initFeatureMaps(const ASTContext &Ctx,
1501	llvm::StringMap<bool> &CallerMap,
1502	const FunctionDecl *Caller,
1503	llvm::StringMap<bool> &CalleeMap,
1504	const FunctionDecl *Callee) {
1505	if (CalleeMap.empty() && CallerMap.empty()) {
1506	// The caller is potentially nullptr in the case where the call isn't in a
1507	// function. In this case, the getFunctionFeatureMap ensures we just get
1508	// the TU level setting (since it cannot be modified by 'target'..
1509	Ctx.getFunctionFeatureMap(FeatureMap&: CallerMap, Caller);
1510	Ctx.getFunctionFeatureMap(FeatureMap&: CalleeMap, Callee);
1511	}
1512	}
1513
1514	static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
1515	SourceLocation CallLoc,
1516	const llvm::StringMap<bool> &CallerMap,
1517	const llvm::StringMap<bool> &CalleeMap,
1518	QualType Ty, StringRef Feature,
1519	bool IsArgument) {
1520	bool CallerHasFeat = CallerMap.lookup(Key: Feature);
1521	bool CalleeHasFeat = CalleeMap.lookup(Key: Feature);
1522	if (!CallerHasFeat && !CalleeHasFeat)
1523	return Diag.Report(Loc: CallLoc, DiagID: diag::warn_avx_calling_convention)
1524	<< IsArgument << Ty << Feature;
1525
1526	// Mixing calling conventions here is very clearly an error.
1527	if (!CallerHasFeat \|\| !CalleeHasFeat)
1528	return Diag.Report(Loc: CallLoc, DiagID: diag::err_avx_calling_convention)
1529	<< IsArgument << Ty << Feature;
1530
1531	// Else, both caller and callee have the required feature, so there is no need
1532	// to diagnose.
1533	return false;
1534	}
1535
1536	static bool checkAVX512ParamFeature(DiagnosticsEngine &Diag,
1537	SourceLocation CallLoc,
1538	const llvm::StringMap<bool> &CallerMap,
1539	const llvm::StringMap<bool> &CalleeMap,
1540	QualType Ty, bool IsArgument) {
1541	bool Caller256 = CallerMap.lookup(Key: "avx512f") && !CallerMap.lookup(Key: "evex512");
1542	bool Callee256 = CalleeMap.lookup(Key: "avx512f") && !CalleeMap.lookup(Key: "evex512");
1543
1544	// Forbid 512-bit or larger vector pass or return when we disabled ZMM
1545	// instructions.
1546	if (Caller256 \|\| Callee256)
1547	return Diag.Report(Loc: CallLoc, DiagID: diag::err_avx_calling_convention)
1548	<< IsArgument << Ty << "evex512";
1549
1550	return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
1551	Feature: "avx512f", IsArgument);
1552	}
1553
1554	static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
1555	SourceLocation CallLoc,
1556	const llvm::StringMap<bool> &CallerMap,
1557	const llvm::StringMap<bool> &CalleeMap, QualType Ty,
1558	bool IsArgument) {
1559	uint64_t Size = Ctx.getTypeSize(T: Ty);
1560	if (Size > `256`)
1561	return checkAVX512ParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
1562	IsArgument);
1563
1564	if (Size > `128`)
1565	return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, Feature: "avx",
1566	IsArgument);
1567
1568	return false;
1569	}
1570
1571	void X86_64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
1572	SourceLocation CallLoc,
1573	const FunctionDecl *Caller,
1574	const FunctionDecl *Callee,
1575	const CallArgList &Args,
1576	QualType ReturnType) const {
1577	if (!Callee)
1578	return;
1579
1580	llvm::StringMap<bool> CallerMap;
1581	llvm::StringMap<bool> CalleeMap;
1582	unsigned ArgIndex = `0`;
1583
1584	// We need to loop through the actual call arguments rather than the
1585	// function's parameters, in case this variadic.
1586	for (const CallArg &Arg : Args) {
1587	// The "avx" feature changes how vectors >128 in size are passed. "avx512f"
1588	// additionally changes how vectors >256 in size are passed. Like GCC, we
1589	// warn when a function is called with an argument where this will change.
1590	// Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
1591	// the caller and callee features are mismatched.
1592	// Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
1593	// change its ABI with attribute-target after this call.
1594	if (Arg.getType()->isVectorType() &&
1595	CGM.getContext().getTypeSize(T: Arg.getType()) > `128`) {
1596	initFeatureMaps(Ctx: CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
1597	QualType Ty = Arg.getType();
1598	// The CallArg seems to have desugared the type already, so for clearer
1599	// diagnostics, replace it with the type in the FunctionDecl if possible.
1600	if (ArgIndex < Callee->getNumParams())
1601	Ty = Callee->getParamDecl(i: ArgIndex)->getType();
1602
1603	if (checkAVXParam(Diag&: CGM.getDiags(), Ctx&: CGM.getContext(), CallLoc, CallerMap,
1604	CalleeMap, Ty, /IsArgument/ true))
1605	return;
1606	}
1607	++ArgIndex;
1608	}
1609
1610	// Check return always, as we don't have a good way of knowing in codegen
1611	// whether this value is used, tail-called, etc.
1612	if (Callee->getReturnType()->isVectorType() &&
1613	CGM.getContext().getTypeSize(T: Callee->getReturnType()) > `128`) {
1614	initFeatureMaps(Ctx: CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
1615	checkAVXParam(Diag&: CGM.getDiags(), Ctx&: CGM.getContext(), CallLoc, CallerMap,
1616	CalleeMap, Ty: Callee->getReturnType(),
1617	/IsArgument/ false);
1618	}
1619	}
1620
1621	std::string TargetCodeGenInfo::qualifyWindowsLibrary(StringRef Lib) {
1622	// If the argument does not end in .lib, automatically add the suffix.
1623	// If the argument contains a space, enclose it in quotes.
1624	// This matches the behavior of MSVC.
1625	bool Quote = Lib.contains(C: `' '`);
1626	std::string ArgStr = Quote ? "\"" : "";
1627	ArgStr += Lib;
1628	if (!Lib.ends_with_insensitive(Suffix: ".lib") && !Lib.ends_with_insensitive(Suffix: ".a"))
1629	ArgStr += ".lib";
1630	ArgStr += Quote ? "\"" : "";
1631	return ArgStr;
1632	}
1633
1634	namespace {
1635	class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo {
1636	public:
1637	WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
1638	bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI,
1639	unsigned NumRegisterParameters)
1640	: X86_32TargetCodeGenInfo (CGT, DarwinVectorABI, RetSmallStructInRegABI,
1641	Win32StructABI, NumRegisterParameters, false) {}
1642
1643	void setTargetAttributes(const Decl D, llvm::GlobalValue GV,
1644	CodeGen::CodeGenModule &CGM) const override;
1645
1646	void getDependentLibraryOption(llvm::StringRef Lib,
1647	llvm::SmallString<`24`> &Opt) const override {
1648	Opt = "/DEFAULTLIB:";
1649	Opt += qualifyWindowsLibrary(Lib);
1650	}
1651
1652	void getDetectMismatchOption(llvm::StringRef Name,
1653	llvm::StringRef Value,
1654	llvm::SmallString<`32`> &Opt) const override {
1655	Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
1656	}
1657	};
1658	} // namespace
1659
1660	void WinX86_32TargetCodeGenInfo::setTargetAttributes(
1661	const Decl D, llvm::GlobalValue GV, CodeGen::CodeGenModule &CGM) const {
1662	X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
1663	if (GV->isDeclaration())
1664	return;
1665	addStackProbeTargetAttributes(D, GV, CGM);
1666	}
1667
1668	namespace {
1669	class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
1670	public:
1671	WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
1672	X86AVXABILevel AVXLevel)
1673	: TargetCodeGenInfo (std::make_unique<WinX86_64ABIInfo>(args&: CGT, args&: AVXLevel)) {
1674	SwiftInfo =
1675	std::make_unique<SwiftABIInfo>(args&: CGT, /SwiftErrorInRegister=/args: true);
1676	}
1677
1678	void setTargetAttributes(const Decl D, llvm::GlobalValue GV,
1679	CodeGen::CodeGenModule &CGM) const override;
1680
1681	int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
1682	return `7`;
1683	}
1684
1685	bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
1686	llvm::Value Address) const* override {
1687	llvm::Value *Eight8 = llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: `8`);
1688
1689	// 0-15 are the 16 integer registers.
1690	// 16 is %rip.
1691	AssignToArrayRange(Builder&: CGF.Builder, Array: Address, Value: Eight8, FirstIndex: `0`, LastIndex: `16`);
1692	return false;
1693	}
1694
1695	void getDependentLibraryOption(llvm::StringRef Lib,
1696	llvm::SmallString<`24`> &Opt) const override {
1697	Opt = "/DEFAULTLIB:";
1698	Opt += qualifyWindowsLibrary(Lib);
1699	}
1700
1701	void getDetectMismatchOption(llvm::StringRef Name,
1702	llvm::StringRef Value,
1703	llvm::SmallString<`32`> &Opt) const override {
1704	Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
1705	}
1706	};
1707	} // namespace
1708
1709	void WinX86_64TargetCodeGenInfo::setTargetAttributes(
1710	const Decl D, llvm::GlobalValue GV, CodeGen::CodeGenModule &CGM) const {
1711	TargetCodeGenInfo::setTargetAttributes(D, GV, M&: CGM);
1712	if (GV->isDeclaration())
1713	return;
1714	if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D)) {
1715	if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
1716	llvm::Function *Fn = cast<llvm::Function>(Val: GV);
1717	Fn->addFnAttr(Kind: "stackrealign");
1718	}
1719
1720	addX86InterruptAttrs(FD, GV, CGM);
1721	}
1722
1723	addStackProbeTargetAttributes(D, GV, CGM);
1724	}
1725
1726	void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
1727	Class &Hi) const {
1728	// AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
1729	//
1730	// (a) If one of the classes is Memory, the whole argument is passed in
1731	// memory.
1732	//
1733	// (b) If X87UP is not preceded by X87, the whole argument is passed in
1734	// memory.
1735	//
1736	// (c) If the size of the aggregate exceeds two eightbytes and the first
1737	// eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
1738	// argument is passed in memory. NOTE: This is necessary to keep the
1739	// ABI working for processors that don't support the __m256 type.
1740	//
1741	// (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
1742	//
1743	// Some of these are enforced by the merging logic. Others can arise
1744	// only with unions; for example:
1745	// union { _Complex double; unsigned; }
1746	//
1747	// Note that clauses (b) and (c) were added in 0.98.
1748	//
1749	if (Hi == Memory)
1750	Lo = Memory;
1751	if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
1752	Lo = Memory;
1753	if (AggregateSize > `128` && (Lo != SSE \|\| Hi != SSEUp))
1754	Lo = Memory;
1755	if (Hi == SSEUp && Lo != SSE)
1756	Hi = SSE;
1757	}
1758
1759	X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
1760	// AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
1761	// classified recursively so that always two fields are
1762	// considered. The resulting class is calculated according to
1763	// the classes of the fields in the eightbyte:
1764	//
1765	// (a) If both classes are equal, this is the resulting class.
1766	//
1767	// (b) If one of the classes is NO_CLASS, the resulting class is
1768	// the other class.
1769	//
1770	// (c) If one of the classes is MEMORY, the result is the MEMORY
1771	// class.
1772	//
1773	// (d) If one of the classes is INTEGER, the result is the
1774	// INTEGER.
1775	//
1776	// (e) If one of the classes is X87, X87UP, COMPLEX_X87 class,
1777	// MEMORY is used as class.
1778	//
1779	// (f) Otherwise class SSE is used.
1780
1781	// Accum should never be memory (we should have returned) or
1782	// ComplexX87 (because this cannot be passed in a structure).
1783	assert((Accum != Memory && Accum != ComplexX87) &&
1784	"Invalid accumulated classification during merge.");
1785	if (Accum == Field \|\| Field == NoClass)
1786	return Accum;
1787	if (Field == Memory)
1788	return Memory;
1789	if (Accum == NoClass)
1790	return Field;
1791	if (Accum == Integer \|\| Field == Integer)
1792	return Integer;
1793	if (Field == X87 \|\| Field == X87Up \|\| Field == ComplexX87 \|\|
1794	Accum == X87 \|\| Accum == X87Up)
1795	return Memory;
1796	return SSE;
1797	}
1798
1799	void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
1800	Class &Hi, bool isNamedArg, bool IsRegCall) const {
1801	// FIXME: This code can be simplified by introducing a simple value class for
1802	// Class pairs with appropriate constructor methods for the various
1803	// situations.
1804
1805	// FIXME: Some of the split computations are wrong; unaligned vectors
1806	// shouldn't be passed in registers for example, so there is no chance they
1807	// can straddle an eightbyte. Verify & simplify.
1808
1809	Lo = Hi = NoClass;
1810
1811	Class &Current = OffsetBase < `64` ? Lo : Hi;
1812	Current = Memory;
1813
1814	if (const BuiltinType *BT = Ty ->getAs<BuiltinType>()) {
1815	BuiltinType::Kind k = BT->getKind();
1816
1817	if (k == BuiltinType::Void) {
1818	Current = NoClass;
1819	} else if (k == BuiltinType::Int128 \|\| k == BuiltinType::UInt128) {
1820	Lo = Integer;
1821	Hi = Integer;
1822	} else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) {
1823	Current = Integer;
1824	} else if (k == BuiltinType::Float \|\| k == BuiltinType::Double \|\|
1825	k == BuiltinType::Float16 \|\| k == BuiltinType::BFloat16) {
1826	Current = SSE;
1827	} else if (k == BuiltinType::Float128) {
1828	Lo = SSE;
1829	Hi = SSEUp;
1830	} else if (k == BuiltinType::LongDouble) {
1831	const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
1832	if (LDF == &llvm::APFloat::IEEEquad()) {
1833	Lo = SSE;
1834	Hi = SSEUp;
1835	} else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
1836	Lo = X87;
1837	Hi = X87Up;
1838	} else if (LDF == &llvm::APFloat::IEEEdouble()) {
1839	Current = SSE;
1840	} else
1841	llvm_unreachable("unexpected long double representation!");
1842	}
1843	// FIXME: _Decimal32 and _Decimal64 are SSE.
1844	// FIXME: _float128 and _Decimal128 are (SSE, SSEUp).
1845	return;
1846	}
1847
1848	if (const EnumType *ET = Ty ->getAs<EnumType>()) {
1849	// Classify the underlying integer type.
1850	classify(Ty: ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg);
1851	return;
1852	}
1853
1854	if (Ty ->hasPointerRepresentation()) {
1855	Current = Integer;
1856	return;
1857	}
1858
1859	if (Ty ->isMemberPointerType()) {
1860	if (Ty ->isMemberFunctionPointerType()) {
1861	if (Has64BitPointers) {
1862	// If Has64BitPointers, this is an {i64, i64}, so classify both
1863	// Lo and Hi now.
1864	Lo = Hi = Integer;
1865	} else {
1866	// Otherwise, with 32-bit pointers, this is an {i32, i32}. If that
1867	// straddles an eightbyte boundary, Hi should be classified as well.
1868	uint64_t EB_FuncPtr = (OffsetBase) / `64`;
1869	uint64_t EB_ThisAdj = (OffsetBase + `64` - `1`) / `64`;
1870	if (EB_FuncPtr != EB_ThisAdj) {
1871	Lo = Hi = Integer;
1872	} else {
1873	Current = Integer;
1874	}
1875	}
1876	} else {
1877	Current = Integer;
1878	}
1879	return;
1880	}
1881
1882	if (const VectorType *VT = Ty ->getAs<VectorType>()) {
1883	uint64_t Size = getContext().getTypeSize(T: VT);
1884	if (Size == `1` \|\| Size == `8` \|\| Size == `16` \|\| Size == `32`) {
1885	// gcc passes the following as integer:
1886	// 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float>
1887	// 2 bytes - <2 x char>, <1 x short>
1888	// 1 byte - <1 x char>
1889	Current = Integer;
1890
1891	// If this type crosses an eightbyte boundary, it should be
1892	// split.
1893	uint64_t EB_Lo = (OffsetBase) / `64`;
1894	uint64_t EB_Hi = (OffsetBase + Size - `1`) / `64`;
1895	if (EB_Lo != EB_Hi)
1896	Hi = Lo;
1897	} else if (Size == `64`) {
1898	QualType ElementType = VT->getElementType();
1899
1900	// gcc passes <1 x double> in memory. :(
1901	if (ElementType ->isSpecificBuiltinType(K: BuiltinType::Double))
1902	return;
1903
1904	// gcc passes <1 x long long> as SSE but clang used to unconditionally
1905	// pass them as integer. For platforms where clang is the de facto
1906	// platform compiler, we must continue to use integer.
1907	if (!classifyIntegerMMXAsSSE() &&
1908	(ElementType ->isSpecificBuiltinType(K: BuiltinType::LongLong) \|\|
1909	ElementType ->isSpecificBuiltinType(K: BuiltinType::ULongLong) \|\|
1910	ElementType ->isSpecificBuiltinType(K: BuiltinType::Long) \|\|
1911	ElementType ->isSpecificBuiltinType(K: BuiltinType::ULong)))
1912	Current = Integer;
1913	else
1914	Current = SSE;
1915
1916	// If this type crosses an eightbyte boundary, it should be
1917	// split.
1918	if (OffsetBase && OffsetBase != `64`)
1919	Hi = Lo;
1920	} else if (Size == `128` \|\|
1921	(isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
1922	QualType ElementType = VT->getElementType();
1923
1924	// gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
1925	if (passInt128VectorsInMem() && Size != `128` &&
1926	(ElementType ->isSpecificBuiltinType(K: BuiltinType::Int128) \|\|
1927	ElementType ->isSpecificBuiltinType(K: BuiltinType::UInt128)))
1928	return;
1929
1930	// Arguments of 256-bits are split into four eightbyte chunks. The
1931	// least significant one belongs to class SSE and all the others to class
1932	// SSEUP. The original Lo and Hi design considers that types can't be
1933	// greater than 128-bits, so a 64-bit split in Hi and Lo makes sense.
1934	// This design isn't correct for 256-bits, but since there're no cases
1935	// where the upper parts would need to be inspected, avoid adding
1936	// complexity and just consider Hi to match the 64-256 part.
1937	//
1938	// Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
1939	// registers if they are "named", i.e. not part of the "..." of a
1940	// variadic function.
1941	//
1942	// Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are
1943	// split into eight eightbyte chunks, one SSE and seven SSEUP.
1944	Lo = SSE;
1945	Hi = SSEUp;
1946	}
1947	return;
1948	}
1949
1950	if (const ComplexType *CT = Ty ->getAs<ComplexType>()) {
1951	QualType ET = getContext().getCanonicalType(T: CT->getElementType());
1952
1953	uint64_t Size = getContext().getTypeSize(T: Ty);
1954	if (ET ->isIntegralOrEnumerationType()) {
1955	if (Size <= `64`)
1956	Current = Integer;
1957	else if (Size <= `128`)
1958	Lo = Hi = Integer;
1959	} else if (ET ->isFloat16Type() \|\| ET == getContext().FloatTy \|\|
1960	ET ->isBFloat16Type()) {
1961	Current = SSE;
1962	} else if (ET == getContext().DoubleTy) {
1963	Lo = Hi = SSE;
1964	} else if (ET == getContext().LongDoubleTy) {
1965	const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
1966	if (LDF == &llvm::APFloat::IEEEquad())
1967	Current = Memory;
1968	else if (LDF == &llvm::APFloat::x87DoubleExtended())
1969	Current = ComplexX87;
1970	else if (LDF == &llvm::APFloat::IEEEdouble())
1971	Lo = Hi = SSE;
1972	else
1973	llvm_unreachable("unexpected long double representation!");
1974	}
1975
1976	// If this complex type crosses an eightbyte boundary then it
1977	// should be split.
1978	uint64_t EB_Real = (OffsetBase) / `64`;
1979	uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(T: ET)) / `64`;
1980	if (Hi == NoClass && EB_Real != EB_Imag)
1981	Hi = Lo;
1982
1983	return;
1984	}
1985
1986	if (const auto *EITy = Ty ->getAs<BitIntType>()) {
1987	if (EITy->getNumBits() <= `64`)
1988	Current = Integer;
1989	else if (EITy->getNumBits() <= `128`)
1990	Lo = Hi = Integer;
1991	// Larger values need to get passed in memory.
1992	return;
1993	}
1994
1995	if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(T: Ty)) {
1996	// Arrays are treated like structures.
1997
1998	uint64_t Size = getContext().getTypeSize(T: Ty);
1999
2000	// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
2001	// than eight eightbytes, ..., it has class MEMORY.
2002	// regcall ABI doesn't have limitation to an object. The only limitation
2003	// is the free registers, which will be checked in computeInfo.
2004	if (!IsRegCall && Size > `512`)
2005	return;
2006
2007	// AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
2008	// fields, it has class MEMORY.
2009	//
2010	// Only need to check alignment of array base.
2011	if (OffsetBase % getContext().getTypeAlign(T: AT->getElementType()))
2012	return;
2013
2014	// Otherwise implement simplified merge. We could be smarter about
2015	// this, but it isn't worth it and would be harder to verify.
2016	Current = NoClass;
2017	uint64_t EltSize = getContext().getTypeSize(T: AT->getElementType());
2018	uint64_t ArraySize = AT->getZExtSize();
2019
2020	// The only case a 256-bit wide vector could be used is when the array
2021	// contains a single 256-bit element. Since Lo and Hi logic isn't extended
2022	// to work for sizes wider than 128, early check and fallback to memory.
2023	//
2024	if (Size > `128` &&
2025	(Size != EltSize \|\| Size > getNativeVectorSizeForAVXABI(AVXLevel)))
2026	return;
2027
2028	for (uint64_t i=`0`, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
2029	Class FieldLo, FieldHi;
2030	classify(Ty: AT->getElementType(), OffsetBase: Offset, Lo&: FieldLo, Hi&: FieldHi, isNamedArg);
2031	Lo = merge(Accum: Lo, Field: FieldLo);
2032	Hi = merge(Accum: Hi, Field: FieldHi);
2033	if (Lo == Memory \|\| Hi == Memory)
2034	break;
2035	}
2036
2037	postMerge(AggregateSize: Size, Lo, Hi);
2038	assert((Hi != SSEUp \|\| Lo == SSE) && "Invalid SSEUp array classification.");
2039	return;
2040	}
2041
2042	if (const RecordType *RT = Ty ->getAs<RecordType>()) {
2043	uint64_t Size = getContext().getTypeSize(T: Ty);
2044
2045	// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
2046	// than eight eightbytes, ..., it has class MEMORY.
2047	if (Size > `512`)
2048	return;
2049
2050	// AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
2051	// copy constructor or a non-trivial destructor, it is passed by invisible
2052	// reference.
2053	if (getRecordArgABI(RT, CXXABI&: getCXXABI()))
2054	return;
2055
2056	const RecordDecl *RD = RT->getDecl();
2057
2058	// Assume variable sized types are passed in memory.
2059	if (RD->hasFlexibleArrayMember())
2060	return;
2061
2062	const ASTRecordLayout &Layout = getContext().getASTRecordLayout(D: RD);
2063
2064	// Reset Lo class, this will be recomputed.
2065	Current = NoClass;
2066
2067	// If this is a C++ record, classify the bases first.
2068	if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) {
2069	for (const auto &I : CXXRD->bases()) {
2070	assert(!I.isVirtual() && !I.getType()->isDependentType() &&
2071	"Unexpected base class!");
2072	const auto *Base =
2073	cast<CXXRecordDecl>(Val: I.getType()->castAs<RecordType>()->getDecl());
2074
2075	// Classify this field.
2076	//
2077	// AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a
2078	// single eightbyte, each is classified separately. Each eightbyte gets
2079	// initialized to class NO_CLASS.
2080	Class FieldLo, FieldHi;
2081	uint64_t Offset =
2082	OffsetBase + getContext().toBits(CharSize: Layout.getBaseClassOffset(Base));
2083	classify(Ty: I.getType(), OffsetBase: Offset, Lo&: FieldLo, Hi&: FieldHi, isNamedArg);
2084	Lo = merge(Accum: Lo, Field: FieldLo);
2085	Hi = merge(Accum: Hi, Field: FieldHi);
2086	if (returnCXXRecordGreaterThan128InMem() &&
2087	(Size > `128` && (Size != getContext().getTypeSize(T: I.getType()) \|\|
2088	Size > getNativeVectorSizeForAVXABI(AVXLevel)))) {
2089	// The only case a 256(or 512)-bit wide vector could be used to return
2090	// is when CXX record contains a single 256(or 512)-bit element.
2091	Lo = Memory;
2092	}
2093	if (Lo == Memory \|\| Hi == Memory) {
2094	postMerge(AggregateSize: Size, Lo, Hi);
2095	return;
2096	}
2097	}
2098	}
2099
2100	// Classify the fields one at a time, merging the results.
2101	unsigned idx = `0`;
2102	bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <=
2103	LangOptions::ClangABI::Ver11 \|\|
2104	getContext().getTargetInfo().getTriple().isPS();
2105	bool IsUnion = RT->isUnionType() && !UseClang11Compat;
2106
2107	for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
2108	i != e; ++i, ++idx) {
2109	uint64_t Offset = OffsetBase + Layout.getFieldOffset(FieldNo: idx);
2110	bool BitField = i ->isBitField();
2111
2112	// Ignore padding bit-fields.
2113	if (BitField && i ->isUnnamedBitField())
2114	continue;
2115
2116	// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
2117	// eight eightbytes, or it contains unaligned fields, it has class MEMORY.
2118	//
2119	// The only case a 256-bit or a 512-bit wide vector could be used is when
2120	// the struct contains a single 256-bit or 512-bit element. Early check
2121	// and fallback to memory.
2122	//
2123	// FIXME: Extended the Lo and Hi logic properly to work for size wider
2124	// than 128.
2125	if (Size > `128` &&
2126	((!IsUnion && Size != getContext().getTypeSize(T: i ->getType())) \|\|
2127	Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
2128	Lo = Memory;
2129	postMerge(AggregateSize: Size, Lo, Hi);
2130	return;
2131	}
2132
2133	bool IsInMemory =
2134	Offset % getContext().getTypeAlign(T: i ->getType().getCanonicalType());
2135	// Note, skip this test for bit-fields, see below.
2136	if (!BitField && IsInMemory) {
2137	Lo = Memory;
2138	postMerge(AggregateSize: Size, Lo, Hi);
2139	return;
2140	}
2141
2142	// Classify this field.
2143	//
2144	// AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate
2145	// exceeds a single eightbyte, each is classified
2146	// separately. Each eightbyte gets initialized to class
2147	// NO_CLASS.
2148	Class FieldLo, FieldHi;
2149
2150	// Bit-fields require special handling, they do not force the
2151	// structure to be passed in memory even if unaligned, and
2152	// therefore they can straddle an eightbyte.
2153	if (BitField) {
2154	assert(!i->isUnnamedBitField());
2155	uint64_t Offset = OffsetBase + Layout.getFieldOffset(FieldNo: idx);
2156	uint64_t Size = i ->getBitWidthValue();
2157
2158	uint64_t EB_Lo = Offset / `64`;
2159	uint64_t EB_Hi = (Offset + Size - `1`) / `64`;
2160
2161	if (EB_Lo) {
2162	assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes.");
2163	FieldLo = NoClass;
2164	FieldHi = Integer;
2165	} else {
2166	FieldLo = Integer;
2167	FieldHi = EB_Hi ? Integer : NoClass;
2168	}
2169	} else
2170	classify(Ty: i ->getType(), OffsetBase: Offset, Lo&: FieldLo, Hi&: FieldHi, isNamedArg);
2171	Lo = merge(Accum: Lo, Field: FieldLo);
2172	Hi = merge(Accum: Hi, Field: FieldHi);
2173	if (Lo == Memory \|\| Hi == Memory)
2174	break;
2175	}
2176
2177	postMerge(AggregateSize: Size, Lo, Hi);
2178	}
2179	}
2180
2181	ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
2182	// If this is a scalar LLVM value then assume LLVM will pass it in the right
2183	// place naturally.
2184	if (!isAggregateTypeForABI(T: Ty)) {
2185	// Treat an enum type as its underlying type.
2186	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
2187	Ty = EnumTy->getDecl()->getIntegerType();
2188
2189	if (Ty ->isBitIntType())
2190	return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace());
2191
2192	return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
2193	: ABIArgInfo::getDirect());
2194	}
2195
2196	return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace());
2197	}
2198
2199	bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
2200	if (const VectorType *VecTy = Ty ->getAs<VectorType>()) {
2201	uint64_t Size = getContext().getTypeSize(T: VecTy);
2202	unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
2203	if (Size <= `64` \|\| Size > LargestVector)
2204	return true;
2205	QualType EltTy = VecTy->getElementType();
2206	if (passInt128VectorsInMem() &&
2207	(EltTy ->isSpecificBuiltinType(K: BuiltinType::Int128) \|\|
2208	EltTy ->isSpecificBuiltinType(K: BuiltinType::UInt128)))
2209	return true;
2210	}
2211
2212	return false;
2213	}
2214
2215	ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
2216	unsigned freeIntRegs) const {
2217	// If this is a scalar LLVM value then assume LLVM will pass it in the right
2218	// place naturally.
2219	//
2220	// This assumption is optimistic, as there could be free registers available
2221	// when we need to pass this argument in memory, and LLVM could try to pass
2222	// the argument in the free register. This does not seem to happen currently,
2223	// but this code would be much safer if we could mark the argument with
2224	// 'onstack'. See PR12193.
2225	if (!isAggregateTypeForABI(T: Ty) && !IsIllegalVectorType(Ty) &&
2226	!Ty ->isBitIntType()) {
2227	// Treat an enum type as its underlying type.
2228	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
2229	Ty = EnumTy->getDecl()->getIntegerType();
2230
2231	return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
2232	: ABIArgInfo::getDirect());
2233	}
2234
2235	if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(T: Ty, CXXABI&: getCXXABI()))
2236	return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace(),
2237	ByVal: RAA == CGCXXABI::RAA_DirectInMemory);
2238
2239	// Compute the byval alignment. We specify the alignment of the byval in all
2240	// cases so that the mid-level optimizer knows the alignment of the byval.
2241	unsigned Align = std::max(a: getContext().getTypeAlign(T: Ty) / `8`, b: `8U`);
2242
2243	// Attempt to avoid passing indirect results using byval when possible. This
2244	// is important for good codegen.
2245	//
2246	// We do this by coercing the value into a scalar type which the backend can
2247	// handle naturally (i.e., without using byval).
2248	//
2249	// For simplicity, we currently only do this when we have exhausted all of the
2250	// free integer registers. Doing this when there are free integer registers
2251	// would require more care, as we would have to ensure that the coerced value
2252	// did not claim the unused register. That would require either reording the
2253	// arguments to the function (so that any subsequent inreg values came first),
2254	// or only doing this optimization when there were no following arguments that
2255	// might be inreg.
2256	//
2257	// We currently expect it to be rare (particularly in well written code) for
2258	// arguments to be passed on the stack when there are still free integer
2259	// registers available (this would typically imply large structs being passed
2260	// by value), so this seems like a fair tradeoff for now.
2261	//
2262	// We can revisit this if the backend grows support for 'onstack' parameter
2263	// attributes. See PR12193.
2264	if (freeIntRegs == `0`) {
2265	uint64_t Size = getContext().getTypeSize(T: Ty);
2266
2267	// If this type fits in an eightbyte, coerce it into the matching integral
2268	// type, which will end up on the stack (with alignment 8).
2269	if (Align == `8` && Size <= `64`)
2270	return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(),
2271	NumBits: Size));
2272	}
2273
2274	return ABIArgInfo::getIndirect(Alignment: CharUnits::fromQuantity(Quantity: Align),
2275	AddrSpace: getDataLayout().getAllocaAddrSpace());
2276	}
2277
2278	/// The ABI specifies that a value should be passed in a full vector XMM/YMM
2279	/// register. Pick an LLVM IR type that will be passed as a vector register.
2280	llvm::Type X86_64ABIInfo::GetByteVectorType(QualType Ty) const* {
2281	// Wrapper structs/arrays that only contain vectors are passed just like
2282	// vectors; strip them off if present.
2283	if (const Type *InnerTy = isSingleElementStruct(T: Ty, Context&: getContext()))
2284	Ty = QualType (InnerTy, `0`);
2285
2286	llvm::Type *IRType = CGT.ConvertType(T: Ty);
2287	if (isa<llvm::VectorType>(Val: IRType)) {
2288	// Don't pass vXi128 vectors in their native type, the backend can't
2289	// legalize them.
2290	if (passInt128VectorsInMem() &&
2291	cast<llvm::VectorType>(Val: IRType)->getElementType()->isIntegerTy(Bitwidth: `128`)) {
2292	// Use a vXi64 vector.
2293	uint64_t Size = getContext().getTypeSize(T: Ty);
2294	return llvm::FixedVectorType::get(ElementType: llvm::Type::getInt64Ty(C&: getVMContext()),
2295	NumElts: Size / `64`);
2296	}
2297
2298	return IRType;
2299	}
2300
2301	if (IRType->getTypeID() == llvm::Type::FP128TyID)
2302	return IRType;
2303
2304	// We couldn't find the preferred IR vector type for 'Ty'.
2305	uint64_t Size = getContext().getTypeSize(T: Ty);
2306	assert((Size == `128` \|\| Size == `256` \|\| Size == `512`) && "Invalid type found!");
2307
2308
2309	// Return a LLVM IR vector type based on the size of 'Ty'.
2310	return llvm::FixedVectorType::get(ElementType: llvm::Type::getDoubleTy(C&: getVMContext()),
2311	NumElts: Size / `64`);
2312	}
2313
2314	/// BitsContainNoUserData - Return true if the specified [start,end) bit range
2315	/// is known to either be off the end of the specified type or being in
2316	/// alignment padding. The user type specified is known to be at most 128 bits
2317	/// in size, and have passed through X86_64ABIInfo::classify with a successful
2318	/// classification that put one of the two halves in the INTEGER class.
2319	///
2320	/// It is conservatively correct to return false.
2321	static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
2322	unsigned EndBit, ASTContext &Context) {
2323	// If the bytes being queried are off the end of the type, there is no user
2324	// data hiding here. This handles analysis of builtins, vectors and other
2325	// types that don't contain interesting padding.
2326	unsigned TySize = (unsigned)Context.getTypeSize(T: Ty);
2327	if (TySize <= StartBit)
2328	return true;
2329
2330	if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T: Ty)) {
2331	unsigned EltSize = (unsigned)Context.getTypeSize(T: AT->getElementType());
2332	unsigned NumElts = (unsigned)AT->getZExtSize();
2333
2334	// Check each element to see if the element overlaps with the queried range.
2335	for (unsigned i = `0`; i != NumElts; ++i) {
2336	// If the element is after the span we care about, then we're done..
2337	unsigned EltOffset = i*EltSize;
2338	if (EltOffset >= EndBit) break;
2339
2340	unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :`0`;
2341	if (!BitsContainNoUserData(Ty: AT->getElementType(), StartBit: EltStart,
2342	EndBit: EndBit-EltOffset, Context))
2343	return false;
2344	}
2345	// If it overlaps no elements, then it is safe to process as padding.
2346	return true;
2347	}
2348
2349	if (const RecordType *RT = Ty ->getAs<RecordType>()) {
2350	const RecordDecl *RD = RT->getDecl();
2351	const ASTRecordLayout &Layout = Context.getASTRecordLayout(D: RD);
2352
2353	// If this is a C++ record, check the bases first.
2354	if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) {
2355	for (const auto &I : CXXRD->bases()) {
2356	assert(!I.isVirtual() && !I.getType()->isDependentType() &&
2357	"Unexpected base class!");
2358	const auto *Base =
2359	cast<CXXRecordDecl>(Val: I.getType()->castAs<RecordType>()->getDecl());
2360
2361	// If the base is after the span we care about, ignore it.
2362	unsigned BaseOffset = Context.toBits(CharSize: Layout.getBaseClassOffset(Base));
2363	if (BaseOffset >= EndBit) continue;
2364
2365	unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :`0`;
2366	if (!BitsContainNoUserData(Ty: I.getType(), StartBit: BaseStart,
2367	EndBit: EndBit-BaseOffset, Context))
2368	return false;
2369	}
2370	}
2371
2372	// Verify that no field has data that overlaps the region of interest. Yes
2373	// this could be sped up a lot by being smarter about queried fields,
2374	// however we're only looking at structs up to 16 bytes, so we don't care
2375	// much.
2376	unsigned idx = `0`;
2377	for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
2378	i != e; ++i, ++idx) {
2379	unsigned FieldOffset = (unsigned)Layout.getFieldOffset(FieldNo: idx);
2380
2381	// If we found a field after the region we care about, then we're done.
2382	if (FieldOffset >= EndBit) break;
2383
2384	unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :`0`;
2385	if (!BitsContainNoUserData(Ty: i ->getType(), StartBit: FieldStart, EndBit: EndBit-FieldOffset,
2386	Context))
2387	return false;
2388	}
2389
2390	// If nothing in this record overlapped the area of interest, then we're
2391	// clean.
2392	return true;
2393	}
2394
2395	return false;
2396	}
2397
2398	/// getFPTypeAtOffset - Return a floating point type at the specified offset.
2399	static llvm::Type getFPTypeAtOffset(llvm::Type IRType, unsigned IROffset,
2400	const llvm::DataLayout &TD) {
2401	if (IROffset == `0` && IRType->isFloatingPointTy())
2402	return IRType;
2403
2404	// If this is a struct, recurse into the field at the specified offset.
2405	if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val: IRType)) {
2406	if (!STy->getNumContainedTypes())
2407	return nullptr;
2408
2409	const llvm::StructLayout *SL = TD.getStructLayout(Ty: STy);
2410	unsigned Elt = SL->getElementContainingOffset(FixedOffset: IROffset);
2411	IROffset -= SL->getElementOffset(Idx: Elt);
2412	return getFPTypeAtOffset(IRType: STy->getElementType(N: Elt), IROffset, TD);
2413	}
2414
2415	// If this is an array, recurse into the field at the specified offset.
2416	if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(Val: IRType)) {
2417	llvm::Type *EltTy = ATy->getElementType();
2418	unsigned EltSize = TD.getTypeAllocSize(Ty: EltTy);
2419	IROffset -= IROffset / EltSize * EltSize;
2420	return getFPTypeAtOffset(IRType: EltTy, IROffset, TD);
2421	}
2422
2423	return nullptr;
2424	}
2425
2426	/// GetSSETypeAtOffset - Return a type that will be passed by the backend in the
2427	/// low 8 bytes of an XMM register, corresponding to the SSE class.
2428	llvm::Type *X86_64ABIInfo::
2429	GetSSETypeAtOffset(llvm::Type IRType, unsigned* IROffset,
2430	QualType SourceTy, unsigned SourceOffset) const {
2431	const llvm::DataLayout &TD = getDataLayout();
2432	unsigned SourceSize =
2433	(unsigned)getContext().getTypeSize(T: SourceTy) / `8` - SourceOffset;
2434	llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
2435	if (!T0 \|\| T0->isDoubleTy())
2436	return llvm::Type::getDoubleTy(C&: getVMContext());
2437
2438	// Get the adjacent FP type.
2439	llvm::Type T1 = nullptr*;
2440	unsigned T0Size = TD.getTypeAllocSize(Ty: T0);
2441	if (SourceSize > T0Size)
2442	T1 = getFPTypeAtOffset(IRType, IROffset: IROffset + T0Size, TD);
2443	if (T1 == nullptr) {
2444	// Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due
2445	// to its alignment.
2446	if (T0->is16bitFPTy() && SourceSize > `4`)
2447	T1 = getFPTypeAtOffset(IRType, IROffset: IROffset + `4`, TD);
2448	// If we can't get a second FP type, return a simple half or float.
2449	// avx512fp16-abi.c:pr51813_2 shows it works to return float for
2450	// {float, i8} too.
2451	if (T1 == nullptr)
2452	return T0;
2453	}
2454
2455	if (T0->isFloatTy() && T1->isFloatTy())
2456	return llvm::FixedVectorType::get(ElementType: T0, NumElts: `2`);
2457
2458	if (T0->is16bitFPTy() && T1->is16bitFPTy()) {
2459	llvm::Type T2 = nullptr*;
2460	if (SourceSize > `4`)
2461	T2 = getFPTypeAtOffset(IRType, IROffset: IROffset + `4`, TD);
2462	if (T2 == nullptr)
2463	return llvm::FixedVectorType::get(ElementType: T0, NumElts: `2`);
2464	return llvm::FixedVectorType::get(ElementType: T0, NumElts: `4`);
2465	}
2466
2467	if (T0->is16bitFPTy() \|\| T1->is16bitFPTy())
2468	return llvm::FixedVectorType::get(ElementType: llvm::Type::getHalfTy(C&: getVMContext()), NumElts: `4`);
2469
2470	return llvm::Type::getDoubleTy(C&: getVMContext());
2471	}
2472
2473
2474	/// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in
2475	/// an 8-byte GPR. This means that we either have a scalar or we are talking
2476	/// about the high or low part of an up-to-16-byte struct. This routine picks
2477	/// the best LLVM IR type to represent this, which may be i64 or may be anything
2478	/// else that the backend will pass in a GPR that works better (e.g. i8, %foo,*
2479	/// etc).
2480	///
2481	/// PrefType is an LLVM IR type that corresponds to (part of) the IR type for
2482	/// the source type. IROffset is an offset in bytes into the LLVM IR type that
2483	/// the 8-byte value references. PrefType may be null.
2484	///
2485	/// SourceTy is the source-level type for the entire argument. SourceOffset is
2486	/// an offset into this that we're processing (which is always either 0 or 8).
2487	///
2488	llvm::Type *X86_64ABIInfo::
2489	GetINTEGERTypeAtOffset(llvm::Type IRType, unsigned* IROffset,
2490	QualType SourceTy, unsigned SourceOffset) const {
2491	// If we're dealing with an un-offset LLVM IR type, then it means that we're
2492	// returning an 8-byte unit starting with it. See if we can safely use it.
2493	if (IROffset == `0`) {
2494	// Pointers and int64's always fill the 8-byte unit.
2495	if ((isa<llvm::PointerType>(Val: IRType) && Has64BitPointers) \|\|
2496	IRType->isIntegerTy(Bitwidth: `64`))
2497	return IRType;
2498
2499	// If we have a 1/2/4-byte integer, we can use it only if the rest of the
2500	// goodness in the source type is just tail padding. This is allowed to
2501	// kick in for struct {double,int} on the int, but not on
2502	// struct{double,int,int} because we wouldn't return the second int. We
2503	// have to do this analysis on the source type because we can't depend on
2504	// unions being lowered a specific way etc.
2505	if (IRType->isIntegerTy(Bitwidth: `8`) \|\| IRType->isIntegerTy(Bitwidth: `16`) \|\|
2506	IRType->isIntegerTy(Bitwidth: `32`) \|\|
2507	(isa<llvm::PointerType>(Val: IRType) && !Has64BitPointers)) {
2508	unsigned BitWidth = isa<llvm::PointerType>(Val: IRType) ? `32` :
2509	cast<llvm::IntegerType>(Val: IRType)->getBitWidth();
2510
2511	if (BitsContainNoUserData(Ty: SourceTy, StartBit: SourceOffset*`8`+BitWidth,
2512	EndBit: SourceOffset*`8`+`64`, Context&: getContext()))
2513	return IRType;
2514	}
2515	}
2516
2517	if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val: IRType)) {
2518	// If this is a struct, recurse into the field at the specified offset.
2519	const llvm::StructLayout *SL = getDataLayout().getStructLayout(Ty: STy);
2520	if (IROffset < SL->getSizeInBytes()) {
2521	unsigned FieldIdx = SL->getElementContainingOffset(FixedOffset: IROffset);
2522	IROffset -= SL->getElementOffset(Idx: FieldIdx);
2523
2524	return GetINTEGERTypeAtOffset(IRType: STy->getElementType(N: FieldIdx), IROffset,
2525	SourceTy, SourceOffset);
2526	}
2527	}
2528
2529	if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(Val: IRType)) {
2530	llvm::Type *EltTy = ATy->getElementType();
2531	unsigned EltSize = getDataLayout().getTypeAllocSize(Ty: EltTy);
2532	unsigned EltOffset = IROffset/EltSize*EltSize;
2533	return GetINTEGERTypeAtOffset(IRType: EltTy, IROffset: IROffset-EltOffset, SourceTy,
2534	SourceOffset);
2535	}
2536
2537	// Okay, we don't have any better idea of what to pass, so we pass this in an
2538	// integer register that isn't too big to fit the rest of the struct.
2539	unsigned TySizeInBytes =
2540	(unsigned)getContext().getTypeSizeInChars(T: SourceTy).getQuantity();
2541
2542	assert(TySizeInBytes != SourceOffset && "Empty field?");
2543
2544	// It is always safe to classify this as an integer type up to i64 that
2545	// isn't larger than the structure.
2546	return llvm::IntegerType::get(C&: getVMContext(),
2547	NumBits: std::min(a: TySizeInBytes-SourceOffset, b: `8U`)*`8`);
2548	}
2549
2550
2551	/// GetX86_64ByValArgumentPair - Given a high and low type that can ideally
2552	/// be used as elements of a two register pair to pass or return, return a
2553	/// first class aggregate to represent them. For example, if the low part of
2554	/// a by-value argument should be passed as i32 and the high part as float,*
2555	/// return {i32, float}.*
2556	static llvm::Type *
2557	GetX86_64ByValArgumentPair(llvm::Type Lo, llvm::Type Hi,
2558	const llvm::DataLayout &TD) {
2559	// In order to correctly satisfy the ABI, we need to the high part to start
2560	// at offset 8. If the high and low parts we inferred are both 4-byte types
2561	// (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have
2562	// the second element at offset 8. Check for this:
2563	unsigned LoSize = (unsigned)TD.getTypeAllocSize(Ty: Lo);
2564	llvm::Align HiAlign = TD.getABITypeAlign(Ty: Hi);
2565	unsigned HiStart = llvm::alignTo(Size: LoSize, A: HiAlign);
2566	assert(HiStart != `0` && HiStart <= `8` && "Invalid x86-64 argument pair!");
2567
2568	// To handle this, we have to increase the size of the low part so that the
2569	// second element will start at an 8 byte offset. We can't increase the size
2570	// of the second element because it might make us access off the end of the
2571	// struct.
2572	if (HiStart != `8`) {
2573	// There are usually two sorts of types the ABI generation code can produce
2574	// for the low part of a pair that aren't 8 bytes in size: half, float or
2575	// i8/i16/i32. This can also include pointers when they are 32-bit (X32 and
2576	// NaCl).
2577	// Promote these to a larger type.
2578	if (Lo->isHalfTy() \|\| Lo->isFloatTy())
2579	Lo = llvm::Type::getDoubleTy(C&: Lo->getContext());
2580	else {
2581	assert((Lo->isIntegerTy() \|\| Lo->isPointerTy())
2582	&& "Invalid/unknown lo type");
2583	Lo = llvm::Type::getInt64Ty(C&: Lo->getContext());
2584	}
2585	}
2586
2587	llvm::StructType *Result = llvm::StructType::get(elt1: Lo, elts: Hi);
2588
2589	// Verify that the second element is at an 8-byte offset.
2590	assert(TD.getStructLayout(Result)->getElementOffset(`1`) == `8` &&
2591	"Invalid x86-64 argument pair!");
2592	return Result;
2593	}
2594
2595	ABIArgInfo X86_64ABIInfo::
2596	classifyReturnType(QualType RetTy) const {
2597	// AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
2598	// classification algorithm.
2599	X86_64ABIInfo::Class Lo, Hi;
2600	classify(Ty: RetTy, OffsetBase: `0`, Lo, Hi, /isNamedArg/ true);
2601
2602	// Check some invariants.
2603	assert((Hi != Memory \|\| Lo == Memory) && "Invalid memory classification.");
2604	assert((Hi != SSEUp \|\| Lo == SSE) && "Invalid SSEUp classification.");
2605
2606	llvm::Type ResType = nullptr*;
2607	switch (Lo) {
2608	case NoClass:
2609	if (Hi == NoClass)
2610	return ABIArgInfo::getIgnore();
2611	// If the low part is just padding, it takes no register, leave ResType
2612	// null.
2613	assert((Hi == SSE \|\| Hi == Integer \|\| Hi == X87Up) &&
2614	"Unknown missing lo part");
2615	break;
2616
2617	case SSEUp:
2618	case X87Up:
2619	llvm_unreachable("Invalid classification for lo word.");
2620
2621	// AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via
2622	// hidden argument.
2623	case Memory:
2624	return getIndirectReturnResult(Ty: RetTy);
2625
2626	// AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next
2627	// available register of the sequence %rax, %rdx is used.
2628	case Integer:
2629	ResType = GetINTEGERTypeAtOffset(IRType: CGT.ConvertType(T: RetTy), IROffset: `0`, SourceTy: RetTy, SourceOffset: `0`);
2630
2631	// If we have a sign or zero extended integer, make sure to return Extend
2632	// so that the parameter gets the right LLVM IR attributes.
2633	if (Hi == NoClass && isa<llvm::IntegerType>(Val: ResType)) {
2634	// Treat an enum type as its underlying type.
2635	if (const EnumType *EnumTy = RetTy ->getAs<EnumType>())
2636	RetTy = EnumTy->getDecl()->getIntegerType();
2637
2638	if (RetTy ->isIntegralOrEnumerationType() &&
2639	isPromotableIntegerTypeForABI(Ty: RetTy))
2640	return ABIArgInfo::getExtend(Ty: RetTy);
2641	}
2642	break;
2643
2644	// AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
2645	// available SSE register of the sequence %xmm0, %xmm1 is used.
2646	case SSE:
2647	ResType = GetSSETypeAtOffset(IRType: CGT.ConvertType(T: RetTy), IROffset: `0`, SourceTy: RetTy, SourceOffset: `0`);
2648	break;
2649
2650	// AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is
2651	// returned on the X87 stack in %st0 as 80-bit x87 number.
2652	case X87:
2653	ResType = llvm::Type::getX86_FP80Ty(C&: getVMContext());
2654	break;
2655
2656	// AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real
2657	// part of the value is returned in %st0 and the imaginary part in
2658	// %st1.
2659	case ComplexX87:
2660	assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
2661	ResType = llvm::StructType::get(elt1: llvm::Type::getX86_FP80Ty(C&: getVMContext()),
2662	elts: llvm::Type::getX86_FP80Ty(C&: getVMContext()));
2663	break;
2664	}
2665
2666	llvm::Type HighPart = nullptr*;
2667	switch (Hi) {
2668	// Memory was handled previously and X87 should
2669	// never occur as a hi class.
2670	case Memory:
2671	case X87:
2672	llvm_unreachable("Invalid classification for hi word.");
2673
2674	case ComplexX87: // Previously handled.
2675	case NoClass:
2676	break;
2677
2678	case Integer:
2679	HighPart = GetINTEGERTypeAtOffset(IRType: CGT.ConvertType(T: RetTy), IROffset: `8`, SourceTy: RetTy, SourceOffset: `8`);
2680	if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2681	return ABIArgInfo::getDirect(T: HighPart, Offset: `8`);
2682	break;
2683	case SSE:
2684	HighPart = GetSSETypeAtOffset(IRType: CGT.ConvertType(T: RetTy), IROffset: `8`, SourceTy: RetTy, SourceOffset: `8`);
2685	if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2686	return ABIArgInfo::getDirect(T: HighPart, Offset: `8`);
2687	break;
2688
2689	// AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
2690	// is passed in the next available eightbyte chunk if the last used
2691	// vector register.
2692	//
2693	// SSEUP should always be preceded by SSE, just widen.
2694	case SSEUp:
2695	assert(Lo == SSE && "Unexpected SSEUp classification.");
2696	ResType = GetByteVectorType(Ty: RetTy);
2697	break;
2698
2699	// AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is
2700	// returned together with the previous X87 value in %st0.
2701	case X87Up:
2702	// If X87Up is preceded by X87, we don't need to do
2703	// anything. However, in some cases with unions it may not be
2704	// preceded by X87. In such situations we follow gcc and pass the
2705	// extra bits in an SSE reg.
2706	if (Lo != X87) {
2707	HighPart = GetSSETypeAtOffset(IRType: CGT.ConvertType(T: RetTy), IROffset: `8`, SourceTy: RetTy, SourceOffset: `8`);
2708	if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2709	return ABIArgInfo::getDirect(T: HighPart, Offset: `8`);
2710	}
2711	break;
2712	}
2713
2714	// If a high part was specified, merge it together with the low part. It is
2715	// known to pass in the high eightbyte of the result. We do this by forming a
2716	// first class struct aggregate with the high and low part: {low, high}
2717	if (HighPart)
2718	ResType = GetX86_64ByValArgumentPair(Lo: ResType, Hi: HighPart, TD: getDataLayout());
2719
2720	return ABIArgInfo::getDirect(T: ResType);
2721	}
2722
2723	ABIArgInfo
2724	X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs,
2725	unsigned &neededInt, unsigned &neededSSE,
2726	bool isNamedArg, bool IsRegCall) const {
2727	Ty = useFirstFieldIfTransparentUnion(Ty);
2728
2729	X86_64ABIInfo::Class Lo, Hi;
2730	classify(Ty, OffsetBase: `0`, Lo, Hi, isNamedArg, IsRegCall);
2731
2732	// Check some invariants.
2733	// FIXME: Enforce these by construction.
2734	assert((Hi != Memory \|\| Lo == Memory) && "Invalid memory classification.");
2735	assert((Hi != SSEUp \|\| Lo == SSE) && "Invalid SSEUp classification.");
2736
2737	neededInt = `0`;
2738	neededSSE = `0`;
2739	llvm::Type ResType = nullptr*;
2740	switch (Lo) {
2741	case NoClass:
2742	if (Hi == NoClass)
2743	return ABIArgInfo::getIgnore();
2744	// If the low part is just padding, it takes no register, leave ResType
2745	// null.
2746	assert((Hi == SSE \|\| Hi == Integer \|\| Hi == X87Up) &&
2747	"Unknown missing lo part");
2748	break;
2749
2750	// AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
2751	// on the stack.
2752	case Memory:
2753
2754	// AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
2755	// COMPLEX_X87, it is passed in memory.
2756	case X87:
2757	case ComplexX87:
2758	if (getRecordArgABI(T: Ty, CXXABI&: getCXXABI()) == CGCXXABI::RAA_Indirect)
2759	++neededInt;
2760	return getIndirectResult(Ty, freeIntRegs);
2761
2762	case SSEUp:
2763	case X87Up:
2764	llvm_unreachable("Invalid classification for lo word.");
2765
2766	// AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next
2767	// available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8
2768	// and %r9 is used.
2769	case Integer:
2770	++neededInt;
2771
2772	// Pick an 8-byte type based on the preferred type.
2773	ResType = GetINTEGERTypeAtOffset(IRType: CGT.ConvertType(T: Ty), IROffset: `0`, SourceTy: Ty, SourceOffset: `0`);
2774
2775	// If we have a sign or zero extended integer, make sure to return Extend
2776	// so that the parameter gets the right LLVM IR attributes.
2777	if (Hi == NoClass && isa<llvm::IntegerType>(Val: ResType)) {
2778	// Treat an enum type as its underlying type.
2779	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
2780	Ty = EnumTy->getDecl()->getIntegerType();
2781
2782	if (Ty ->isIntegralOrEnumerationType() &&
2783	isPromotableIntegerTypeForABI(Ty))
2784	return ABIArgInfo::getExtend(Ty, T: CGT.ConvertType(T: Ty));
2785	}
2786
2787	break;
2788
2789	// AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next
2790	// available SSE register is used, the registers are taken in the
2791	// order from %xmm0 to %xmm7.
2792	case SSE: {
2793	llvm::Type *IRType = CGT.ConvertType(T: Ty);
2794	ResType = GetSSETypeAtOffset(IRType, IROffset: `0`, SourceTy: Ty, SourceOffset: `0`);
2795	++neededSSE;
2796	break;
2797	}
2798	}
2799
2800	llvm::Type HighPart = nullptr*;
2801	switch (Hi) {
2802	// Memory was handled previously, ComplexX87 and X87 should
2803	// never occur as hi classes, and X87Up must be preceded by X87,
2804	// which is passed in memory.
2805	case Memory:
2806	case X87:
2807	case ComplexX87:
2808	llvm_unreachable("Invalid classification for hi word.");
2809
2810	case NoClass: break;
2811
2812	case Integer:
2813	++neededInt;
2814	// Pick an 8-byte type based on the preferred type.
2815	HighPart = GetINTEGERTypeAtOffset(IRType: CGT.ConvertType(T: Ty), IROffset: `8`, SourceTy: Ty, SourceOffset: `8`);
2816
2817	if (Lo == NoClass) // Pass HighPart at offset 8 in memory.
2818	return ABIArgInfo::getDirect(T: HighPart, Offset: `8`);
2819	break;
2820
2821	// X87Up generally doesn't occur here (long double is passed in
2822	// memory), except in situations involving unions.
2823	case X87Up:
2824	case SSE:
2825	++neededSSE;
2826	HighPart = GetSSETypeAtOffset(IRType: CGT.ConvertType(T: Ty), IROffset: `8`, SourceTy: Ty, SourceOffset: `8`);
2827
2828	if (Lo == NoClass) // Pass HighPart at offset 8 in memory.
2829	return ABIArgInfo::getDirect(T: HighPart, Offset: `8`);
2830	break;
2831
2832	// AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
2833	// eightbyte is passed in the upper half of the last used SSE
2834	// register. This only happens when 128-bit vectors are passed.
2835	case SSEUp:
2836	assert(Lo == SSE && "Unexpected SSEUp classification");
2837	ResType = GetByteVectorType(Ty);
2838	break;
2839	}
2840
2841	// If a high part was specified, merge it together with the low part. It is
2842	// known to pass in the high eightbyte of the result. We do this by forming a
2843	// first class struct aggregate with the high and low part: {low, high}
2844	if (HighPart)
2845	ResType = GetX86_64ByValArgumentPair(Lo: ResType, Hi: HighPart, TD: getDataLayout());
2846
2847	return ABIArgInfo::getDirect(T: ResType);
2848	}
2849
2850	ABIArgInfo
2851	X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
2852	unsigned &NeededSSE,
2853	unsigned &MaxVectorWidth) const {
2854	auto RT = Ty ->getAs<RecordType>();
2855	assert(RT && "classifyRegCallStructType only valid with struct types");
2856
2857	if (RT->getDecl()->hasFlexibleArrayMember())
2858	return getIndirectReturnResult(Ty);
2859
2860	// Sum up bases
2861	if (auto CXXRD = dyn_cast<CXXRecordDecl>(Val: RT->getDecl())) {
2862	if (CXXRD->isDynamicClass()) {
2863	NeededInt = NeededSSE = `0`;
2864	return getIndirectReturnResult(Ty);
2865	}
2866
2867	for (const auto &I : CXXRD->bases())
2868	if (classifyRegCallStructTypeImpl(Ty: I.getType(), NeededInt, NeededSSE,
2869	MaxVectorWidth)
2870	.isIndirect()) {
2871	NeededInt = NeededSSE = `0`;
2872	return getIndirectReturnResult(Ty);
2873	}
2874	}
2875
2876	// Sum up members
2877	for (const auto *FD : RT->getDecl()->fields()) {
2878	QualType MTy = FD->getType();
2879	if (MTy ->isRecordType() && !MTy ->isUnionType()) {
2880	if (classifyRegCallStructTypeImpl(Ty: MTy, NeededInt, NeededSSE,
2881	MaxVectorWidth)
2882	.isIndirect()) {
2883	NeededInt = NeededSSE = `0`;
2884	return getIndirectReturnResult(Ty);
2885	}
2886	} else {
2887	unsigned LocalNeededInt, LocalNeededSSE;
2888	if (classifyArgumentType(Ty: MTy, UINT_MAX, neededInt&: LocalNeededInt, neededSSE&: LocalNeededSSE,
2889	isNamedArg: true, IsRegCall: true)
2890	.isIndirect()) {
2891	NeededInt = NeededSSE = `0`;
2892	return getIndirectReturnResult(Ty);
2893	}
2894	if (const auto *AT = getContext().getAsConstantArrayType(T: MTy))
2895	MTy = AT->getElementType();
2896	if (const auto *VT = MTy ->getAs<VectorType>())
2897	if (getContext().getTypeSize(T: VT) > MaxVectorWidth)
2898	MaxVectorWidth = getContext().getTypeSize(T: VT);
2899	NeededInt += LocalNeededInt;
2900	NeededSSE += LocalNeededSSE;
2901	}
2902	}
2903
2904	return ABIArgInfo::getDirect();
2905	}
2906
2907	ABIArgInfo
2908	X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
2909	unsigned &NeededSSE,
2910	unsigned &MaxVectorWidth) const {
2911
2912	NeededInt = `0`;
2913	NeededSSE = `0`;
2914	MaxVectorWidth = `0`;
2915
2916	return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE,
2917	MaxVectorWidth);
2918	}
2919
2920	void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
2921
2922	const unsigned CallingConv = FI.getCallingConvention();
2923	// It is possible to force Win64 calling convention on any x86_64 target by
2924	// using __attribute__((ms_abi)). In such case to correctly emit Win64
2925	// compatible code delegate this call to WinX86_64ABIInfo::computeInfo.
2926	if (CallingConv == llvm::CallingConv::Win64) {
2927	WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel);
2928	Win64ABIInfo.computeInfo(FI);
2929	return;
2930	}
2931
2932	bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall;
2933
2934	// Keep track of the number of assigned registers.
2935	unsigned FreeIntRegs = IsRegCall ? `11` : `6`;
2936	unsigned FreeSSERegs = IsRegCall ? `16` : `8`;
2937	unsigned NeededInt = `0`, NeededSSE = `0`, MaxVectorWidth = `0`;
2938
2939	if (!::classifyReturnType(CXXABI: getCXXABI(), FI, Info: *this)) {
2940	if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
2941	!FI.getReturnType()->getTypePtr()->isUnionType()) {
2942	FI.getReturnInfo() = classifyRegCallStructType(
2943	Ty: FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth);
2944	if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
2945	FreeIntRegs -= NeededInt;
2946	FreeSSERegs -= NeededSSE;
2947	} else {
2948	FI.getReturnInfo() = getIndirectReturnResult(Ty: FI.getReturnType());
2949	}
2950	} else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() &&
2951	getContext().getCanonicalType(T: FI.getReturnType()
2952	->getAs<ComplexType>()
2953	->getElementType()) ==
2954	getContext().LongDoubleTy)
2955	// Complex Long Double Type is passed in Memory when Regcall
2956	// calling convention is used.
2957	FI.getReturnInfo() = getIndirectReturnResult(Ty: FI.getReturnType());
2958	else
2959	FI.getReturnInfo() = classifyReturnType(RetTy: FI.getReturnType());
2960	}
2961
2962	// If the return value is indirect, then the hidden argument is consuming one
2963	// integer register.
2964	if (FI.getReturnInfo().isIndirect())
2965	--FreeIntRegs;
2966	else if (NeededSSE && MaxVectorWidth > `0`)
2967	FI.setMaxVectorWidth(MaxVectorWidth);
2968
2969	// The chain argument effectively gives us another free register.
2970	if (FI.isChainCall())
2971	++FreeIntRegs;
2972
2973	unsigned NumRequiredArgs = FI.getNumRequiredArgs();
2974	// AMD64-ABI 3.2.3p3: Once arguments are classified, the registers
2975	// get assigned (in left-to-right order) for passing as follows...
2976	unsigned ArgNo = `0`;
2977	for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
2978	it != ie; ++it, ++ArgNo) {
2979	bool IsNamedArg = ArgNo < NumRequiredArgs;
2980
2981	if (IsRegCall && it->type ->isStructureOrClassType())
2982	it->info = classifyRegCallStructType(Ty: it->type, NeededInt, NeededSSE,
2983	MaxVectorWidth);
2984	else
2985	it->info = classifyArgumentType(Ty: it->type, freeIntRegs: FreeIntRegs, neededInt&: NeededInt,
2986	neededSSE&: NeededSSE, isNamedArg: IsNamedArg);
2987
2988	// AMD64-ABI 3.2.3p3: If there are no registers available for any
2989	// eightbyte of an argument, the whole argument is passed on the
2990	// stack. If registers have already been assigned for some
2991	// eightbytes of such an argument, the assignments get reverted.
2992	if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
2993	FreeIntRegs -= NeededInt;
2994	FreeSSERegs -= NeededSSE;
2995	if (MaxVectorWidth > FI.getMaxVectorWidth())
2996	FI.setMaxVectorWidth(MaxVectorWidth);
2997	} else {
2998	it->info = getIndirectResult(Ty: it->type, freeIntRegs: FreeIntRegs);
2999	}
3000	}
3001	}
3002
3003	static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
3004	Address VAListAddr, QualType Ty) {
3005	Address overflow_arg_area_p =
3006	CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: `2`, Name: "overflow_arg_area_p");
3007	llvm::Value *overflow_arg_area =
3008	CGF.Builder.CreateLoad(Addr: overflow_arg_area_p, Name: "overflow_arg_area");
3009
3010	// AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
3011	// byte boundary if alignment needed by type exceeds 8 byte boundary.
3012	// It isn't stated explicitly in the standard, but in practice we use
3013	// alignment greater than 16 where necessary.
3014	CharUnits Align = CGF.getContext().getTypeAlignInChars(T: Ty);
3015	if (Align > CharUnits::fromQuantity(Quantity: `8`)) {
3016	overflow_arg_area = emitRoundPointerUpToAlignment(CGF, Ptr: overflow_arg_area,
3017	Align);
3018	}
3019
3020	// AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
3021	llvm::Type *LTy = CGF.ConvertTypeForMem(T: Ty);
3022	llvm::Value *Res = overflow_arg_area;
3023
3024	// AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
3025	// l->overflow_arg_area + sizeof(type).
3026	// AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
3027	// an 8 byte boundary.
3028
3029	uint64_t SizeInBytes = (CGF.getContext().getTypeSize(T: Ty) + `7`) / `8`;
3030	llvm::Value *Offset =
3031	llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: (SizeInBytes + `7`) & ~`7`);
3032	overflow_arg_area = CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: overflow_arg_area,
3033	IdxList: Offset, Name: "overflow_arg_area.next");
3034	CGF.Builder.CreateStore(Val: overflow_arg_area, Addr: overflow_arg_area_p);
3035
3036	// AMD64-ABI 3.5.7p5: Step 11. Return the fetched type.
3037	return Address (Res, LTy, Align);
3038	}
3039
3040	RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
3041	QualType Ty, AggValueSlot Slot) const {
3042	// Assume that va_list type is correct; should be pointer to LLVM type:
3043	// struct {
3044	// i32 gp_offset;
3045	// i32 fp_offset;
3046	// i8 overflow_arg_area;*
3047	// i8 reg_save_area;*
3048	// };
3049	unsigned neededInt, neededSSE;
3050
3051	Ty = getContext().getCanonicalType(T: Ty);
3052	ABIArgInfo AI = classifyArgumentType(Ty, freeIntRegs: `0`, neededInt, neededSSE,
3053	/isNamedArg/false);
3054
3055	// Empty records are ignored for parameter passing purposes.
3056	if (AI.isIgnore())
3057	return Slot.asRValue();
3058
3059	// AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
3060	// in the registers. If not go to step 7.
3061	if (!neededInt && !neededSSE)
3062	return CGF.EmitLoadOfAnyValue(
3063	V: CGF.MakeAddrLValue(Addr: EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty), T: Ty),
3064	Slot);
3065
3066	// AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
3067	// general purpose registers needed to pass type and num_fp to hold
3068	// the number of floating point registers needed.
3069
3070	// AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
3071	// registers. In the case: l->gp_offset > 48 - num_gp 8 or*
3072	// l->fp_offset > 304 - num_fp 16 go to step 7.*
3073	//
3074	// NOTE: 304 is a typo, there are (6 8 + 8 * 16) = 176 bytes of*
3075	// register save space).
3076
3077	llvm::Value InRegs = nullptr*;
3078	Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid();
3079	llvm::Value gp_offset = nullptr, fp_offset = nullptr;
3080	if (neededInt) {
3081	gp_offset_p = CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: `0`, Name: "gp_offset_p");
3082	gp_offset = CGF.Builder.CreateLoad(Addr: gp_offset_p, Name: "gp_offset");
3083	InRegs = llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: `48` - neededInt * `8`);
3084	InRegs = CGF.Builder.CreateICmpULE(LHS: gp_offset, RHS: InRegs, Name: "fits_in_gp");
3085	}
3086
3087	if (neededSSE) {
3088	fp_offset_p = CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: `1`, Name: "fp_offset_p");
3089	fp_offset = CGF.Builder.CreateLoad(Addr: fp_offset_p, Name: "fp_offset");
3090	llvm::Value *FitsInFP =
3091	llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: `176` - neededSSE * `16`);
3092	FitsInFP = CGF.Builder.CreateICmpULE(LHS: fp_offset, RHS: FitsInFP, Name: "fits_in_fp");
3093	InRegs = InRegs ? CGF.Builder.CreateAnd(LHS: InRegs, RHS: FitsInFP) : FitsInFP;
3094	}
3095
3096	llvm::BasicBlock *InRegBlock = CGF.createBasicBlock(name: "vaarg.in_reg");
3097	llvm::BasicBlock *InMemBlock = CGF.createBasicBlock(name: "vaarg.in_mem");
3098	llvm::BasicBlock *ContBlock = CGF.createBasicBlock(name: "vaarg.end");
3099	CGF.Builder.CreateCondBr(Cond: InRegs, True: InRegBlock, False: InMemBlock);
3100
3101	// Emit code to load the value if it was passed in registers.
3102
3103	CGF.EmitBlock(BB: InRegBlock);
3104
3105	// AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
3106	// an offset of l->gp_offset and/or l->fp_offset. This may require
3107	// copying to a temporary location in case the parameter is passed
3108	// in different register classes or requires an alignment greater
3109	// than 8 for general purpose registers and 16 for XMM registers.
3110	//
3111	// FIXME: This really results in shameful code when we end up needing to
3112	// collect arguments from different places; often what should result in a
3113	// simple assembling of a structure from scattered addresses has many more
3114	// loads than necessary. Can we clean this up?
3115	llvm::Type *LTy = CGF.ConvertTypeForMem(T: Ty);
3116	llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
3117	Addr: CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: `3`), Name: "reg_save_area");
3118
3119	Address RegAddr = Address::invalid();
3120	if (neededInt && neededSSE) {
3121	// FIXME: Cleanup.
3122	assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
3123	llvm::StructType *ST = cast<llvm::StructType>(Val: AI.getCoerceToType());
3124	Address Tmp = CGF.CreateMemTemp(T: Ty);
3125	Tmp = Tmp.withElementType(ElemTy: ST);
3126	assert(ST->getNumElements() == `2` && "Unexpected ABI info for mixed regs");
3127	llvm::Type *TyLo = ST->getElementType(N: `0`);
3128	llvm::Type *TyHi = ST->getElementType(N: `1`);
3129	assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) &&
3130	"Unexpected ABI info for mixed regs");
3131	llvm::Value *GPAddr =
3132	CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: RegSaveArea, IdxList: gp_offset);
3133	llvm::Value *FPAddr =
3134	CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: RegSaveArea, IdxList: fp_offset);
3135	llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
3136	llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;
3137
3138	// Copy the first element.
3139	// FIXME: Our choice of alignment here and below is probably pessimistic.
3140	llvm::Value *V = CGF.Builder.CreateAlignedLoad(
3141	Ty: TyLo, Addr: RegLoAddr,
3142	Align: CharUnits::fromQuantity(Quantity: getDataLayout().getABITypeAlign(Ty: TyLo)));
3143	CGF.Builder.CreateStore(Val: V, Addr: CGF.Builder.CreateStructGEP(Addr: Tmp, Index: `0`));
3144
3145	// Copy the second element.
3146	V = CGF.Builder.CreateAlignedLoad(
3147	Ty: TyHi, Addr: RegHiAddr,
3148	Align: CharUnits::fromQuantity(Quantity: getDataLayout().getABITypeAlign(Ty: TyHi)));
3149	CGF.Builder.CreateStore(Val: V, Addr: CGF.Builder.CreateStructGEP(Addr: Tmp, Index: `1`));
3150
3151	RegAddr = Tmp.withElementType(ElemTy: LTy);
3152	} else if (neededInt \|\| neededSSE == `1`) {
3153	// Copy to a temporary if necessary to ensure the appropriate alignment.
3154	auto TInfo = getContext().getTypeInfoInChars(T: Ty);
3155	uint64_t TySize = TInfo.Width.getQuantity();
3156	CharUnits TyAlign = TInfo.Align;
3157	llvm::Type CoTy = nullptr*;
3158	if (AI.isDirect())
3159	CoTy = AI.getCoerceToType();
3160
3161	llvm::Value *GpOrFpOffset = neededInt ? gp_offset : fp_offset;
3162	uint64_t Alignment = neededInt ? `8` : `16`;
3163	uint64_t RegSize = neededInt ? neededInt * `8` : `16`;
3164	// There are two cases require special handling:
3165	// 1)
3166	// ```
3167	// struct {
3168	// struct {} a[8];
3169	// int b;
3170	// };
3171	// ```
3172	// The lower 8 bytes of the structure are not stored,
3173	// so an 8-byte offset is needed when accessing the structure.
3174	// 2)
3175	// ```
3176	// struct {
3177	// long long a;
3178	// struct {} b;
3179	// };
3180	// ```
3181	// The stored size of this structure is smaller than its actual size,
3182	// which may lead to reading past the end of the register save area.
3183	if (CoTy && (AI.getDirectOffset() == `8` \|\| RegSize < TySize)) {
3184	Address Tmp = CGF.CreateMemTemp(T: Ty);
3185	llvm::Value *Addr =
3186	CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: RegSaveArea, IdxList: GpOrFpOffset);
3187	llvm::Value *Src = CGF.Builder.CreateAlignedLoad(Ty: CoTy, Addr, Align: TyAlign);
3188	llvm::Value *PtrOffset =
3189	llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: AI.getDirectOffset());
3190	Address Dst = Address (
3191	CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: Tmp.getBasePointer(), IdxList: PtrOffset),
3192	LTy, TyAlign);
3193	CGF.Builder.CreateStore(Val: Src, Addr: Dst);
3194	RegAddr = Tmp.withElementType(ElemTy: LTy);
3195	} else {
3196	RegAddr =
3197	Address (CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: RegSaveArea, IdxList: GpOrFpOffset),
3198	LTy, CharUnits::fromQuantity(Quantity: Alignment));
3199
3200	// Copy into a temporary if the type is more aligned than the
3201	// register save area.
3202	if (neededInt && TyAlign.getQuantity() > `8`) {
3203	Address Tmp = CGF.CreateMemTemp(T: Ty);
3204	CGF.Builder.CreateMemCpy(Dest: Tmp, Src: RegAddr, Size: TySize, IsVolatile: false);
3205	RegAddr = Tmp;
3206	}
3207	}
3208
3209	} else {
3210	assert(neededSSE == `2` && "Invalid number of needed registers!");
3211	// SSE registers are spaced 16 bytes apart in the register save
3212	// area, we need to collect the two eightbytes together.
3213	// The ABI isn't explicit about this, but it seems reasonable
3214	// to assume that the slots are 16-byte aligned, since the stack is
3215	// naturally 16-byte aligned and the prologue is expected to store
3216	// all the SSE registers to the RSA.
3217	Address RegAddrLo = Address (CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: RegSaveArea,
3218	IdxList: fp_offset),
3219	CGF.Int8Ty, CharUnits::fromQuantity(Quantity: `16`));
3220	Address RegAddrHi =
3221	CGF.Builder.CreateConstInBoundsByteGEP(Addr: RegAddrLo,
3222	Offset: CharUnits::fromQuantity(Quantity: `16`));
3223	llvm::Type *ST = AI.canHaveCoerceToType()
3224	? AI.getCoerceToType()
3225	: llvm::StructType::get(elt1: CGF.DoubleTy, elts: CGF.DoubleTy);
3226	llvm::Value *V;
3227	Address Tmp = CGF.CreateMemTemp(T: Ty);
3228	Tmp = Tmp.withElementType(ElemTy: ST);
3229	V = CGF.Builder.CreateLoad(
3230	Addr: RegAddrLo.withElementType(ElemTy: ST->getStructElementType(N: `0`)));
3231	CGF.Builder.CreateStore(Val: V, Addr: CGF.Builder.CreateStructGEP(Addr: Tmp, Index: `0`));
3232	V = CGF.Builder.CreateLoad(
3233	Addr: RegAddrHi.withElementType(ElemTy: ST->getStructElementType(N: `1`)));
3234	CGF.Builder.CreateStore(Val: V, Addr: CGF.Builder.CreateStructGEP(Addr: Tmp, Index: `1`));
3235
3236	RegAddr = Tmp.withElementType(ElemTy: LTy);
3237	}
3238
3239	// AMD64-ABI 3.5.7p5: Step 5. Set:
3240	// l->gp_offset = l->gp_offset + num_gp * 8
3241	// l->fp_offset = l->fp_offset + num_fp * 16.
3242	if (neededInt) {
3243	llvm::Value Offset = llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: neededInt `8`);
3244	CGF.Builder.CreateStore(Val: CGF.Builder.CreateAdd(LHS: gp_offset, RHS: Offset),
3245	Addr: gp_offset_p);
3246	}
3247	if (neededSSE) {
3248	llvm::Value Offset = llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: neededSSE `16`);
3249	CGF.Builder.CreateStore(Val: CGF.Builder.CreateAdd(LHS: fp_offset, RHS: Offset),
3250	Addr: fp_offset_p);
3251	}
3252	CGF.EmitBranch(Block: ContBlock);
3253
3254	// Emit code to load the value if it was passed in memory.
3255
3256	CGF.EmitBlock(BB: InMemBlock);
3257	Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
3258
3259	// Return the appropriate result.
3260
3261	CGF.EmitBlock(BB: ContBlock);
3262	Address ResAddr = emitMergePHI(CGF, Addr1: RegAddr, Block1: InRegBlock, Addr2: MemAddr, Block2: InMemBlock,
3263	Name: "vaarg.addr");
3264	return CGF.EmitLoadOfAnyValue(V: CGF.MakeAddrLValue(Addr: ResAddr, T: Ty), Slot);
3265	}
3266
3267	RValue X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
3268	QualType Ty, AggValueSlot Slot) const {
3269	// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3270	// not 1, 2, 4, or 8 bytes, must be passed by reference."
3271	uint64_t Width = getContext().getTypeSize(T: Ty);
3272	bool IsIndirect = Width > `64` \|\| !llvm::isPowerOf2_64(Value: Width);
3273
3274	return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, IsIndirect,
3275	ValueInfo: CGF.getContext().getTypeInfoInChars(T: Ty),
3276	SlotSizeAndAlign: CharUnits::fromQuantity(Quantity: `8`),
3277	/allowHigherAlign/ AllowHigherAlign: false, Slot);
3278	}
3279
3280	ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
3281	QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
3282	const Type Base = nullptr*;
3283	uint64_t NumElts = `0`;
3284
3285	if (!Ty ->isBuiltinType() && !Ty ->isVectorType() &&
3286	isHomogeneousAggregate(Ty, Base, Members&: NumElts) && FreeSSERegs >= NumElts) {
3287	FreeSSERegs -= NumElts;
3288	return getDirectX86Hva();
3289	}
3290	return current;
3291	}
3292
3293	ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
3294	bool IsReturnType, bool IsVectorCall,
3295	bool IsRegCall) const {
3296
3297	if (Ty ->isVoidType())
3298	return ABIArgInfo::getIgnore();
3299
3300	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
3301	Ty = EnumTy->getDecl()->getIntegerType();
3302
3303	TypeInfo Info = getContext().getTypeInfo(T: Ty);
3304	uint64_t Width = Info.Width;
3305	CharUnits Align = getContext().toCharUnitsFromBits(BitSize: Info.Align);
3306
3307	const RecordType *RT = Ty ->getAs<RecordType>();
3308	if (RT) {
3309	if (!IsReturnType) {
3310	if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, CXXABI&: getCXXABI()))
3311	return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace(),
3312	ByVal: RAA == CGCXXABI::RAA_DirectInMemory);
3313	}
3314
3315	if (RT->getDecl()->hasFlexibleArrayMember())
3316	return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace(),
3317	/ByVal=/false);
3318	}
3319
3320	const Type Base = nullptr*;
3321	uint64_t NumElts = `0`;
3322	// vectorcall adds the concept of a homogenous vector aggregate, similar to
3323	// other targets.
3324	if ((IsVectorCall \|\| IsRegCall) &&
3325	isHomogeneousAggregate(Ty, Base, Members&: NumElts)) {
3326	if (IsRegCall) {
3327	if (FreeSSERegs >= NumElts) {
3328	FreeSSERegs -= NumElts;
3329	if (IsReturnType \|\| Ty ->isBuiltinType() \|\| Ty ->isVectorType())
3330	return ABIArgInfo::getDirect();
3331	return ABIArgInfo::getExpand();
3332	}
3333	return ABIArgInfo::getIndirect(
3334	Alignment: Align, /AddrSpace=/getDataLayout().getAllocaAddrSpace(),
3335	/ByVal=/false);
3336	} else if (IsVectorCall) {
3337	if (FreeSSERegs >= NumElts &&
3338	(IsReturnType \|\| Ty ->isBuiltinType() \|\| Ty ->isVectorType())) {
3339	FreeSSERegs -= NumElts;
3340	return ABIArgInfo::getDirect();
3341	} else if (IsReturnType) {
3342	return ABIArgInfo::getExpand();
3343	} else if (!Ty ->isBuiltinType() && !Ty ->isVectorType()) {
3344	// HVAs are delayed and reclassified in the 2nd step.
3345	return ABIArgInfo::getIndirect(
3346	Alignment: Align, /AddrSpace=/getDataLayout().getAllocaAddrSpace(),
3347	/ByVal=/false);
3348	}
3349	}
3350	}
3351
3352	if (Ty ->isMemberPointerType()) {
3353	// If the member pointer is represented by an LLVM int or ptr, pass it
3354	// directly.
3355	llvm::Type *LLTy = CGT.ConvertType(T: Ty);
3356	if (LLTy->isPointerTy() \|\| LLTy->isIntegerTy())
3357	return ABIArgInfo::getDirect();
3358	}
3359
3360	if (RT \|\| Ty ->isAnyComplexType() \|\| Ty ->isMemberPointerType()) {
3361	// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3362	// not 1, 2, 4, or 8 bytes, must be passed by reference."
3363	if (Width > `64` \|\| !llvm::isPowerOf2_64(Value: Width))
3364	return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace(),
3365	/ByVal=/false);
3366
3367	// Otherwise, coerce it to a small integer.
3368	return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(), NumBits: Width));
3369	}
3370
3371	if (const BuiltinType *BT = Ty ->getAs<BuiltinType>()) {
3372	switch (BT->getKind()) {
3373	case BuiltinType::Bool:
3374	// Bool type is always extended to the ABI, other builtin types are not
3375	// extended.
3376	return ABIArgInfo::getExtend(Ty);
3377
3378	case BuiltinType::LongDouble:
3379	// Mingw64 GCC uses the old 80 bit extended precision floating point
3380	// unit. It passes them indirectly through memory.
3381	if (IsMingw64) {
3382	const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
3383	if (LDF == &llvm::APFloat::x87DoubleExtended())
3384	return ABIArgInfo::getIndirect(
3385	Alignment: Align, /AddrSpace=/getDataLayout().getAllocaAddrSpace(),
3386	/ByVal=/false);
3387	}
3388	break;
3389
3390	case BuiltinType::Int128:
3391	case BuiltinType::UInt128:
3392	case BuiltinType::Float128:
3393	// 128-bit float and integer types share the same ABI.
3394
3395	// If it's a parameter type, the normal ABI rule is that arguments larger
3396	// than 8 bytes are passed indirectly. GCC follows it. We follow it too,
3397	// even though it isn't particularly efficient.
3398	if (!IsReturnType)
3399	return ABIArgInfo::getIndirect(
3400	Alignment: Align, /AddrSpace=/getDataLayout().getAllocaAddrSpace(),
3401	/ByVal=/false);
3402
3403	// Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
3404	// Clang matches them for compatibility.
3405	// NOTE: GCC actually returns f128 indirectly but will hopefully change.
3406	// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054#c8.
3407	return ABIArgInfo::getDirect(T: llvm::FixedVectorType::get(
3408	ElementType: llvm::Type::getInt64Ty(C&: getVMContext()), NumElts: `2`));
3409
3410	default:
3411	break;
3412	}
3413	}
3414
3415	if (Ty ->isBitIntType()) {
3416	// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3417	// not 1, 2, 4, or 8 bytes, must be passed by reference."
3418	// However, non-power-of-two bit-precise integers will be passed as 1, 2, 4,
3419	// or 8 bytes anyway as long is it fits in them, so we don't have to check
3420	// the power of 2.
3421	if (Width <= `64`)
3422	return ABIArgInfo::getDirect();
3423	return ABIArgInfo::getIndirect(
3424	Alignment: Align, /AddrSpace=/getDataLayout().getAllocaAddrSpace(),
3425	/ByVal=/false);
3426	}
3427
3428	return ABIArgInfo::getDirect();
3429	}
3430
3431	void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
3432	const unsigned CC = FI.getCallingConvention();
3433	bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
3434	bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;
3435
3436	// If __attribute__((sysv_abi)) is in use, use the SysV argument
3437	// classification rules.
3438	if (CC == llvm::CallingConv::X86_64_SysV) {
3439	X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
3440	SysVABIInfo.computeInfo(FI);
3441	return;
3442	}
3443
3444	unsigned FreeSSERegs = `0`;
3445	if (IsVectorCall) {
3446	// We can use up to 4 SSE return registers with vectorcall.
3447	FreeSSERegs = `4`;
3448	} else if (IsRegCall) {
3449	// RegCall gives us 16 SSE registers.
3450	FreeSSERegs = `16`;
3451	}
3452
3453	if (!getCXXABI().classifyReturnType(FI))
3454	FI.getReturnInfo() = classify(Ty: FI.getReturnType(), FreeSSERegs, IsReturnType: true,
3455	IsVectorCall, IsRegCall);
3456
3457	if (IsVectorCall) {
3458	// We can use up to 6 SSE register parameters with vectorcall.
3459	FreeSSERegs = `6`;
3460	} else if (IsRegCall) {
3461	// RegCall gives us 16 SSE registers, we can reuse the return registers.
3462	FreeSSERegs = `16`;
3463	}
3464
3465	unsigned ArgNum = `0`;
3466	unsigned ZeroSSERegs = `0`;
3467	for (auto &I : FI.arguments()) {
3468	// Vectorcall in x64 only permits the first 6 arguments to be passed as
3469	// XMM/YMM registers. After the sixth argument, pretend no vector
3470	// registers are left.
3471	unsigned *MaybeFreeSSERegs =
3472	(IsVectorCall && ArgNum >= `6`) ? &ZeroSSERegs : &FreeSSERegs;
3473	I.info =
3474	classify(Ty: I.type, FreeSSERegs&: MaybeFreeSSERegs, IsReturnType: false*, IsVectorCall, IsRegCall);
3475	++ArgNum;
3476	}
3477
3478	if (IsVectorCall) {
3479	// For vectorcall, assign aggregate HVAs to any free vector registers in a
3480	// second pass.
3481	for (auto &I : FI.arguments())
3482	I.info = reclassifyHvaArgForVectorCall(Ty: I.type, FreeSSERegs, current: I.info);
3483	}
3484	}
3485
3486	RValue WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
3487	QualType Ty, AggValueSlot Slot) const {
3488	// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3489	// not 1, 2, 4, or 8 bytes, must be passed by reference."
3490	uint64_t Width = getContext().getTypeSize(T: Ty);
3491	bool IsIndirect = Width > `64` \|\| !llvm::isPowerOf2_64(Value: Width);
3492
3493	return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, IsIndirect,
3494	ValueInfo: CGF.getContext().getTypeInfoInChars(T: Ty),
3495	SlotSizeAndAlign: CharUnits::fromQuantity(Quantity: `8`),
3496	/allowHigherAlign/ AllowHigherAlign: false, Slot);
3497	}
3498
3499	std::unique_ptr<TargetCodeGenInfo> CodeGen::createX86_32TargetCodeGenInfo(
3500	CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
3501	unsigned NumRegisterParameters, bool SoftFloatABI) {
3502	bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
3503	Triple: CGM.getTriple(), Opts: CGM.getCodeGenOpts());
3504	return std::make_unique<X86_32TargetCodeGenInfo>(
3505	args&: CGM.getTypes(), args&: DarwinVectorABI, args&: RetSmallStructInRegABI, args&: Win32StructABI,
3506	args&: NumRegisterParameters, args&: SoftFloatABI);
3507	}
3508
3509	std::unique_ptr<TargetCodeGenInfo> CodeGen::createWinX86_32TargetCodeGenInfo(
3510	CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
3511	unsigned NumRegisterParameters) {
3512	bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
3513	Triple: CGM.getTriple(), Opts: CGM.getCodeGenOpts());
3514	return std::make_unique<WinX86_32TargetCodeGenInfo>(
3515	args&: CGM.getTypes(), args&: DarwinVectorABI, args&: RetSmallStructInRegABI, args&: Win32StructABI,
3516	args&: NumRegisterParameters);
3517	}
3518
3519	std::unique_ptr<TargetCodeGenInfo>
3520	CodeGen::createX86_64TargetCodeGenInfo(CodeGenModule &CGM,
3521	X86AVXABILevel AVXLevel) {
3522	return std::make_unique<X86_64TargetCodeGenInfo>(args&: CGM.getTypes(), args&: AVXLevel);
3523	}
3524
3525	std::unique_ptr<TargetCodeGenInfo>
3526	CodeGen::createWinX86_64TargetCodeGenInfo(CodeGenModule &CGM,
3527	X86AVXABILevel AVXLevel) {
3528	return std::make_unique<WinX86_64TargetCodeGenInfo>(args&: CGM.getTypes(), args&: AVXLevel);
3529	}
3530

source code of clang/lib/CodeGen/Targets/X86.cpp