//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

15#include "AArch64.h"
16#include "AArch64CallingConvention.h"
17#include "AArch64MachineFunctionInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/Analysis/BranchProbabilityInfo.h"
27#include "llvm/CodeGen/CallingConvLower.h"
28#include "llvm/CodeGen/FastISel.h"
29#include "llvm/CodeGen/FunctionLoweringInfo.h"
30#include "llvm/CodeGen/ISDOpcodes.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineInstr.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineMemOperand.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/RuntimeLibcalls.h"
39#include "llvm/CodeGen/ValueTypes.h"
40#include "llvm/CodeGenTypes/MachineValueType.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
48#include "llvm/IR/DerivedTypes.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/GetElementPtrTypeIterator.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
54#include "llvm/IR/Instructions.h"
55#include "llvm/IR/IntrinsicInst.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Operator.h"
59#include "llvm/IR/Type.h"
60#include "llvm/IR/User.h"
61#include "llvm/IR/Value.h"
62#include "llvm/MC/MCInstrDesc.h"
63#include "llvm/MC/MCRegisterInfo.h"
64#include "llvm/MC/MCSymbol.h"
65#include "llvm/Support/AtomicOrdering.h"
66#include "llvm/Support/Casting.h"
67#include "llvm/Support/CodeGen.h"
68#include "llvm/Support/Compiler.h"
69#include "llvm/Support/ErrorHandling.h"
70#include "llvm/Support/MathExtras.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 using BaseKind = enum {
85 RegBase,
86 FrameIndexBase
87 };
88
89 private:
90 BaseKind Kind = RegBase;
91 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
92 union {
93 unsigned Reg;
94 int FI;
95 } Base;
96 unsigned OffsetReg = 0;
97 unsigned Shift = 0;
98 int64_t Offset = 0;
99 const GlobalValue *GV = nullptr;
100
101 public:
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
108 bool isRegBase() const { return Kind == RegBase; }
109 bool isFIBase() const { return Kind == FrameIndexBase; }
110
111 void setReg(unsigned Reg) {
112 assert(isRegBase() && "Invalid base register access!");
113 Base.Reg = Reg;
114 }
115
116 unsigned getReg() const {
117 assert(isRegBase() && "Invalid base register access!");
118 return Base.Reg;
119 }
120
121 void setOffsetReg(unsigned Reg) {
122 OffsetReg = Reg;
123 }
124
125 unsigned getOffsetReg() const {
126 return OffsetReg;
127 }
128
129 void setFI(unsigned FI) {
130 assert(isFIBase() && "Invalid base frame index access!");
131 Base.FI = FI;
132 }
133
134 unsigned getFI() const {
135 assert(isFIBase() && "Invalid base frame index access!");
136 return Base.FI;
137 }
138
139 void setOffset(int64_t O) { Offset = O; }
140 int64_t getOffset() { return Offset; }
141 void setShift(unsigned S) { Shift = S; }
142 unsigned getShift() { return Shift; }
143
144 void setGlobalValue(const GlobalValue *G) { GV = G; }
145 const GlobalValue *getGlobalValue() { return GV; }
146 };
147
148 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
149 /// make the right decision when generating code for different targets.
150 const AArch64Subtarget *Subtarget;
151 LLVMContext *Context;
152
153 bool fastLowerArguments() override;
154 bool fastLowerCall(CallLoweringInfo &CLI) override;
155 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
156
157private:
158 // Selection routines.
159 bool selectAddSub(const Instruction *I);
160 bool selectLogicalOp(const Instruction *I);
161 bool selectLoad(const Instruction *I);
162 bool selectStore(const Instruction *I);
163 bool selectBranch(const Instruction *I);
164 bool selectIndirectBr(const Instruction *I);
165 bool selectCmp(const Instruction *I);
166 bool selectSelect(const Instruction *I);
167 bool selectFPExt(const Instruction *I);
168 bool selectFPTrunc(const Instruction *I);
169 bool selectFPToInt(const Instruction *I, bool Signed);
170 bool selectIntToFP(const Instruction *I, bool Signed);
171 bool selectRem(const Instruction *I, unsigned ISDOpcode);
172 bool selectRet(const Instruction *I);
173 bool selectTrunc(const Instruction *I);
174 bool selectIntExt(const Instruction *I);
175 bool selectMul(const Instruction *I);
176 bool selectShift(const Instruction *I);
177 bool selectBitCast(const Instruction *I);
178 bool selectFRem(const Instruction *I);
179 bool selectSDiv(const Instruction *I);
180 bool selectGetElementPtr(const Instruction *I);
181 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
182
183 // Utility helper routines.
184 bool isTypeLegal(Type *Ty, MVT &VT);
185 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
186 bool isValueAvailable(const Value *V) const;
187 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
188 bool computeCallAddress(const Value *V, Address &Addr);
189 bool simplifyAddress(Address &Addr, MVT VT);
190 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
191 MachineMemOperand::Flags Flags,
192 unsigned ScaleFactor, MachineMemOperand *MMO);
193 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
194 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
195 MaybeAlign Alignment);
196 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
197 const Value *Cond);
198 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
199 bool optimizeSelect(const SelectInst *SI);
200 unsigned getRegForGEPIndex(const Value *Idx);
201
202 // Emit helper routines.
203 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
204 const Value *RHS, bool SetFlags = false,
205 bool WantResult = true, bool IsZExt = false);
206 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
207 unsigned RHSReg, bool SetFlags = false,
208 bool WantResult = true);
209 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
210 uint64_t Imm, bool SetFlags = false,
211 bool WantResult = true);
212 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
213 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
214 uint64_t ShiftImm, bool SetFlags = false,
215 bool WantResult = true);
216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
218 uint64_t ShiftImm, bool SetFlags = false,
219 bool WantResult = true);
220
221 // Emit functions.
222 bool emitCompareAndBranch(const BranchInst *BI);
223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228 MachineMemOperand *MMO = nullptr);
229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230 MachineMemOperand *MMO = nullptr);
231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232 MachineMemOperand *MMO = nullptr);
233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236 bool SetFlags = false, bool WantResult = true,
237 bool IsZExt = false);
238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240 bool SetFlags = false, bool WantResult = true,
241 bool IsZExt = false);
242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243 bool WantResult = true);
244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
245 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
246 bool WantResult = true);
247 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
248 const Value *RHS);
249 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250 uint64_t Imm);
251 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252 unsigned RHSReg, uint64_t ShiftImm);
253 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
254 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
258 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
259 bool IsZExt = true);
260 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
261 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
262 bool IsZExt = true);
263 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
264 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
265 bool IsZExt = false);
266
267 unsigned materializeInt(const ConstantInt *CI, MVT VT);
268 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
269 unsigned materializeGV(const GlobalValue *GV);
270
271 // Call handling routines.
272private:
273 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
274 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
275 unsigned &NumBytes);
276 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
277
278public:
279 // Backend specific FastISel code.
280 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
281 unsigned fastMaterializeConstant(const Constant *C) override;
282 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
283
284 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
285 const TargetLibraryInfo *LibInfo)
286 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
287 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
288 Context = &FuncInfo.Fn->getContext();
289 }
290
291 bool fastSelectInstruction(const Instruction *I) override;
292
293#include "AArch64GenFastISel.inc"
294};
295
296} // end anonymous namespace
297
/// Check if the sign-/zero-extend will be a noop.
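/// For example, a zero-extend of a single-use load can be folded into the
/// load itself, and an argument that already carries a matching zext/sext
/// attribute needs no extra instruction.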
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}
316
/// Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
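/// For example, an i32 or f32 access is implicitly scaled by 4, so the
/// scaled-immediate load/store forms encode Offset / 4.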
319static unsigned getImplicitScaleFactor(MVT VT) {
320 switch (VT.SimpleTy) {
321 default:
322 return 0; // invalid
323 case MVT::i1: // fall-through
324 case MVT::i8:
325 return 1;
326 case MVT::i16:
327 return 2;
328 case MVT::i32: // fall-through
329 case MVT::f32:
330 return 4;
331 case MVT::i64: // fall-through
332 case MVT::f64:
333 return 8;
334 }
335}
336
337CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338 if (CC == CallingConv::GHC)
339 return CC_AArch64_GHC;
340 if (CC == CallingConv::CFGuard_Check)
341 return CC_AArch64_Win64_CFGuard_Check;
342 if (Subtarget->isTargetDarwin())
343 return CC_AArch64_DarwinPCS;
344 if (Subtarget->isTargetWindows())
345 return CC_AArch64_Win64PCS;
346 return CC_AArch64_AAPCS;
347}
348
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}
372
unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
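  // (That is, an FMOVWSr/FMOVXDr copy from WZR/XZR, as done in
  // fastMaterializeFloatZero below, rather than an FMOVSi/FMOVDi immediate.)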
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
  if (Imm != -1) {
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the large code model materialize the FP constant in code.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}
444
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                                     AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64 bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    if (OpFlags & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      //
      // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
      // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
      // are not exactly 1:1 with FastISel so we cannot easily abstract this
      // out. At some point, it would be nice to find a way to not have this
      // duplicate code.
      unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
              DstReg)
          .addReg(ADRPReg)
          .addGlobalAddress(GV, /*Offset=*/0x100000000,
                            AArch64II::MO_PREL | AArch64II::MO_G3)
          .addImm(48);
      ADRPReg = DstReg;
    }

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}
536
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need to have a somewhat special treatment.
  if (isa<ConstantPointerNull>(C)) {
    assert(VT == MVT::i64 && "Expected 64-bit pointers");
    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
  }

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}
560
unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
}
576
/// Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}
589
// Computes the address to get to an object.
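// On success, Addr holds either a frame index or a base register, optionally
// combined with a shifted/extended offset register and/or a constant offset.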
591bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
592{
593 const User *U = nullptr;
594 unsigned Opcode = Instruction::UserOp1;
595 if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
596 // Don't walk into other basic blocks unless the object is an alloca from
597 // another block, otherwise it may not have a virtual register assigned.
598 if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) ||
599 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
600 Opcode = I->getOpcode();
601 U = I;
602 }
603 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
604 Opcode = C->getOpcode();
605 U = C;
606 }
607
608 if (auto *Ty = dyn_cast<PointerType>(Val: Obj->getType()))
609 if (Ty->getAddressSpace() > 255)
610 // Fast instruction selection doesn't support the special
611 // address spaces.
612 return false;
613
614 switch (Opcode) {
615 default:
616 break;
617 case Instruction::BitCast:
618 // Look through bitcasts.
619 return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty);
620
621 case Instruction::IntToPtr:
622 // Look past no-op inttoptrs.
623 if (TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) ==
624 TLI.getPointerTy(DL))
625 return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty);
626 break;
627
628 case Instruction::PtrToInt:
629 // Look past no-op ptrtoints.
630 if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
631 return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty);
632 break;
633
634 case Instruction::GetElementPtr: {
635 Address SavedAddr = Addr;
636 uint64_t TmpOffset = Addr.getOffset();
637
638 // Iterate through the GEP folding the constants into offsets where
639 // we can.
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = GTI.getSequentialElementStride(DL);
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
668 }
669
670 // Try to grab the base operand now.
671 Addr.setOffset(TmpOffset);
672 if (computeAddress(Obj: U->getOperand(i: 0), Addr, Ty))
673 return true;
674
675 // We failed, restore everything and try the other options.
676 Addr = SavedAddr;
677
678 unsupported_gep:
679 break;
680 }
681 case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
712 case Instruction::Sub: {
713 // Subs of constants are common and easy enough.
714 const Value *LHS = U->getOperand(i: 0);
715 const Value *RHS = U->getOperand(i: 1);
716
717 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS)) {
718 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
719 return computeAddress(Obj: LHS, Addr, Ty);
720 }
721 break;
722 }
723 case Instruction::Shl: {
724 if (Addr.getOffsetReg())
725 break;
726
727 const auto *CI = dyn_cast<ConstantInt>(Val: U->getOperand(i: 1));
728 if (!CI)
729 break;
730
731 unsigned Val = CI->getZExtValue();
732 if (Val < 1 || Val > 3)
733 break;
734
735 uint64_t NumBytes = 0;
736 if (Ty && Ty->isSized()) {
737 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
738 NumBytes = NumBits / 8;
739 if (!isPowerOf2_64(Value: NumBits))
740 NumBytes = 0;
741 }
742
743 if (NumBytes != (1ULL << Val))
744 break;
745
746 Addr.setShift(Val);
747 Addr.setExtendType(AArch64_AM::LSL);
748
    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Register Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
796 case Instruction::Mul: {
797 if (Addr.getOffsetReg())
798 break;
799
800 if (!isMulPowOf2(I: U))
801 break;
802
803 const Value *LHS = U->getOperand(i: 0);
804 const Value *RHS = U->getOperand(i: 1);
805
806 // Canonicalize power-of-2 value to the RHS.
807 if (const auto *C = dyn_cast<ConstantInt>(Val: LHS))
808 if (C->getValue().isPowerOf2())
809 std::swap(a&: LHS, b&: RHS);
810
    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
812 const auto *C = cast<ConstantInt>(Val: RHS);
813 unsigned Val = C->getValue().logBase2();
814 if (Val < 1 || Val > 3)
815 break;
816
817 uint64_t NumBytes = 0;
818 if (Ty && Ty->isSized()) {
819 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
820 NumBytes = NumBits / 8;
821 if (!isPowerOf2_64(Value: NumBits))
822 NumBytes = 0;
823 }
824
825 if (NumBytes != (1ULL << Val))
826 break;
827
828 Addr.setShift(Val);
829 Addr.setExtendType(AArch64_AM::LSL);
830
831 const Value *Src = LHS;
832 if (const auto *I = dyn_cast<Instruction>(Val: Src)) {
833 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
834 // Fold the zext or sext when it won't become a noop.
835 if (const auto *ZE = dyn_cast<ZExtInst>(Val: I)) {
836 if (!isIntExtFree(I: ZE) &&
837 ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
838 Addr.setExtendType(AArch64_AM::UXTW);
839 Src = ZE->getOperand(i_nocapture: 0);
840 }
841 } else if (const auto *SE = dyn_cast<SExtInst>(Val: I)) {
842 if (!isIntExtFree(I: SE) &&
843 SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
844 Addr.setExtendType(AArch64_AM::SXTW);
845 Src = SE->getOperand(i_nocapture: 0);
846 }
847 }
848 }
849 }
850
851 Register Reg = getRegForValue(V: Src);
852 if (!Reg)
853 return false;
854 Addr.setOffsetReg(Reg);
855 return true;
856 }
857 case Instruction::And: {
858 if (Addr.getOffsetReg())
859 break;
860
861 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
862 break;
863
864 const Value *LHS = U->getOperand(i: 0);
865 const Value *RHS = U->getOperand(i: 1);
866
867 if (const auto *C = dyn_cast<ConstantInt>(Val: LHS))
868 if (C->getValue() == 0xffffffff)
869 std::swap(a&: LHS, b&: RHS);
870
871 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
872 if (C->getValue() == 0xffffffff) {
873 Addr.setShift(0);
874 Addr.setExtendType(AArch64_AM::LSL);
875 Addr.setExtendType(AArch64_AM::UXTW);
876
877 Register Reg = getRegForValue(V: LHS);
878 if (!Reg)
879 return false;
880 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
881 Addr.setOffsetReg(Reg);
882 return true;
883 }
884 break;
885 }
886 case Instruction::SExt:
887 case Instruction::ZExt: {
888 if (!Addr.getReg() || Addr.getOffsetReg())
889 break;
890
891 const Value *Src = nullptr;
892 // Fold the zext or sext when it won't become a noop.
893 if (const auto *ZE = dyn_cast<ZExtInst>(Val: U)) {
894 if (!isIntExtFree(I: ZE) && ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
895 Addr.setExtendType(AArch64_AM::UXTW);
896 Src = ZE->getOperand(i_nocapture: 0);
897 }
898 } else if (const auto *SE = dyn_cast<SExtInst>(Val: U)) {
899 if (!isIntExtFree(I: SE) && SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
900 Addr.setExtendType(AArch64_AM::SXTW);
901 Src = SE->getOperand(i_nocapture: 0);
902 }
903 }
904
905 if (!Src)
906 break;
907
908 Addr.setShift(0);
909 Register Reg = getRegForValue(V: Src);
910 if (!Reg)
911 return false;
912 Addr.setOffsetReg(Reg);
913 return true;
914 }
915 } // end switch
916
917 if (Addr.isRegBase() && !Addr.getReg()) {
918 Register Reg = getRegForValue(V: Obj);
919 if (!Reg)
920 return false;
921 Addr.setReg(Reg);
922 return true;
923 }
924
925 if (!Addr.getOffsetReg()) {
926 Register Reg = getRegForValue(V: Obj);
927 if (!Reg)
928 return false;
929 Addr.setOffsetReg(Reg);
930 return true;
931 }
932
933 return false;
934}
935
936bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
937 const User *U = nullptr;
938 unsigned Opcode = Instruction::UserOp1;
939 bool InMBB = true;
940
941 if (const auto *I = dyn_cast<Instruction>(Val: V)) {
942 Opcode = I->getOpcode();
943 U = I;
944 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
945 } else if (const auto *C = dyn_cast<ConstantExpr>(Val: V)) {
946 Opcode = C->getOpcode();
947 U = C;
948 }
949
950 switch (Opcode) {
951 default: break;
952 case Instruction::BitCast:
953 // Look past bitcasts if its operand is in the same BB.
954 if (InMBB)
955 return computeCallAddress(V: U->getOperand(i: 0), Addr);
956 break;
957 case Instruction::IntToPtr:
958 // Look past no-op inttoptrs if its operand is in the same BB.
959 if (InMBB &&
960 TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) ==
961 TLI.getPointerTy(DL))
962 return computeCallAddress(V: U->getOperand(i: 0), Addr);
963 break;
964 case Instruction::PtrToInt:
965 // Look past no-op ptrtoints if its operand is in the same BB.
966 if (InMBB && TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
967 return computeCallAddress(V: U->getOperand(i: 0), Addr);
968 break;
969 }
970
971 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: V)) {
972 Addr.setGlobalValue(GV);
973 return true;
974 }
975
976 // If all else fails, try to materialize the value in a register.
977 if (!Addr.getGlobalValue()) {
978 Addr.setReg(getRegForValue(V));
979 return Addr.getReg() != 0;
980 }
981
982 return false;
983}
984
985bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);
987
988 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
989 return false;
990
991 // Only handle simple types.
992 if (evt == MVT::Other || !evt.isSimple())
993 return false;
994 VT = evt.getSimpleVT();
995
996 // This is a legal type, but it's not something we handle in fast-isel.
997 if (VT == MVT::f128)
998 return false;
999
1000 // Handle all other legal types, i.e. a register that will directly hold this
1001 // value.
1002 return TLI.isTypeLegal(VT);
1003}
1004
/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
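/// Such types are legalized by the individual emitters, typically by
/// sign- or zero-extending the operands to a 32-bit operation.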
1009bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1010 if (Ty->isVectorTy() && !IsVectorAllowed)
1011 return false;
1012
1013 if (isTypeLegal(Ty, VT))
1014 return true;
1015
  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
1018 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1019 return true;
1020
1021 return false;
1022}
1023
1024bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1030}
1031
1032bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1033 if (Subtarget->isTargetILP32())
1034 return false;
1035
1036 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1037 if (!ScaleFactor)
1038 return false;
1039
1040 bool ImmediateOffsetNeedsLowering = false;
1041 bool RegisterOffsetNeedsLowering = false;
1042 int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
1047 ImmediateOffsetNeedsLowering = true;
1048
1049 // Cannot encode an offset register and an immediate offset in the same
1050 // instruction. Fold the immediate offset into the load/store instruction and
1051 // emit an additional add to take care of the offset register.
1052 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1053 RegisterOffsetNeedsLowering = true;
1054
1055 // Cannot encode zero register as base.
1056 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1057 RegisterOffsetNeedsLowering = true;
1058
1059 // If this is a stack pointer and the offset needs to be simplified then put
1060 // the alloca address into a register, set the base type back to register and
1061 // continue. This should almost never happen.
1062 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1063 {
1064 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1065 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1066 ResultReg)
1067 .addFrameIndex(Addr.getFI())
1068 .addImm(0)
1069 .addImm(0);
1070 Addr.setKind(Address::RegBase);
1071 Addr.setReg(ResultReg);
1072 }
1073
1074 if (RegisterOffsetNeedsLowering) {
1075 unsigned ResultReg = 0;
1076 if (Addr.getReg()) {
1077 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1078 Addr.getExtendType() == AArch64_AM::UXTW )
1079 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1080 Addr.getOffsetReg(), Addr.getExtendType(),
1081 Addr.getShift());
1082 else
1083 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1084 Addr.getOffsetReg(), AArch64_AM::LSL,
1085 Addr.getShift());
1086 } else {
1087 if (Addr.getExtendType() == AArch64_AM::UXTW)
1088 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1089 Addr.getShift(), /*IsZExt=*/true);
1090 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1091 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1092 Addr.getShift(), /*IsZExt=*/false);
1093 else
1094 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1095 Addr.getShift());
1096 }
1097 if (!ResultReg)
1098 return false;
1099
1100 Addr.setReg(ResultReg);
1101 Addr.setOffsetReg(0);
1102 Addr.setShift(0);
1103 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1104 }
1105
1106 // Since the offset is too large for the load/store instruction get the
1107 // reg+offset into a register.
1108 if (ImmediateOffsetNeedsLowering) {
1109 unsigned ResultReg;
1110 if (Addr.getReg())
1111 // Try to fold the immediate into the add instruction.
1112 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1113 else
1114 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1115
1116 if (!ResultReg)
1117 return false;
1118 Addr.setReg(ResultReg);
1119 Addr.setOffset(0);
1120 }
1121 return true;
1122}
1123
1124void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1125 const MachineInstrBuilder &MIB,
1126 MachineMemOperand::Flags Flags,
1127 unsigned ScaleFactor,
1128 MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
        constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs() + Idx));
    Addr.setOffsetReg(constrainOperandRegClass(II, Addr.getOffsetReg(),
                                               II.getNumDefs() + Idx + 1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
1162}
1163
1164unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1165 const Value *RHS, bool SetFlags,
1166 bool WantResult, bool IsZExt) {
1167 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1168 bool NeedExtend = false;
1169 switch (RetVT.SimpleTy) {
1170 default:
1171 return 0;
1172 case MVT::i1:
1173 NeedExtend = true;
1174 break;
1175 case MVT::i8:
1176 NeedExtend = true;
1177 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1178 break;
1179 case MVT::i16:
1180 NeedExtend = true;
1181 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1182 break;
1183 case MVT::i32: // fall-through
1184 case MVT::i64:
1185 break;
1186 }
1187 MVT SrcVT = RetVT;
1188 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1189
  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
                                WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1227
1228 if (ResultReg)
1229 return ResultReg;
1230
1231 // Only extend the RHS within the instruction if there is a valid extend type.
1232 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1233 isValueAvailable(V: RHS)) {
1234 Register RHSReg = getRegForValue(V: RHS);
1235 if (!RHSReg)
1236 return 0;
1237 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtType: ExtendType, ShiftImm: 0,
1238 SetFlags, WantResult);
1239 }
1240
1241 // Check if the mul can be folded into the instruction.
1242 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1243 if (isMulPowOf2(I: RHS)) {
1244 const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0);
1245 const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1);
1246
1247 if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS))
1248 if (C->getValue().isPowerOf2())
1249 std::swap(a&: MulLHS, b&: MulRHS);
1250
1251 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1252 uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2();
1253 Register RHSReg = getRegForValue(V: MulLHS);
1254 if (!RHSReg)
1255 return 0;
1256 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType: AArch64_AM::LSL,
1257 ShiftImm: ShiftVal, SetFlags, WantResult);
1258 if (ResultReg)
1259 return ResultReg;
1260 }
1261 }
1262
1263 // Check if the shift can be folded into the instruction.
1264 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1265 if (const auto *SI = dyn_cast<BinaryOperator>(Val: RHS)) {
1266 if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) {
1267 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1268 switch (SI->getOpcode()) {
1269 default: break;
1270 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1271 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1272 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1273 }
1274 uint64_t ShiftVal = C->getZExtValue();
1275 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1276 Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0));
1277 if (!RHSReg)
1278 return 0;
1279 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1280 ShiftImm: ShiftVal, SetFlags, WantResult);
1281 if (ResultReg)
1282 return ResultReg;
1283 }
1284 }
1285 }
1286 }
1287
1288 Register RHSReg = getRegForValue(V: RHS);
1289 if (!RHSReg)
1290 return 0;
1291
1292 if (NeedExtend)
1293 RHSReg = emitIntExt(SrcVT, SrcReg: RHSReg, DestVT: RetVT, isZExt: IsZExt);
1294
1295 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1296}
1297
1298unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1299 unsigned RHSReg, bool SetFlags,
1300 bool WantResult) {
1301 assert(LHSReg && RHSReg && "Invalid register number.");
1302
1303 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1304 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1305 return 0;
1306
1307 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1308 return 0;
1309
1310 static const unsigned OpcTable[2][2][2] = {
1311 { { AArch64::SUBWrr, AArch64::SUBXrr },
1312 { AArch64::ADDWrr, AArch64::ADDXrr } },
1313 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1314 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1315 };
1316 bool Is64Bit = RetVT == MVT::i64;
1317 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1318 const TargetRegisterClass *RC =
1319 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1320 unsigned ResultReg;
1321 if (WantResult)
1322 ResultReg = createResultReg(RC);
1323 else
1324 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1325
1326 const MCInstrDesc &II = TII.get(Opcode: Opc);
1327 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1328 RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1);
1329 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1330 .addReg(RegNo: LHSReg)
1331 .addReg(RegNo: RHSReg);
1332 return ResultReg;
1333}
1334
1335unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1336 uint64_t Imm, bool SetFlags,
1337 bool WantResult) {
1338 assert(LHSReg && "Invalid register number.");
1339
1340 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1341 return 0;
1342
1343 unsigned ShiftImm;
1344 if (isUInt<12>(x: Imm))
1345 ShiftImm = 0;
1346 else if ((Imm & 0xfff000) == Imm) {
1347 ShiftImm = 12;
1348 Imm >>= 12;
1349 } else
1350 return 0;
1351
1352 static const unsigned OpcTable[2][2][2] = {
1353 { { AArch64::SUBWri, AArch64::SUBXri },
1354 { AArch64::ADDWri, AArch64::ADDXri } },
1355 { { AArch64::SUBSWri, AArch64::SUBSXri },
1356 { AArch64::ADDSWri, AArch64::ADDSXri } }
1357 };
1358 bool Is64Bit = RetVT == MVT::i64;
1359 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1360 const TargetRegisterClass *RC;
1361 if (SetFlags)
1362 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1363 else
1364 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1365 unsigned ResultReg;
1366 if (WantResult)
1367 ResultReg = createResultReg(RC);
1368 else
1369 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1370
1371 const MCInstrDesc &II = TII.get(Opcode: Opc);
1372 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1373 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1374 .addReg(RegNo: LHSReg)
1375 .addImm(Val: Imm)
1376 .addImm(Val: getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm));
1377 return ResultReg;
1378}
1379
1380unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1381 unsigned RHSReg,
1382 AArch64_AM::ShiftExtendType ShiftType,
1383 uint64_t ShiftImm, bool SetFlags,
1384 bool WantResult) {
1385 assert(LHSReg && RHSReg && "Invalid register number.");
1386 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1387 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1388
1389 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1390 return 0;
1391
1392 // Don't deal with undefined shifts.
1393 if (ShiftImm >= RetVT.getSizeInBits())
1394 return 0;
1395
1396 static const unsigned OpcTable[2][2][2] = {
1397 { { AArch64::SUBWrs, AArch64::SUBXrs },
1398 { AArch64::ADDWrs, AArch64::ADDXrs } },
1399 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1400 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1401 };
1402 bool Is64Bit = RetVT == MVT::i64;
1403 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1404 const TargetRegisterClass *RC =
1405 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1406 unsigned ResultReg;
1407 if (WantResult)
1408 ResultReg = createResultReg(RC);
1409 else
1410 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1411
1412 const MCInstrDesc &II = TII.get(Opcode: Opc);
1413 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1414 RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1);
1415 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1416 .addReg(RegNo: LHSReg)
1417 .addReg(RegNo: RHSReg)
1418 .addImm(Val: getShifterImm(ST: ShiftType, Imm: ShiftImm));
1419 return ResultReg;
1420}
1421
1422unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1423 unsigned RHSReg,
1424 AArch64_AM::ShiftExtendType ExtType,
1425 uint64_t ShiftImm, bool SetFlags,
1426 bool WantResult) {
1427 assert(LHSReg && RHSReg && "Invalid register number.");
1428 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1429 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1430
1431 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1432 return 0;
1433
1434 if (ShiftImm >= 4)
1435 return 0;
1436
1437 static const unsigned OpcTable[2][2][2] = {
1438 { { AArch64::SUBWrx, AArch64::SUBXrx },
1439 { AArch64::ADDWrx, AArch64::ADDXrx } },
1440 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1441 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1442 };
1443 bool Is64Bit = RetVT == MVT::i64;
1444 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1445 const TargetRegisterClass *RC = nullptr;
1446 if (SetFlags)
1447 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1448 else
1449 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1450 unsigned ResultReg;
1451 if (WantResult)
1452 ResultReg = createResultReg(RC);
1453 else
1454 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1455
1456 const MCInstrDesc &II = TII.get(Opcode: Opc);
1457 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1458 RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1);
1459 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1460 .addReg(RegNo: LHSReg)
1461 .addReg(RegNo: RHSReg)
1462 .addImm(Val: getArithExtendImm(ET: ExtType, Imm: ShiftImm));
1463 return ResultReg;
1464}
1465
1466bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1467 Type *Ty = LHS->getType();
1468 EVT EVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
1469 if (!EVT.isSimple())
1470 return false;
1471 MVT VT = EVT.getSimpleVT();
1472
1473 switch (VT.SimpleTy) {
1474 default:
1475 return false;
1476 case MVT::i1:
1477 case MVT::i8:
1478 case MVT::i16:
1479 case MVT::i32:
1480 case MVT::i64:
1481 return emitICmp(RetVT: VT, LHS, RHS, IsZExt);
1482 case MVT::f32:
1483 case MVT::f64:
1484 return emitFCmp(RetVT: VT, LHS, RHS);
1485 }
1486}
1487
1488bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1489 bool IsZExt) {
1490 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1491 IsZExt) != 0;
1492}
1493
1494bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1495 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1496 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1497}
1498
1499bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1500 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1501 return false;
1502
  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
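  // (The immediate forms FCMPSri/FCMPDri compare against +0.0.)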
1505 bool UseImm = false;
1506 if (const auto *CFP = dyn_cast<ConstantFP>(Val: RHS))
1507 if (CFP->isZero() && !CFP->isNegative())
1508 UseImm = true;
1509
1510 Register LHSReg = getRegForValue(V: LHS);
1511 if (!LHSReg)
1512 return false;
1513
1514 if (UseImm) {
1515 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1516 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
1517 .addReg(RegNo: LHSReg);
1518 return true;
1519 }
1520
1521 Register RHSReg = getRegForValue(V: RHS);
1522 if (!RHSReg)
1523 return false;
1524
1525 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1526 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
1527 .addReg(RegNo: LHSReg)
1528 .addReg(RegNo: RHSReg);
1529 return true;
1530}
1531
1532unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1533 bool SetFlags, bool WantResult, bool IsZExt) {
1534 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1535 IsZExt);
1536}
1537
/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
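/// For example, emitAdd_ri_(MVT::i64, Reg, -16) is emitted as a single SUBXri
/// by 16 rather than materializing -16 into a register first.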
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
  return ResultReg;
}
1560
1561unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1562 bool SetFlags, bool WantResult, bool IsZExt) {
1563 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1564 IsZExt);
1565}
1566
1567unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1568 unsigned RHSReg, bool WantResult) {
1569 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1570 /*SetFlags=*/true, WantResult);
1571}
1572
1573unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1574 unsigned RHSReg,
1575 AArch64_AM::ShiftExtendType ShiftType,
1576 uint64_t ShiftImm, bool WantResult) {
1577 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1578 ShiftImm, /*SetFlags=*/true, WantResult);
1579}
1580
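/// Emit a bitwise logical operation (AND/OR/XOR), canonicalizing constants,
/// multiplies by a power of two, and shifts by an immediate to the RHS so they
/// can be folded into the immediate or shifted-register instruction forms.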
1581unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1582 const Value *LHS, const Value *RHS) {
1583 // Canonicalize immediates to the RHS first.
1584 if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS))
1585 std::swap(a&: LHS, b&: RHS);
1586
1587 // Canonicalize mul by power-of-2 to the RHS.
1588 if (LHS->hasOneUse() && isValueAvailable(V: LHS))
1589 if (isMulPowOf2(I: LHS))
1590 std::swap(a&: LHS, b&: RHS);
1591
1592 // Canonicalize shift immediate to the RHS.
1593 if (LHS->hasOneUse() && isValueAvailable(V: LHS))
1594 if (const auto *SI = dyn_cast<ShlOperator>(Val: LHS))
1595 if (isa<ConstantInt>(Val: SI->getOperand(i_nocapture: 1)))
1596 std::swap(a&: LHS, b&: RHS);
1597
1598 Register LHSReg = getRegForValue(V: LHS);
1599 if (!LHSReg)
1600 return 0;
1601
1602 unsigned ResultReg = 0;
1603 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) {
1604 uint64_t Imm = C->getZExtValue();
1605 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1606 }
1607 if (ResultReg)
1608 return ResultReg;
1609
1610 // Check if the mul can be folded into the instruction.
1611 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1612 if (isMulPowOf2(I: RHS)) {
1613 const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0);
1614 const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1);
1615
1616 if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS))
1617 if (C->getValue().isPowerOf2())
1618 std::swap(a&: MulLHS, b&: MulRHS);
1619
1620 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1621 uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2();
1622
1623 Register RHSReg = getRegForValue(V: MulLHS);
1624 if (!RHSReg)
1625 return 0;
1626 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal);
1627 if (ResultReg)
1628 return ResultReg;
1629 }
1630 }
1631
1632 // Check if the shift can be folded into the instruction.
1633 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1634 if (const auto *SI = dyn_cast<ShlOperator>(Val: RHS))
1635 if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) {
1636 uint64_t ShiftVal = C->getZExtValue();
1637 Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0));
1638 if (!RHSReg)
1639 return 0;
1640 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal);
1641 if (ResultReg)
1642 return ResultReg;
1643 }
1644 }
1645
1646 Register RHSReg = getRegForValue(V: RHS);
1647 if (!RHSReg)
1648 return 0;
1649
1650 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1651 ResultReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISDOpc, Op0: LHSReg, Op1: RHSReg);
1652 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1653 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1654 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1655 }
1656 return ResultReg;
1657}
1658
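/// Emit a logical operation with an immediate operand, provided the immediate
/// can be encoded as an AArch64 logical immediate for the given register size.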
1659unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1660 unsigned LHSReg, uint64_t Imm) {
1661 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662 "ISD nodes are not consecutive!");
1663 static const unsigned OpcTable[3][2] = {
1664 { AArch64::ANDWri, AArch64::ANDXri },
1665 { AArch64::ORRWri, AArch64::ORRXri },
1666 { AArch64::EORWri, AArch64::EORXri }
1667 };
1668 const TargetRegisterClass *RC;
1669 unsigned Opc;
1670 unsigned RegSize;
1671 switch (RetVT.SimpleTy) {
1672 default:
1673 return 0;
1674 case MVT::i1:
1675 case MVT::i8:
1676 case MVT::i16:
1677 case MVT::i32: {
1678 unsigned Idx = ISDOpc - ISD::AND;
1679 Opc = OpcTable[Idx][0];
1680 RC = &AArch64::GPR32spRegClass;
1681 RegSize = 32;
1682 break;
1683 }
1684 case MVT::i64:
1685 Opc = OpcTable[ISDOpc - ISD::AND][1];
1686 RC = &AArch64::GPR64spRegClass;
1687 RegSize = 64;
1688 break;
1689 }
1690
1691 if (!AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize))
1692 return 0;
1693
1694 Register ResultReg =
1695 fastEmitInst_ri(MachineInstOpcode: Opc, RC, Op0: LHSReg,
1696 Imm: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
1697 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1700 }
1701 return ResultReg;
1702}
1703
1704unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705 unsigned LHSReg, unsigned RHSReg,
1706 uint64_t ShiftImm) {
1707 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1708 "ISD nodes are not consecutive!");
1709 static const unsigned OpcTable[3][2] = {
1710 { AArch64::ANDWrs, AArch64::ANDXrs },
1711 { AArch64::ORRWrs, AArch64::ORRXrs },
1712 { AArch64::EORWrs, AArch64::EORXrs }
1713 };
1714
1715 // Don't deal with undefined shifts.
1716 if (ShiftImm >= RetVT.getSizeInBits())
1717 return 0;
1718
1719 const TargetRegisterClass *RC;
1720 unsigned Opc;
1721 switch (RetVT.SimpleTy) {
1722 default:
1723 return 0;
1724 case MVT::i1:
1725 case MVT::i8:
1726 case MVT::i16:
1727 case MVT::i32:
1728 Opc = OpcTable[ISDOpc - ISD::AND][0];
1729 RC = &AArch64::GPR32RegClass;
1730 break;
1731 case MVT::i64:
1732 Opc = OpcTable[ISDOpc - ISD::AND][1];
1733 RC = &AArch64::GPR64RegClass;
1734 break;
1735 }
1736 Register ResultReg =
1737 fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: LHSReg, Op1: RHSReg,
1738 Imm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm));
1739 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1740 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1741 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1742 }
1743 return ResultReg;
1744}
1745
1746unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1747 uint64_t Imm) {
1748 return emitLogicalOp_ri(ISDOpc: ISD::AND, RetVT, LHSReg, Imm);
1749}
1750
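/// Emit a load of type VT, optionally sign- or zero-extending the result to
/// RetVT. The opcode tables below cover the unscaled, scaled, and
/// register-offset addressing forms for both GPR and FPR destinations.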
1751unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1752 bool WantZExt, MachineMemOperand *MMO) {
1753 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1754 return 0;
1755
1756 // Simplify this down to something we can handle.
1757 if (!simplifyAddress(Addr, VT))
1758 return 0;
1759
1760 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1761 if (!ScaleFactor)
1762 llvm_unreachable("Unexpected value type.");
1763
1764 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1765 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1766 bool UseScaled = true;
1767 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1768 UseScaled = false;
1769 ScaleFactor = 1;
1770 }
1771
1772 static const unsigned GPOpcTable[2][8][4] = {
1773 // Sign-extend.
1774 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1775 AArch64::LDURXi },
1776 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1777 AArch64::LDURXi },
1778 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1779 AArch64::LDRXui },
1780 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1781 AArch64::LDRXui },
1782 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1783 AArch64::LDRXroX },
1784 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1785 AArch64::LDRXroX },
1786 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1787 AArch64::LDRXroW },
1788 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1789 AArch64::LDRXroW }
1790 },
1791 // Zero-extend.
1792 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1793 AArch64::LDURXi },
1794 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1795 AArch64::LDURXi },
1796 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1797 AArch64::LDRXui },
1798 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1799 AArch64::LDRXui },
1800 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1801 AArch64::LDRXroX },
1802 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1803 AArch64::LDRXroX },
1804 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1805 AArch64::LDRXroW },
1806 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1807 AArch64::LDRXroW }
1808 }
1809 };
1810
1811 static const unsigned FPOpcTable[4][2] = {
1812 { AArch64::LDURSi, AArch64::LDURDi },
1813 { AArch64::LDRSui, AArch64::LDRDui },
1814 { AArch64::LDRSroX, AArch64::LDRDroX },
1815 { AArch64::LDRSroW, AArch64::LDRDroW }
1816 };
1817
1818 unsigned Opc;
1819 const TargetRegisterClass *RC;
1820 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1821 Addr.getOffsetReg();
1822 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1823 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1824 Addr.getExtendType() == AArch64_AM::SXTW)
1825 Idx++;
1826
1827 bool IsRet64Bit = RetVT == MVT::i64;
1828 switch (VT.SimpleTy) {
1829 default:
1830 llvm_unreachable("Unexpected value type.");
1831 case MVT::i1: // Intentional fall-through.
1832 case MVT::i8:
1833 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1834 RC = (IsRet64Bit && !WantZExt) ?
1835 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1836 break;
1837 case MVT::i16:
1838 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1839 RC = (IsRet64Bit && !WantZExt) ?
1840 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1841 break;
1842 case MVT::i32:
1843 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1844 RC = (IsRet64Bit && !WantZExt) ?
1845 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1846 break;
1847 case MVT::i64:
1848 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1849 RC = &AArch64::GPR64RegClass;
1850 break;
1851 case MVT::f32:
1852 Opc = FPOpcTable[Idx][0];
1853 RC = &AArch64::FPR32RegClass;
1854 break;
1855 case MVT::f64:
1856 Opc = FPOpcTable[Idx][1];
1857 RC = &AArch64::FPR64RegClass;
1858 break;
1859 }
1860
1861 // Create the base instruction, then add the operands.
1862 Register ResultReg = createResultReg(RC);
1863 MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1864 MCID: TII.get(Opcode: Opc), DestReg: ResultReg);
1865 addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOLoad, ScaleFactor, MMO);
1866
1867 // Loading an i1 requires special handling.
1868 if (VT == MVT::i1) {
1869 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1870 assert(ANDReg && "Unexpected AND instruction emission failure.");
1871 ResultReg = ANDReg;
1872 }
1873
1874  // For zero-extending loads to 64 bits we emit a 32-bit load and then
1875  // convert the 32-bit register into a 64-bit register.
1876 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1877 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1878 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1879 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1880 .addImm(0)
1881 .addReg(ResultReg, getKillRegState(true))
1882 .addImm(AArch64::sub_32);
1883 ResultReg = Reg64;
1884 }
1885 return ResultReg;
1886}
1887
1888bool AArch64FastISel::selectAddSub(const Instruction *I) {
1889 MVT VT;
1890 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true))
1891 return false;
1892
1893 if (VT.isVector())
1894 return selectOperator(I, Opcode: I->getOpcode());
1895
1896 unsigned ResultReg;
1897 switch (I->getOpcode()) {
1898 default:
1899 llvm_unreachable("Unexpected instruction.");
1900 case Instruction::Add:
1901 ResultReg = emitAdd(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1902 break;
1903 case Instruction::Sub:
1904 ResultReg = emitSub(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1905 break;
1906 }
1907 if (!ResultReg)
1908 return false;
1909
1910 updateValueMap(I, Reg: ResultReg);
1911 return true;
1912}
1913
1914bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1915 MVT VT;
1916 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true))
1917 return false;
1918
1919 if (VT.isVector())
1920 return selectOperator(I, Opcode: I->getOpcode());
1921
1922 unsigned ResultReg;
1923 switch (I->getOpcode()) {
1924 default:
1925 llvm_unreachable("Unexpected instruction.");
1926 case Instruction::And:
1927 ResultReg = emitLogicalOp(ISDOpc: ISD::AND, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1928 break;
1929 case Instruction::Or:
1930 ResultReg = emitLogicalOp(ISDOpc: ISD::OR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1931 break;
1932 case Instruction::Xor:
1933 ResultReg = emitLogicalOp(ISDOpc: ISD::XOR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1934 break;
1935 }
1936 if (!ResultReg)
1937 return false;
1938
1939 updateValueMap(I, Reg: ResultReg);
1940 return true;
1941}
1942
1943bool AArch64FastISel::selectLoad(const Instruction *I) {
1944 MVT VT;
1945 // Verify we have a legal type before going any further. Currently, we handle
1946 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1947  // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
1948 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true) ||
1949 cast<LoadInst>(Val: I)->isAtomic())
1950 return false;
1951
1952 const Value *SV = I->getOperand(i: 0);
1953 if (TLI.supportSwiftError()) {
1954 // Swifterror values can come from either a function parameter with
1955 // swifterror attribute or an alloca with swifterror attribute.
1956 if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) {
1957 if (Arg->hasSwiftErrorAttr())
1958 return false;
1959 }
1960
1961 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) {
1962 if (Alloca->isSwiftError())
1963 return false;
1964 }
1965 }
1966
1967 // See if we can handle this address.
1968 Address Addr;
1969 if (!computeAddress(Obj: I->getOperand(i: 0), Addr, Ty: I->getType()))
1970 return false;
1971
1972 // Fold the following sign-/zero-extend into the load instruction.
1973 bool WantZExt = true;
1974 MVT RetVT = VT;
1975 const Value *IntExtVal = nullptr;
1976 if (I->hasOneUse()) {
1977 if (const auto *ZE = dyn_cast<ZExtInst>(Val: I->use_begin()->getUser())) {
1978 if (isTypeSupported(Ty: ZE->getType(), VT&: RetVT))
1979 IntExtVal = ZE;
1980 else
1981 RetVT = VT;
1982 } else if (const auto *SE = dyn_cast<SExtInst>(Val: I->use_begin()->getUser())) {
1983 if (isTypeSupported(Ty: SE->getType(), VT&: RetVT))
1984 IntExtVal = SE;
1985 else
1986 RetVT = VT;
1987 WantZExt = false;
1988 }
1989 }
1990
1991 unsigned ResultReg =
1992 emitLoad(VT, RetVT, Addr, WantZExt, MMO: createMachineMemOperandFor(I));
1993 if (!ResultReg)
1994 return false;
1995
1996 // There are a few different cases we have to handle, because the load or the
1997  // sign-/zero-extend might not be selected by FastISel if we fall back to
1998 // SelectionDAG. There is also an ordering issue when both instructions are in
1999 // different basic blocks.
2000 // 1.) The load instruction is selected by FastISel, but the integer extend
2001  //     is not. This usually happens when the integer extend is in a different
2002 // basic block and SelectionDAG took over for that basic block.
2003 // 2.) The load instruction is selected before the integer extend. This only
2004 // happens when the integer extend is in a different basic block.
2005 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2006 // by FastISel. This happens if there are instructions between the load
2007 // and the integer extend that couldn't be selected by FastISel.
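  // For example (illustrative IR), cases 1 and 2 arise when the load and the
  // extend live in different basic blocks:
  //   bb1:  %v = load i32, ptr %p
  //         br label %bb2
  //   bb2:  %e = zext i32 %v to i64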
2008 if (IntExtVal) {
2009 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2010 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2011 // it when it selects the integer extend.
2012 Register Reg = lookUpRegForValue(V: IntExtVal);
2013 auto *MI = MRI.getUniqueVRegDef(Reg);
2014 if (!MI) {
2015 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2016 if (WantZExt) {
2017 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2018 MachineBasicBlock::iterator I(std::prev(x: FuncInfo.InsertPt));
2019 ResultReg = std::prev(x: I)->getOperand(i: 0).getReg();
2020 removeDeadCode(I, E: std::next(x: I));
2021 } else
2022 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023 AArch64::sub_32);
2024 }
2025 updateValueMap(I, Reg: ResultReg);
2026 return true;
2027 }
2028
2029    // The integer extend has already been emitted; delete all the instructions
2030 // that have been emitted by the integer extend lowering code and use the
2031 // result from the load instruction directly.
2032 while (MI) {
2033 Reg = 0;
2034 for (auto &Opnd : MI->uses()) {
2035 if (Opnd.isReg()) {
2036 Reg = Opnd.getReg();
2037 break;
2038 }
2039 }
2040 MachineBasicBlock::iterator I(MI);
2041 removeDeadCode(I, E: std::next(x: I));
2042 MI = nullptr;
2043 if (Reg)
2044 MI = MRI.getUniqueVRegDef(Reg);
2045 }
2046 updateValueMap(I: IntExtVal, Reg: ResultReg);
2047 return true;
2048 }
2049
2050 updateValueMap(I, Reg: ResultReg);
2051 return true;
2052}
2053
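/// Emit a store-release (STLR*) for an atomic store with release or stronger
/// ordering. Only the base-register addressing form is supported.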
2054bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055 unsigned AddrReg,
2056 MachineMemOperand *MMO) {
2057 unsigned Opc;
2058 switch (VT.SimpleTy) {
2059 default: return false;
2060 case MVT::i8: Opc = AArch64::STLRB; break;
2061 case MVT::i16: Opc = AArch64::STLRH; break;
2062 case MVT::i32: Opc = AArch64::STLRW; break;
2063 case MVT::i64: Opc = AArch64::STLRX; break;
2064 }
2065
2066 const MCInstrDesc &II = TII.get(Opcode: Opc);
2067 SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: 0);
2068 AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: 1);
2069 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
2070 .addReg(RegNo: SrcReg)
2071 .addReg(RegNo: AddrReg)
2072 .addMemOperand(MMO);
2073 return true;
2074}
2075
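/// Emit a store of SrcReg to the given address, choosing between unscaled,
/// scaled, and register-offset store opcodes. i1 values are masked down to a
/// single bit before being stored.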
2076bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077 MachineMemOperand *MMO) {
2078 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079 return false;
2080
2081 // Simplify this down to something we can handle.
2082 if (!simplifyAddress(Addr, VT))
2083 return false;
2084
2085 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086 if (!ScaleFactor)
2087 llvm_unreachable("Unexpected value type.");
2088
2089 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091 bool UseScaled = true;
2092 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093 UseScaled = false;
2094 ScaleFactor = 1;
2095 }
2096
2097 static const unsigned OpcTable[4][6] = {
2098 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2099 AArch64::STURSi, AArch64::STURDi },
2100 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2101 AArch64::STRSui, AArch64::STRDui },
2102 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103 AArch64::STRSroX, AArch64::STRDroX },
2104 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105 AArch64::STRSroW, AArch64::STRDroW }
2106 };
2107
2108 unsigned Opc;
2109 bool VTIsi1 = false;
2110 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111 Addr.getOffsetReg();
2112 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114 Addr.getExtendType() == AArch64_AM::SXTW)
2115 Idx++;
2116
2117 switch (VT.SimpleTy) {
2118 default: llvm_unreachable("Unexpected value type.");
2119 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2120 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2121 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126 }
2127
2128 // Storing an i1 requires special handling.
2129 if (VTIsi1 && SrcReg != AArch64::WZR) {
2130 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2131 assert(ANDReg && "Unexpected AND instruction emission failure.");
2132 SrcReg = ANDReg;
2133 }
2134 // Create the base instruction, then add the operands.
2135 const MCInstrDesc &II = TII.get(Opcode: Opc);
2136 SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs());
2137 MachineInstrBuilder MIB =
2138 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: SrcReg);
2139 addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOStore, ScaleFactor, MMO);
2140
2141 return true;
2142}
2143
2144bool AArch64FastISel::selectStore(const Instruction *I) {
2145 MVT VT;
2146 const Value *Op0 = I->getOperand(i: 0);
2147 // Verify we have a legal type before going any further. Currently, we handle
2148 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149  // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
2150 if (!isTypeSupported(Ty: Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151 return false;
2152
2153 const Value *PtrV = I->getOperand(i: 1);
2154 if (TLI.supportSwiftError()) {
2155 // Swifterror values can come from either a function parameter with
2156 // swifterror attribute or an alloca with swifterror attribute.
2157 if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) {
2158 if (Arg->hasSwiftErrorAttr())
2159 return false;
2160 }
2161
2162 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) {
2163 if (Alloca->isSwiftError())
2164 return false;
2165 }
2166 }
2167
2168 // Get the value to be stored into a register. Use the zero register directly
2169 // when possible to avoid an unnecessary copy and a wasted register.
2170 unsigned SrcReg = 0;
2171 if (const auto *CI = dyn_cast<ConstantInt>(Val: Op0)) {
2172 if (CI->isZero())
2173 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174 } else if (const auto *CF = dyn_cast<ConstantFP>(Val: Op0)) {
2175 if (CF->isZero() && !CF->isNegative()) {
2176 VT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits());
2177 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178 }
2179 }
2180
2181 if (!SrcReg)
2182 SrcReg = getRegForValue(V: Op0);
2183
2184 if (!SrcReg)
2185 return false;
2186
2187 auto *SI = cast<StoreInst>(Val: I);
2188
2189 // Try to emit a STLR for seq_cst/release.
2190 if (SI->isAtomic()) {
2191 AtomicOrdering Ord = SI->getOrdering();
2192 // The non-atomic instructions are sufficient for relaxed stores.
2193 if (isReleaseOrStronger(AO: Ord)) {
2194 // The STLR addressing mode only supports a base reg; pass that directly.
2195 Register AddrReg = getRegForValue(V: PtrV);
2196 return emitStoreRelease(VT, SrcReg, AddrReg,
2197 MMO: createMachineMemOperandFor(I));
2198 }
2199 }
2200
2201 // See if we can handle this address.
2202 Address Addr;
2203 if (!computeAddress(Obj: PtrV, Addr, Ty: Op0->getType()))
2204 return false;
2205
2206 if (!emitStore(VT, SrcReg, Addr, MMO: createMachineMemOperandFor(I)))
2207 return false;
2208 return true;
2209}
2210
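/// Map an IR integer or floating-point predicate onto the AArch64 condition
/// code that tests it after a CMP/FCMP; AL is returned for predicates that
/// need more than one compare or are otherwise unhandled.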
2211static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2212 switch (Pred) {
2213 case CmpInst::FCMP_ONE:
2214 case CmpInst::FCMP_UEQ:
2215 default:
2216 // AL is our "false" for now. The other two need more compares.
2217 return AArch64CC::AL;
2218 case CmpInst::ICMP_EQ:
2219 case CmpInst::FCMP_OEQ:
2220 return AArch64CC::EQ;
2221 case CmpInst::ICMP_SGT:
2222 case CmpInst::FCMP_OGT:
2223 return AArch64CC::GT;
2224 case CmpInst::ICMP_SGE:
2225 case CmpInst::FCMP_OGE:
2226 return AArch64CC::GE;
2227 case CmpInst::ICMP_UGT:
2228 case CmpInst::FCMP_UGT:
2229 return AArch64CC::HI;
2230 case CmpInst::FCMP_OLT:
2231 return AArch64CC::MI;
2232 case CmpInst::ICMP_ULE:
2233 case CmpInst::FCMP_OLE:
2234 return AArch64CC::LS;
2235 case CmpInst::FCMP_ORD:
2236 return AArch64CC::VC;
2237 case CmpInst::FCMP_UNO:
2238 return AArch64CC::VS;
2239 case CmpInst::FCMP_UGE:
2240 return AArch64CC::PL;
2241 case CmpInst::ICMP_SLT:
2242 case CmpInst::FCMP_ULT:
2243 return AArch64CC::LT;
2244 case CmpInst::ICMP_SLE:
2245 case CmpInst::FCMP_ULE:
2246 return AArch64CC::LE;
2247 case CmpInst::FCMP_UNE:
2248 case CmpInst::ICMP_NE:
2249 return AArch64CC::NE;
2250 case CmpInst::ICMP_UGE:
2251 return AArch64CC::HS;
2252 case CmpInst::ICMP_ULT:
2253 return AArch64CC::LO;
2254 }
2255}
2256
2257/// Try to emit a combined compare-and-branch instruction.
2258bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2260 // will not be produced, as they are conditional branch instructions that do
2261 // not set flags.
2262 if (FuncInfo.MF->getFunction().hasFnAttribute(
2263 Attribute::SpeculativeLoadHardening))
2264 return false;
2265
2266 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2267 const CmpInst *CI = cast<CmpInst>(Val: BI->getCondition());
2268 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2269
2270 const Value *LHS = CI->getOperand(i_nocapture: 0);
2271 const Value *RHS = CI->getOperand(i_nocapture: 1);
2272
2273 MVT VT;
2274 if (!isTypeSupported(Ty: LHS->getType(), VT))
2275 return false;
2276
2277 unsigned BW = VT.getSizeInBits();
2278 if (BW > 64)
2279 return false;
2280
2281 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(i: 0)];
2282 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(i: 1)];
2283
2284 // Try to take advantage of fallthrough opportunities.
2285 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
2286 std::swap(a&: TBB, b&: FBB);
2287 Predicate = CmpInst::getInversePredicate(pred: Predicate);
2288 }
2289
2290 int TestBit = -1;
2291 bool IsCmpNE;
2292 switch (Predicate) {
2293 default:
2294 return false;
2295 case CmpInst::ICMP_EQ:
2296 case CmpInst::ICMP_NE:
2297 if (isa<Constant>(Val: LHS) && cast<Constant>(Val: LHS)->isNullValue())
2298 std::swap(a&: LHS, b&: RHS);
2299
2300 if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue())
2301 return false;
2302
2303 if (const auto *AI = dyn_cast<BinaryOperator>(Val: LHS))
2304 if (AI->getOpcode() == Instruction::And && isValueAvailable(V: AI)) {
2305 const Value *AndLHS = AI->getOperand(i_nocapture: 0);
2306 const Value *AndRHS = AI->getOperand(i_nocapture: 1);
2307
2308 if (const auto *C = dyn_cast<ConstantInt>(Val: AndLHS))
2309 if (C->getValue().isPowerOf2())
2310 std::swap(a&: AndLHS, b&: AndRHS);
2311
2312 if (const auto *C = dyn_cast<ConstantInt>(Val: AndRHS))
2313 if (C->getValue().isPowerOf2()) {
2314 TestBit = C->getValue().logBase2();
2315 LHS = AndLHS;
2316 }
2317 }
2318
2319 if (VT == MVT::i1)
2320 TestBit = 0;
2321
2322 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2323 break;
2324 case CmpInst::ICMP_SLT:
2325 case CmpInst::ICMP_SGE:
2326 if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue())
2327 return false;
2328
2329 TestBit = BW - 1;
2330 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2331 break;
2332 case CmpInst::ICMP_SGT:
2333 case CmpInst::ICMP_SLE:
2334 if (!isa<ConstantInt>(Val: RHS))
2335 return false;
2336
2337 if (cast<ConstantInt>(Val: RHS)->getValue() != APInt(BW, -1, true))
2338 return false;
2339
2340 TestBit = BW - 1;
2341 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2342 break;
2343 } // end switch
2344
2345 static const unsigned OpcTable[2][2][2] = {
2346 { {AArch64::CBZW, AArch64::CBZX },
2347 {AArch64::CBNZW, AArch64::CBNZX} },
2348 { {AArch64::TBZW, AArch64::TBZX },
2349 {AArch64::TBNZW, AArch64::TBNZX} }
2350 };
2351
2352 bool IsBitTest = TestBit != -1;
2353 bool Is64Bit = BW == 64;
2354 if (TestBit < 32 && TestBit >= 0)
2355 Is64Bit = false;
2356
2357 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2358 const MCInstrDesc &II = TII.get(Opcode: Opc);
2359
2360 Register SrcReg = getRegForValue(V: LHS);
2361 if (!SrcReg)
2362 return false;
2363
2364 if (BW == 64 && !Is64Bit)
2365 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2366
2367 if ((BW < 32) && !IsBitTest)
2368 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2369
2370 // Emit the combined compare and branch instruction.
2371 SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs());
2372 MachineInstrBuilder MIB =
2373 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
2374 .addReg(RegNo: SrcReg);
2375 if (IsBitTest)
2376 MIB.addImm(Val: TestBit);
2377 MIB.addMBB(MBB: TBB);
2378
2379 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2380 return true;
2381}
2382
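/// Select a branch. Conditional branches on a compare first try the fused
/// compare-and-branch forms, then an explicit compare followed by B.cc; other
/// conditions fall back to testing bit 0 of the i1 value with TB(N)Z.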
2383bool AArch64FastISel::selectBranch(const Instruction *I) {
2384 const BranchInst *BI = cast<BranchInst>(Val: I);
2385 if (BI->isUnconditional()) {
2386 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(i: 0)];
2387 fastEmitBranch(MSucc, DbgLoc: BI->getDebugLoc());
2388 return true;
2389 }
2390
2391 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(i: 0)];
2392 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(i: 1)];
2393
2394 if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
2395 if (CI->hasOneUse() && isValueAvailable(V: CI)) {
2396 // Try to optimize or fold the cmp.
2397 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398 switch (Predicate) {
2399 default:
2400 break;
2401 case CmpInst::FCMP_FALSE:
2402 fastEmitBranch(MSucc: FBB, DbgLoc: MIMD.getDL());
2403 return true;
2404 case CmpInst::FCMP_TRUE:
2405 fastEmitBranch(MSucc: TBB, DbgLoc: MIMD.getDL());
2406 return true;
2407 }
2408
2409 // Try to emit a combined compare-and-branch first.
2410 if (emitCompareAndBranch(BI))
2411 return true;
2412
2413 // Try to take advantage of fallthrough opportunities.
2414 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
2415 std::swap(a&: TBB, b&: FBB);
2416 Predicate = CmpInst::getInversePredicate(pred: Predicate);
2417 }
2418
2419 // Emit the cmp.
2420 if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned()))
2421 return false;
2422
2423 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424 // instruction.
2425 AArch64CC::CondCode CC = getCompareCC(Pred: Predicate);
2426 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427 switch (Predicate) {
2428 default:
2429 break;
2430 case CmpInst::FCMP_UEQ:
2431 ExtraCC = AArch64CC::EQ;
2432 CC = AArch64CC::VS;
2433 break;
2434 case CmpInst::FCMP_ONE:
2435 ExtraCC = AArch64CC::MI;
2436 CC = AArch64CC::GT;
2437 break;
2438 }
2439 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440
2441 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442 if (ExtraCC != AArch64CC::AL) {
2443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2444 .addImm(ExtraCC)
2445 .addMBB(TBB);
2446 }
2447
2448 // Emit the branch.
2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2450 .addImm(CC)
2451 .addMBB(TBB);
2452
2453 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2454 return true;
2455 }
2456 } else if (const auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
2457 uint64_t Imm = CI->getZExtValue();
2458 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2460 .addMBB(Target);
2461
2462 // Obtain the branch probability and add the target to the successor list.
2463 if (FuncInfo.BPI) {
2464 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465 Src: BI->getParent(), Dst: Target->getBasicBlock());
2466 FuncInfo.MBB->addSuccessor(Succ: Target, Prob: BranchProbability);
2467 } else
2468 FuncInfo.MBB->addSuccessorWithoutProb(Succ: Target);
2469 return true;
2470 } else {
2471 AArch64CC::CondCode CC = AArch64CC::NE;
2472 if (foldXALUIntrinsic(CC, I, Cond: BI->getCondition())) {
2473      // Fake-request the condition; otherwise the intrinsic might be completely
2474      // optimized away.
2475 Register CondReg = getRegForValue(V: BI->getCondition());
2476 if (!CondReg)
2477 return false;
2478
2479 // Emit the branch.
2480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2481 .addImm(CC)
2482 .addMBB(TBB);
2483
2484 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2485 return true;
2486 }
2487 }
2488
2489 Register CondReg = getRegForValue(V: BI->getCondition());
2490 if (CondReg == 0)
2491 return false;
2492
2493  // i1 conditions come as i32 values; test the lowest bit with tb(n)z.
2494 unsigned Opcode = AArch64::TBNZW;
2495 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
2496 std::swap(a&: TBB, b&: FBB);
2497 Opcode = AArch64::TBZW;
2498 }
2499
2500 const MCInstrDesc &II = TII.get(Opcode);
2501 Register ConstrainedCondReg
2502 = constrainOperandRegClass(II, Op: CondReg, OpNum: II.getNumDefs());
2503 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
2504 .addReg(RegNo: ConstrainedCondReg)
2505 .addImm(Val: 0)
2506 .addMBB(MBB: TBB);
2507
2508 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2509 return true;
2510}
2511
2512bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2513 const IndirectBrInst *BI = cast<IndirectBrInst>(Val: I);
2514 Register AddrReg = getRegForValue(V: BI->getOperand(i_nocapture: 0));
2515 if (AddrReg == 0)
2516 return false;
2517
2518 // Emit the indirect branch.
2519 const MCInstrDesc &II = TII.get(AArch64::BR);
2520 AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: II.getNumDefs());
2521 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: AddrReg);
2522
2523 // Make sure the CFG is up-to-date.
2524 for (const auto *Succ : BI->successors())
2525 FuncInfo.MBB->addSuccessor(Succ: FuncInfo.MBBMap[Succ]);
2526
2527 return true;
2528}
2529
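/// Select an integer or floating-point compare whose result is used as a
/// value, materializing the boolean with CSINC on the inverted condition (two
/// CSINCs for FCMP_UEQ and FCMP_ONE).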
2530bool AArch64FastISel::selectCmp(const Instruction *I) {
2531 const CmpInst *CI = cast<CmpInst>(Val: I);
2532
2533 // Vectors of i1 are weird: bail out.
2534 if (CI->getType()->isVectorTy())
2535 return false;
2536
2537 // Try to optimize or fold the cmp.
2538 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2539 unsigned ResultReg = 0;
2540 switch (Predicate) {
2541 default:
2542 break;
2543 case CmpInst::FCMP_FALSE:
2544 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2545 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2546 TII.get(TargetOpcode::COPY), ResultReg)
2547 .addReg(AArch64::WZR, getKillRegState(true));
2548 break;
2549 case CmpInst::FCMP_TRUE:
2550 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2551 break;
2552 }
2553
2554 if (ResultReg) {
2555 updateValueMap(I, Reg: ResultReg);
2556 return true;
2557 }
2558
2559 // Emit the cmp.
2560 if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned()))
2561 return false;
2562
2563 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2564
2565 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2566  // condition codes are inverted because they are used by CSINC.
2567 static unsigned CondCodeTable[2][2] = {
2568 { AArch64CC::NE, AArch64CC::VC },
2569 { AArch64CC::PL, AArch64CC::LE }
2570 };
2571 unsigned *CondCodes = nullptr;
2572 switch (Predicate) {
2573 default:
2574 break;
2575 case CmpInst::FCMP_UEQ:
2576 CondCodes = &CondCodeTable[0][0];
2577 break;
2578 case CmpInst::FCMP_ONE:
2579 CondCodes = &CondCodeTable[1][0];
2580 break;
2581 }
2582
2583 if (CondCodes) {
2584 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2585 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2586 TmpReg1)
2587 .addReg(AArch64::WZR, getKillRegState(true))
2588 .addReg(AArch64::WZR, getKillRegState(true))
2589 .addImm(CondCodes[0]);
2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591 ResultReg)
2592 .addReg(TmpReg1, getKillRegState(true))
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addImm(CondCodes[1]);
2595
2596 updateValueMap(I, Reg: ResultReg);
2597 return true;
2598 }
2599
2600 // Now set a register based on the comparison.
2601 AArch64CC::CondCode CC = getCompareCC(Pred: Predicate);
2602 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2603 AArch64CC::CondCode invertedCC = getInvertedCondCode(Code: CC);
2604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2605 ResultReg)
2606 .addReg(AArch64::WZR, getKillRegState(true))
2607 .addReg(AArch64::WZR, getKillRegState(true))
2608 .addImm(invertedCC);
2609
2610 updateValueMap(I, Reg: ResultReg);
2611 return true;
2612}
2613
2614/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2615/// value.
2616bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2617 if (!SI->getType()->isIntegerTy(Bitwidth: 1))
2618 return false;
2619
2620 const Value *Src1Val, *Src2Val;
2621 unsigned Opc = 0;
2622 bool NeedExtraOp = false;
2623 if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getTrueValue())) {
2624 if (CI->isOne()) {
2625 Src1Val = SI->getCondition();
2626 Src2Val = SI->getFalseValue();
2627 Opc = AArch64::ORRWrr;
2628 } else {
2629 assert(CI->isZero());
2630 Src1Val = SI->getFalseValue();
2631 Src2Val = SI->getCondition();
2632 Opc = AArch64::BICWrr;
2633 }
2634 } else if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getFalseValue())) {
2635 if (CI->isOne()) {
2636 Src1Val = SI->getCondition();
2637 Src2Val = SI->getTrueValue();
2638 Opc = AArch64::ORRWrr;
2639 NeedExtraOp = true;
2640 } else {
2641 assert(CI->isZero());
2642 Src1Val = SI->getCondition();
2643 Src2Val = SI->getTrueValue();
2644 Opc = AArch64::ANDWrr;
2645 }
2646 }
2647
2648 if (!Opc)
2649 return false;
2650
2651 Register Src1Reg = getRegForValue(V: Src1Val);
2652 if (!Src1Reg)
2653 return false;
2654
2655 Register Src2Reg = getRegForValue(V: Src2Val);
2656 if (!Src2Reg)
2657 return false;
2658
2659 if (NeedExtraOp)
2660 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2661
2662 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2663 Src2Reg);
2664 updateValueMap(I: SI, Reg: ResultReg);
2665 return true;
2666}
2667
2668bool AArch64FastISel::selectSelect(const Instruction *I) {
2669 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2670 MVT VT;
2671 if (!isTypeSupported(Ty: I->getType(), VT))
2672 return false;
2673
2674 unsigned Opc;
2675 const TargetRegisterClass *RC;
2676 switch (VT.SimpleTy) {
2677 default:
2678 return false;
2679 case MVT::i1:
2680 case MVT::i8:
2681 case MVT::i16:
2682 case MVT::i32:
2683 Opc = AArch64::CSELWr;
2684 RC = &AArch64::GPR32RegClass;
2685 break;
2686 case MVT::i64:
2687 Opc = AArch64::CSELXr;
2688 RC = &AArch64::GPR64RegClass;
2689 break;
2690 case MVT::f32:
2691 Opc = AArch64::FCSELSrrr;
2692 RC = &AArch64::FPR32RegClass;
2693 break;
2694 case MVT::f64:
2695 Opc = AArch64::FCSELDrrr;
2696 RC = &AArch64::FPR64RegClass;
2697 break;
2698 }
2699
2700 const SelectInst *SI = cast<SelectInst>(Val: I);
2701 const Value *Cond = SI->getCondition();
2702 AArch64CC::CondCode CC = AArch64CC::NE;
2703 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2704
2705 if (optimizeSelect(SI))
2706 return true;
2707
2708  // Try to pick up the flags so we don't have to emit another compare.
2709 if (foldXALUIntrinsic(CC, I, Cond)) {
2710    // Fake-request the condition to force emission of the XALU intrinsic.
2711 Register CondReg = getRegForValue(V: Cond);
2712 if (!CondReg)
2713 return false;
2714 } else if (isa<CmpInst>(Val: Cond) && cast<CmpInst>(Val: Cond)->hasOneUse() &&
2715 isValueAvailable(V: Cond)) {
2716 const auto *Cmp = cast<CmpInst>(Val: Cond);
2717 // Try to optimize or fold the cmp.
2718 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI: Cmp);
2719 const Value *FoldSelect = nullptr;
2720 switch (Predicate) {
2721 default:
2722 break;
2723 case CmpInst::FCMP_FALSE:
2724 FoldSelect = SI->getFalseValue();
2725 break;
2726 case CmpInst::FCMP_TRUE:
2727 FoldSelect = SI->getTrueValue();
2728 break;
2729 }
2730
2731 if (FoldSelect) {
2732 Register SrcReg = getRegForValue(V: FoldSelect);
2733 if (!SrcReg)
2734 return false;
2735
2736 updateValueMap(I, Reg: SrcReg);
2737 return true;
2738 }
2739
2740 // Emit the cmp.
2741 if (!emitCmp(LHS: Cmp->getOperand(i_nocapture: 0), RHS: Cmp->getOperand(i_nocapture: 1), IsZExt: Cmp->isUnsigned()))
2742 return false;
2743
2744 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2745 CC = getCompareCC(Pred: Predicate);
2746 switch (Predicate) {
2747 default:
2748 break;
2749 case CmpInst::FCMP_UEQ:
2750 ExtraCC = AArch64CC::EQ;
2751 CC = AArch64CC::VS;
2752 break;
2753 case CmpInst::FCMP_ONE:
2754 ExtraCC = AArch64CC::MI;
2755 CC = AArch64CC::GT;
2756 break;
2757 }
2758 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2759 } else {
2760 Register CondReg = getRegForValue(V: Cond);
2761 if (!CondReg)
2762 return false;
2763
2764 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2765 CondReg = constrainOperandRegClass(II, Op: CondReg, OpNum: 1);
2766
2767 // Emit a TST instruction (ANDS wzr, reg, #imm).
2768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2769 AArch64::WZR)
2770 .addReg(CondReg)
2771 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2772 }
2773
2774 Register Src1Reg = getRegForValue(V: SI->getTrueValue());
2775 Register Src2Reg = getRegForValue(V: SI->getFalseValue());
2776
2777 if (!Src1Reg || !Src2Reg)
2778 return false;
2779
2780 if (ExtraCC != AArch64CC::AL)
2781 Src2Reg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: ExtraCC);
2782
2783 Register ResultReg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: CC);
2784 updateValueMap(I, Reg: ResultReg);
2785 return true;
2786}
2787
2788bool AArch64FastISel::selectFPExt(const Instruction *I) {
2789 Value *V = I->getOperand(i: 0);
2790 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2791 return false;
2792
2793 Register Op = getRegForValue(V);
2794 if (Op == 0)
2795 return false;
2796
2797 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2798 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2799 ResultReg).addReg(Op);
2800 updateValueMap(I, Reg: ResultReg);
2801 return true;
2802}
2803
2804bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2805 Value *V = I->getOperand(i: 0);
2806 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2807 return false;
2808
2809 Register Op = getRegForValue(V);
2810 if (Op == 0)
2811 return false;
2812
2813 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2814 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2815 ResultReg).addReg(Op);
2816 updateValueMap(I, Reg: ResultReg);
2817 return true;
2818}
2819
2820// FPToUI and FPToSI
2821bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2822 MVT DestVT;
2823 if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector())
2824 return false;
2825
2826 Register SrcReg = getRegForValue(V: I->getOperand(i: 0));
2827 if (SrcReg == 0)
2828 return false;
2829
2830 EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true);
2831 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2832 return false;
2833
2834 unsigned Opc;
2835 if (SrcVT == MVT::f64) {
2836 if (Signed)
2837 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2838 else
2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2840 } else {
2841 if (Signed)
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2843 else
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2845 }
2846 Register ResultReg = createResultReg(
2847 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2848 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
2849 .addReg(RegNo: SrcReg);
2850 updateValueMap(I, Reg: ResultReg);
2851 return true;
2852}
2853
2854bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2855 MVT DestVT;
2856 if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector())
2857 return false;
2858  // Let regular ISel handle FP16 and BF16.
2859 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2860 return false;
2861
2862 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2863 "Unexpected value type.");
2864
2865 Register SrcReg = getRegForValue(V: I->getOperand(i: 0));
2866 if (!SrcReg)
2867 return false;
2868
2869 EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true);
2870
2871 // Handle sign-extension.
2872 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2873 SrcReg =
2874 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2875 if (!SrcReg)
2876 return false;
2877 }
2878
2879 unsigned Opc;
2880 if (SrcVT == MVT::i64) {
2881 if (Signed)
2882 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2883 else
2884 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2885 } else {
2886 if (Signed)
2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2888 else
2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2890 }
2891
2892 Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT: DestVT), Op0: SrcReg);
2893 updateValueMap(I, Reg: ResultReg);
2894 return true;
2895}
2896
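/// Fast-path lowering for incoming arguments: handles up to eight GPR and
/// eight FPR/vector arguments of simple types for the C and Swift calling
/// conventions, copying each argument out of its live-in physical register.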
2897bool AArch64FastISel::fastLowerArguments() {
2898 if (!FuncInfo.CanLowerReturn)
2899 return false;
2900
2901 const Function *F = FuncInfo.Fn;
2902 if (F->isVarArg())
2903 return false;
2904
2905 CallingConv::ID CC = F->getCallingConv();
2906 if (CC != CallingConv::C && CC != CallingConv::Swift)
2907 return false;
2908
2909 if (Subtarget->hasCustomCallingConv())
2910 return false;
2911
2912 // Only handle simple cases of up to 8 GPR and FPR each.
2913 unsigned GPRCnt = 0;
2914 unsigned FPRCnt = 0;
2915 for (auto const &Arg : F->args()) {
2916 if (Arg.hasAttribute(Attribute::ByVal) ||
2917 Arg.hasAttribute(Attribute::InReg) ||
2918 Arg.hasAttribute(Attribute::StructRet) ||
2919 Arg.hasAttribute(Attribute::SwiftSelf) ||
2920 Arg.hasAttribute(Attribute::SwiftAsync) ||
2921 Arg.hasAttribute(Attribute::SwiftError) ||
2922 Arg.hasAttribute(Attribute::Nest))
2923 return false;
2924
2925 Type *ArgTy = Arg.getType();
2926 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2927 return false;
2928
2929 EVT ArgVT = TLI.getValueType(DL, Ty: ArgTy);
2930 if (!ArgVT.isSimple())
2931 return false;
2932
2933 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2934 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2935 return false;
2936
2937 if (VT.isVector() &&
2938 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2939 return false;
2940
2941 if (VT >= MVT::i1 && VT <= MVT::i64)
2942 ++GPRCnt;
2943 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2944 VT.is128BitVector())
2945 ++FPRCnt;
2946 else
2947 return false;
2948
2949 if (GPRCnt > 8 || FPRCnt > 8)
2950 return false;
2951 }
2952
2953 static const MCPhysReg Registers[6][8] = {
2954 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2955 AArch64::W5, AArch64::W6, AArch64::W7 },
2956 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2957 AArch64::X5, AArch64::X6, AArch64::X7 },
2958 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2959 AArch64::H5, AArch64::H6, AArch64::H7 },
2960 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2961 AArch64::S5, AArch64::S6, AArch64::S7 },
2962 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2963 AArch64::D5, AArch64::D6, AArch64::D7 },
2964 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2965 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2966 };
2967
2968 unsigned GPRIdx = 0;
2969 unsigned FPRIdx = 0;
2970 for (auto const &Arg : F->args()) {
2971 MVT VT = TLI.getSimpleValueType(DL, Ty: Arg.getType());
2972 unsigned SrcReg;
2973 const TargetRegisterClass *RC;
2974 if (VT >= MVT::i1 && VT <= MVT::i32) {
2975 SrcReg = Registers[0][GPRIdx++];
2976 RC = &AArch64::GPR32RegClass;
2977 VT = MVT::i32;
2978 } else if (VT == MVT::i64) {
2979 SrcReg = Registers[1][GPRIdx++];
2980 RC = &AArch64::GPR64RegClass;
2981 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2982 SrcReg = Registers[2][FPRIdx++];
2983 RC = &AArch64::FPR16RegClass;
2984 } else if (VT == MVT::f32) {
2985 SrcReg = Registers[3][FPRIdx++];
2986 RC = &AArch64::FPR32RegClass;
2987 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2988 SrcReg = Registers[4][FPRIdx++];
2989 RC = &AArch64::FPR64RegClass;
2990 } else if (VT.is128BitVector()) {
2991 SrcReg = Registers[5][FPRIdx++];
2992 RC = &AArch64::FPR128RegClass;
2993 } else
2994 llvm_unreachable("Unexpected value type.");
2995
2996 Register DstReg = FuncInfo.MF->addLiveIn(PReg: SrcReg, RC);
2997 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2998 // Without this, EmitLiveInCopies may eliminate the livein if its only
2999 // use is a bitcast (which isn't turned into an instruction).
3000 Register ResultReg = createResultReg(RC);
3001 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3002 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
3003 .addReg(RegNo: DstReg, flags: getKillRegState(B: true));
3004 updateValueMap(I: &Arg, Reg: ResultReg);
3005 }
3006 return true;
3007}
3008
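/// Lower outgoing call arguments: analyze the operands against the calling
/// convention, emit CALLSEQ_START, and either copy each argument into its
/// assigned register or store it to its stack slot relative to SP.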
3009bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3010 SmallVectorImpl<MVT> &OutVTs,
3011 unsigned &NumBytes) {
3012 CallingConv::ID CC = CLI.CallConv;
3013 SmallVector<CCValAssign, 16> ArgLocs;
3014 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3015 CCInfo.AnalyzeCallOperands(ArgVTs&: OutVTs, Flags&: CLI.OutFlags, Fn: CCAssignFnForCall(CC));
3016
3017 // Get a count of how many bytes are to be pushed on the stack.
3018 NumBytes = CCInfo.getStackSize();
3019
3020 // Issue CALLSEQ_START
3021 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3022 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackDown))
3023 .addImm(Val: NumBytes).addImm(Val: 0);
3024
3025 // Process the args.
3026 for (CCValAssign &VA : ArgLocs) {
3027 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3028 MVT ArgVT = OutVTs[VA.getValNo()];
3029
3030 Register ArgReg = getRegForValue(V: ArgVal);
3031 if (!ArgReg)
3032 return false;
3033
3034 // Handle arg promotion: SExt, ZExt, AExt.
3035 switch (VA.getLocInfo()) {
3036 case CCValAssign::Full:
3037 break;
3038 case CCValAssign::SExt: {
3039 MVT DestVT = VA.getLocVT();
3040 MVT SrcVT = ArgVT;
3041 ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/false);
3042 if (!ArgReg)
3043 return false;
3044 break;
3045 }
3046 case CCValAssign::AExt:
3047 // Intentional fall-through.
3048 case CCValAssign::ZExt: {
3049 MVT DestVT = VA.getLocVT();
3050 MVT SrcVT = ArgVT;
3051 ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/true);
3052 if (!ArgReg)
3053 return false;
3054 break;
3055 }
3056 default:
3057 llvm_unreachable("Unknown arg promotion!");
3058 }
3059
3060 // Now copy/store arg to correct locations.
3061 if (VA.isRegLoc() && !VA.needsCustom()) {
3062 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3063 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VA.getLocReg()).addReg(RegNo: ArgReg);
3064 CLI.OutRegs.push_back(Elt: VA.getLocReg());
3065 } else if (VA.needsCustom()) {
3066 // FIXME: Handle custom args.
3067 return false;
3068 } else {
3069 assert(VA.isMemLoc() && "Assuming store on stack.");
3070
3071 // Don't emit stores for undef values.
3072 if (isa<UndefValue>(Val: ArgVal))
3073 continue;
3074
3075 // Need to store on the stack.
3076 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3077
3078 unsigned BEAlign = 0;
3079 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3080 BEAlign = 8 - ArgSize;
3081
3082 Address Addr;
3083 Addr.setKind(Address::RegBase);
3084 Addr.setReg(AArch64::SP);
3085 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3086
3087 Align Alignment = DL.getABITypeAlign(Ty: ArgVal->getType());
3088 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3089 PtrInfo: MachinePointerInfo::getStack(MF&: *FuncInfo.MF, Offset: Addr.getOffset()),
3090 F: MachineMemOperand::MOStore, Size: ArgVT.getStoreSize(), BaseAlignment: Alignment);
3091
3092 if (!emitStore(VT: ArgVT, SrcReg: ArgReg, Addr, MMO))
3093 return false;
3094 }
3095 }
3096 return true;
3097}
3098
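/// Finish a lowered call: emit CALLSEQ_END and copy each return value out of
/// its assigned physical register into the virtual registers created for the
/// call result.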
3099bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3100 CallingConv::ID CC = CLI.CallConv;
3101
3102 // Issue CALLSEQ_END
3103 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3104 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackUp))
3105 .addImm(Val: NumBytes).addImm(Val: 0);
3106
3107 // Now the return values.
3108 SmallVector<CCValAssign, 16> RVLocs;
3109 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3110 CCInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: CCAssignFnForCall(CC));
3111
3112 Register ResultReg = FuncInfo.CreateRegs(Ty: CLI.RetTy);
3113 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3114 CCValAssign &VA = RVLocs[i];
3115 MVT CopyVT = VA.getValVT();
3116 unsigned CopyReg = ResultReg + i;
3117
3118 // TODO: Handle big-endian results
3119 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3120 return false;
3121
3122    // Copy the result out of its specified physreg.
3123 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TargetOpcode::COPY),
3124 DestReg: CopyReg)
3125 .addReg(RegNo: VA.getLocReg());
3126 CLI.InRegs.push_back(Elt: VA.getLocReg());
3127 }
3128
3129 CLI.ResultReg = ResultReg;
3130 CLI.NumResultRegs = RVLocs.size();
3131
3132 return true;
3133}
3134
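/// Fast-path call lowering. Bails out to SelectionDAG for tail calls, varargs,
/// and other cases it does not handle (e.g. KCFI-checked indirect calls, ILP32,
/// or the large code model on non-MachO targets), then lowers the arguments,
/// emits the BL/BLR, and finishes the call.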
3135bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3136 CallingConv::ID CC = CLI.CallConv;
3137 bool IsTailCall = CLI.IsTailCall;
3138 bool IsVarArg = CLI.IsVarArg;
3139 const Value *Callee = CLI.Callee;
3140 MCSymbol *Symbol = CLI.Symbol;
3141
3142 if (!Callee && !Symbol)
3143 return false;
3144
3145 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3146 // a bti instruction following the call.
3147 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3148 !Subtarget->noBTIAtReturnTwice() &&
3149 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3150 return false;
3151
3152 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3153 if (CLI.CB && CLI.CB->isIndirectCall() &&
3154 CLI.CB->getOperandBundle(ID: LLVMContext::OB_kcfi))
3155 return false;
3156
3157 // Allow SelectionDAG isel to handle tail calls.
3158 if (IsTailCall)
3159 return false;
3160
3161 // FIXME: we could and should support this, but for now correctness at -O0 is
3162 // more important.
3163 if (Subtarget->isTargetILP32())
3164 return false;
3165
3166 CodeModel::Model CM = TM.getCodeModel();
3167 // Only support the small-addressing and large code models.
3168 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3169 return false;
3170
3171 // FIXME: Add large code model support for ELF.
3172 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3173 return false;
3174
3175 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3176 // attribute. Check "RtLibUseGOT" instead.
3177 if (MF->getFunction().getParent()->getRtLibUseGOT())
3178 return false;
3179
3180 // Let SDISel handle vararg functions.
3181 if (IsVarArg)
3182 return false;
3183
3184 if (Subtarget->isWindowsArm64EC())
3185 return false;
3186
3187 for (auto Flag : CLI.OutFlags)
3188 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3189 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3190 return false;
3191
3192 // Set up the argument vectors.
3193 SmallVector<MVT, 16> OutVTs;
3194 OutVTs.reserve(N: CLI.OutVals.size());
3195
3196 for (auto *Val : CLI.OutVals) {
3197 MVT VT;
3198 if (!isTypeLegal(Val->getType(), VT) &&
3199 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3200 return false;
3201
3202 // We don't handle vector parameters yet.
3203 if (VT.isVector() || VT.getSizeInBits() > 64)
3204 return false;
3205
3206 OutVTs.push_back(Elt: VT);
3207 }
3208
3209 Address Addr;
3210 if (Callee && !computeCallAddress(V: Callee, Addr))
3211 return false;
3212
3213 // The weak function target may be zero; in that case we must use indirect
3214  // addressing via a stub on Windows, as it may be out of range for a
3215 // PC-relative jump.
3216 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3217 Addr.getGlobalValue()->hasExternalWeakLinkage())
3218 return false;
3219
3220 // Handle the arguments now that we've gotten them.
3221 unsigned NumBytes;
3222 if (!processCallArgs(CLI, OutVTs, NumBytes))
3223 return false;
3224
3225 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3226 if (RegInfo->isAnyArgRegReserved(MF: *MF))
3227 RegInfo->emitReservedArgRegCallError(MF: *MF);
3228
3229 // Issue the call.
3230 MachineInstrBuilder MIB;
3231 if (Subtarget->useSmallAddressing()) {
3232 const MCInstrDesc &II =
3233 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3234 MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II);
3235 if (Symbol)
3236 MIB.addSym(Sym: Symbol, TargetFlags: 0);
3237 else if (Addr.getGlobalValue())
3238 MIB.addGlobalAddress(GV: Addr.getGlobalValue(), Offset: 0, TargetFlags: 0);
3239 else if (Addr.getReg()) {
3240 Register Reg = constrainOperandRegClass(II, Op: Addr.getReg(), OpNum: 0);
3241 MIB.addReg(RegNo: Reg);
3242 } else
3243 return false;
3244 } else {
3245 unsigned CallReg = 0;
3246 if (Symbol) {
3247 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3248 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3249 ADRPReg)
3250 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3251
3252 CallReg = createResultReg(&AArch64::GPR64RegClass);
3253 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3254 TII.get(AArch64::LDRXui), CallReg)
3255 .addReg(ADRPReg)
3256 .addSym(Symbol,
3257 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3258 } else if (Addr.getGlobalValue())
3259 CallReg = materializeGV(GV: Addr.getGlobalValue());
3260 else if (Addr.getReg())
3261 CallReg = Addr.getReg();
3262
3263 if (!CallReg)
3264 return false;
3265
3266 const MCInstrDesc &II = TII.get(Opcode: getBLRCallOpcode(MF: *MF));
3267 CallReg = constrainOperandRegClass(II, Op: CallReg, OpNum: 0);
3268 MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: CallReg);
3269 }
3270
3271 // Add implicit physical register uses to the call.
3272 for (auto Reg : CLI.OutRegs)
3273 MIB.addReg(RegNo: Reg, flags: RegState::Implicit);
3274
3275 // Add a register mask with the call-preserved registers.
3276 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3277 MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));
3278
3279 CLI.Call = MIB;
3280
3281 // Finish off the call including any return values.
3282 return finishCall(CLI, NumBytes);
3283}
3284
3285bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
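  // Heuristic sketch: with a known alignment, the copy is inlined only if it
  // fits in roughly four alignment-sized chunks (integer division, so e.g. up
  // to 39 bytes at 8-byte alignment); with unknown alignment the cap is 31
  // bytes.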
3286 if (Alignment)
3287 return Len / Alignment->value() <= 4;
3288 else
3289 return Len < 32;
3290}
3291
3292bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3293 uint64_t Len, MaybeAlign Alignment) {
3294 // Make sure we don't bloat code by inlining very large memcpy's.
3295 if (!isMemCpySmall(Len, Alignment))
3296 return false;
3297
3298 int64_t UnscaledOffset = 0;
3299 Address OrigDest = Dest;
3300 Address OrigSrc = Src;
3301
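  // Emit the copy as a run of progressively smaller loads and stores. For
  // example (illustrative), a 15-byte copy at 8-byte alignment becomes an
  // i64, an i32, an i16 and an i8 load/store pair at increasing offsets.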
3302 while (Len) {
3303 MVT VT;
3304 if (!Alignment || *Alignment >= 8) {
3305 if (Len >= 8)
3306 VT = MVT::i64;
3307 else if (Len >= 4)
3308 VT = MVT::i32;
3309 else if (Len >= 2)
3310 VT = MVT::i16;
3311 else {
3312 VT = MVT::i8;
3313 }
3314 } else {
3315 assert(Alignment && "Alignment is set in this branch");
3316 // Bound based on alignment.
3317 if (Len >= 4 && *Alignment == 4)
3318 VT = MVT::i32;
3319 else if (Len >= 2 && *Alignment == 2)
3320 VT = MVT::i16;
3321 else {
3322 VT = MVT::i8;
3323 }
3324 }
3325
3326 unsigned ResultReg = emitLoad(VT, RetVT: VT, Addr: Src);
3327 if (!ResultReg)
3328 return false;
3329
3330 if (!emitStore(VT, SrcReg: ResultReg, Addr: Dest))
3331 return false;
3332
3333 int64_t Size = VT.getSizeInBits() / 8;
3334 Len -= Size;
3335 UnscaledOffset += Size;
3336
3337 // We need to recompute the unscaled offset for each iteration.
3338 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3339 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3340 }
3341
3342 return true;
3343}
3344
3345/// Check if it is possible to fold the condition from the XALU intrinsic
3346/// into the user. The condition code will only be updated on success.
3347bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3348 const Instruction *I,
3349 const Value *Cond) {
3350 if (!isa<ExtractValueInst>(Val: Cond))
3351 return false;
3352
3353 const auto *EV = cast<ExtractValueInst>(Val: Cond);
3354 if (!isa<IntrinsicInst>(Val: EV->getAggregateOperand()))
3355 return false;
3356
3357 const auto *II = cast<IntrinsicInst>(Val: EV->getAggregateOperand());
3358 MVT RetVT;
3359 const Function *Callee = II->getCalledFunction();
3360 Type *RetTy =
3361 cast<StructType>(Val: Callee->getReturnType())->getTypeAtIndex(N: 0U);
3362 if (!isTypeLegal(Ty: RetTy, VT&: RetVT))
3363 return false;
3364
3365 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3366 return false;
3367
3368 const Value *LHS = II->getArgOperand(i: 0);
3369 const Value *RHS = II->getArgOperand(i: 1);
3370
3371 // Canonicalize immediate to the RHS.
3372 if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative())
3373 std::swap(a&: LHS, b&: RHS);
3374
3375 // Simplify multiplies.
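  // A multiply by 2 overflows exactly when adding the value to itself
  // overflows, so {s|u}mul.with.overflow(x, 2) can reuse the condition code
  // of the corresponding {s|u}add.with.overflow.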
3376 Intrinsic::ID IID = II->getIntrinsicID();
3377 switch (IID) {
3378 default:
3379 break;
3380 case Intrinsic::smul_with_overflow:
3381 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3382 if (C->getValue() == 2)
3383 IID = Intrinsic::sadd_with_overflow;
3384 break;
3385 case Intrinsic::umul_with_overflow:
3386 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3387 if (C->getValue() == 2)
3388 IID = Intrinsic::uadd_with_overflow;
3389 break;
3390 }
3391
3392 AArch64CC::CondCode TmpCC;
3393 switch (IID) {
3394 default:
3395 return false;
3396 case Intrinsic::sadd_with_overflow:
3397 case Intrinsic::ssub_with_overflow:
3398 TmpCC = AArch64CC::VS;
3399 break;
3400 case Intrinsic::uadd_with_overflow:
3401 TmpCC = AArch64CC::HS;
3402 break;
3403 case Intrinsic::usub_with_overflow:
3404 TmpCC = AArch64CC::LO;
3405 break;
3406 case Intrinsic::smul_with_overflow:
3407 case Intrinsic::umul_with_overflow:
3408 TmpCC = AArch64CC::NE;
3409 break;
3410 }
3411
3412 // Check if both instructions are in the same basic block.
3413 if (!isValueAvailable(V: II))
3414 return false;
3415
3416 // Make sure nothing is in the way between the intrinsic and its user.
3417 BasicBlock::const_iterator Start(I);
3418 BasicBlock::const_iterator End(II);
3419 for (auto Itr = std::prev(x: Start); Itr != End; --Itr) {
3420 // We only expect extractvalue instructions between the intrinsic and the
3421 // instruction to be selected.
3422 if (!isa<ExtractValueInst>(Val: Itr))
3423 return false;
3424
3425 // Check that the extractvalue operand comes from the intrinsic.
3426 const auto *EVI = cast<ExtractValueInst>(Val&: Itr);
3427 if (EVI->getAggregateOperand() != II)
3428 return false;
3429 }
3430
3431 CC = TmpCC;
3432 return true;
3433}
3434
3435bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3436 // FIXME: Handle more intrinsics.
3437 switch (II->getIntrinsicID()) {
3438 default: return false;
3439 case Intrinsic::frameaddress: {
3440 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3441 MFI.setFrameAddressIsTaken(true);
3442
3443 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3444 Register FramePtr = RegInfo->getFrameRegister(MF: *(FuncInfo.MF));
3445 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3446 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3447 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: SrcReg).addReg(RegNo: FramePtr);
3448 // Recursively load frame address
3449 // ldr x0, [fp]
3450 // ldr x0, [x0]
3451 // ldr x0, [x0]
3452 // ...
3453 unsigned DestReg;
3454 unsigned Depth = cast<ConstantInt>(Val: II->getOperand(i_nocapture: 0))->getZExtValue();
3455 while (Depth--) {
3456 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3457 SrcReg, 0);
3458 assert(DestReg && "Unexpected LDR instruction emission failure.");
3459 SrcReg = DestReg;
3460 }
3461
3462 updateValueMap(I: II, Reg: SrcReg);
3463 return true;
3464 }
3465 case Intrinsic::sponentry: {
3466 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3467
3468 // SP = FP + Fixed Object + 16
3469 int FI = MFI.CreateFixedObject(Size: 4, SPOffset: 0, IsImmutable: false);
3470 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3472 TII.get(AArch64::ADDXri), ResultReg)
3473 .addFrameIndex(FI)
3474 .addImm(0)
3475 .addImm(0);
3476
3477 updateValueMap(I: II, Reg: ResultReg);
3478 return true;
3479 }
3480 case Intrinsic::memcpy:
3481 case Intrinsic::memmove: {
3482 const auto *MTI = cast<MemTransferInst>(Val: II);
3483 // Don't handle volatile.
3484 if (MTI->isVolatile())
3485 return false;
3486
3487 // Disable inlining for memmove before calls to computeAddress. Otherwise,
3488 // we would emit dead code because we don't currently handle memmoves.
3489 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3490 if (isa<ConstantInt>(Val: MTI->getLength()) && IsMemCpy) {
3491 // Small memcpy's are common enough that we want to do them without a call
3492 // if possible.
3493 uint64_t Len = cast<ConstantInt>(Val: MTI->getLength())->getZExtValue();
3494 MaybeAlign Alignment;
3495 if (MTI->getDestAlign() || MTI->getSourceAlign())
3496 Alignment = std::min(a: MTI->getDestAlign().valueOrOne(),
3497 b: MTI->getSourceAlign().valueOrOne());
3498 if (isMemCpySmall(Len, Alignment)) {
3499 Address Dest, Src;
3500 if (!computeAddress(Obj: MTI->getRawDest(), Addr&: Dest) ||
3501 !computeAddress(Obj: MTI->getRawSource(), Addr&: Src))
3502 return false;
3503 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3504 return true;
3505 }
3506 }
3507
3508 if (!MTI->getLength()->getType()->isIntegerTy(Bitwidth: 64))
3509 return false;
3510
3511 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3512 // Fast instruction selection doesn't support the special
3513 // address spaces.
3514 return false;
3515
3516 const char *IntrMemName = isa<MemCpyInst>(Val: II) ? "memcpy" : "memmove";
3517 return lowerCallTo(CI: II, SymName: IntrMemName, NumArgs: II->arg_size() - 1);
3518 }
3519 case Intrinsic::memset: {
3520 const MemSetInst *MSI = cast<MemSetInst>(Val: II);
3521 // Don't handle volatile.
3522 if (MSI->isVolatile())
3523 return false;
3524
3525 if (!MSI->getLength()->getType()->isIntegerTy(Bitwidth: 64))
3526 return false;
3527
3528 if (MSI->getDestAddressSpace() > 255)
3529 // Fast instruction selection doesn't support the special
3530 // address spaces.
3531 return false;
3532
3533 return lowerCallTo(CI: II, SymName: "memset", NumArgs: II->arg_size() - 1);
3534 }
3535 case Intrinsic::sin:
3536 case Intrinsic::cos:
3537 case Intrinsic::pow: {
3538 MVT RetVT;
3539 if (!isTypeLegal(Ty: II->getType(), VT&: RetVT))
3540 return false;
3541
3542 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3543 return false;
3544
3545 static const RTLIB::Libcall LibCallTable[3][2] = {
3546 { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3547 { RTLIB::COS_F32, RTLIB::COS_F64 },
3548 { RTLIB::POW_F32, RTLIB::POW_F64 }
3549 };
3550 RTLIB::Libcall LC;
3551 bool Is64Bit = RetVT == MVT::f64;
3552 switch (II->getIntrinsicID()) {
3553 default:
3554 llvm_unreachable("Unexpected intrinsic.");
3555 case Intrinsic::sin:
3556 LC = LibCallTable[0][Is64Bit];
3557 break;
3558 case Intrinsic::cos:
3559 LC = LibCallTable[1][Is64Bit];
3560 break;
3561 case Intrinsic::pow:
3562 LC = LibCallTable[2][Is64Bit];
3563 break;
3564 }
3565
3566 ArgListTy Args;
3567 Args.reserve(n: II->arg_size());
3568
3569 // Populate the argument list.
3570 for (auto &Arg : II->args()) {
3571 ArgListEntry Entry;
3572 Entry.Val = Arg;
3573 Entry.Ty = Arg->getType();
3574 Args.push_back(x: Entry);
3575 }
3576
3577 CallLoweringInfo CLI;
3578 MCContext &Ctx = MF->getContext();
3579 CLI.setCallee(DL, Ctx, CC: TLI.getLibcallCallingConv(Call: LC), ResultTy: II->getType(),
3580 Target: TLI.getLibcallName(Call: LC), ArgsList: std::move(Args));
3581 if (!lowerCallTo(CLI))
3582 return false;
3583 updateValueMap(I: II, Reg: CLI.ResultReg);
3584 return true;
3585 }
3586 case Intrinsic::fabs: {
3587 MVT VT;
3588 if (!isTypeLegal(Ty: II->getType(), VT))
3589 return false;
3590
3591 unsigned Opc;
3592 switch (VT.SimpleTy) {
3593 default:
3594 return false;
3595 case MVT::f32:
3596 Opc = AArch64::FABSSr;
3597 break;
3598 case MVT::f64:
3599 Opc = AArch64::FABSDr;
3600 break;
3601 }
3602 Register SrcReg = getRegForValue(V: II->getOperand(i_nocapture: 0));
3603 if (!SrcReg)
3604 return false;
3605 Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
3606 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
3607 .addReg(RegNo: SrcReg);
3608 updateValueMap(I: II, Reg: ResultReg);
3609 return true;
3610 }
3611 case Intrinsic::trap:
3612 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3613 .addImm(1);
3614 return true;
3615 case Intrinsic::debugtrap:
3616 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3617 .addImm(0xF000);
3618 return true;
3619
3620 case Intrinsic::sqrt: {
3621 Type *RetTy = II->getCalledFunction()->getReturnType();
3622
3623 MVT VT;
3624 if (!isTypeLegal(Ty: RetTy, VT))
3625 return false;
3626
3627 Register Op0Reg = getRegForValue(V: II->getOperand(i_nocapture: 0));
3628 if (!Op0Reg)
3629 return false;
3630
3631 unsigned ResultReg = fastEmit_r(VT, RetVT: VT, Opcode: ISD::FSQRT, Op0: Op0Reg);
3632 if (!ResultReg)
3633 return false;
3634
3635 updateValueMap(I: II, Reg: ResultReg);
3636 return true;
3637 }
3638 case Intrinsic::sadd_with_overflow:
3639 case Intrinsic::uadd_with_overflow:
3640 case Intrinsic::ssub_with_overflow:
3641 case Intrinsic::usub_with_overflow:
3642 case Intrinsic::smul_with_overflow:
3643 case Intrinsic::umul_with_overflow: {
3644 // This implements the basic lowering of the xalu with overflow intrinsics.
3645 const Function *Callee = II->getCalledFunction();
3646 auto *Ty = cast<StructType>(Val: Callee->getReturnType());
3647 Type *RetTy = Ty->getTypeAtIndex(N: 0U);
3648
3649 MVT VT;
3650 if (!isTypeLegal(Ty: RetTy, VT))
3651 return false;
3652
3653 if (VT != MVT::i32 && VT != MVT::i64)
3654 return false;
3655
3656 const Value *LHS = II->getArgOperand(i: 0);
3657 const Value *RHS = II->getArgOperand(i: 1);
3658 // Canonicalize immediate to the RHS.
3659 if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative())
3660 std::swap(a&: LHS, b&: RHS);
3661
3662 // Simplify multiplies.
3663 Intrinsic::ID IID = II->getIntrinsicID();
3664 switch (IID) {
3665 default:
3666 break;
3667 case Intrinsic::smul_with_overflow:
3668 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
3669 if (C->getValue() == 2) {
3670 IID = Intrinsic::sadd_with_overflow;
3671 RHS = LHS;
3672 }
3673 break;
3674 case Intrinsic::umul_with_overflow:
3675 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
3676 if (C->getValue() == 2) {
3677 IID = Intrinsic::uadd_with_overflow;
3678 RHS = LHS;
3679 }
3680 break;
3681 }
3682
3683 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3684 AArch64CC::CondCode CC = AArch64CC::Invalid;
3685 switch (IID) {
3686 default: llvm_unreachable("Unexpected intrinsic!");
3687 case Intrinsic::sadd_with_overflow:
3688 ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3689 CC = AArch64CC::VS;
3690 break;
3691 case Intrinsic::uadd_with_overflow:
3692 ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3693 CC = AArch64CC::HS;
3694 break;
3695 case Intrinsic::ssub_with_overflow:
3696 ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3697 CC = AArch64CC::VS;
3698 break;
3699 case Intrinsic::usub_with_overflow:
3700 ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3701 CC = AArch64CC::LO;
3702 break;
3703 case Intrinsic::smul_with_overflow: {
3704 CC = AArch64CC::NE;
3705 Register LHSReg = getRegForValue(V: LHS);
3706 if (!LHSReg)
3707 return false;
3708
3709 Register RHSReg = getRegForValue(V: RHS);
3710 if (!RHSReg)
3711 return false;
3712
3713 if (VT == MVT::i32) {
3714 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3715 Register MulSubReg =
3716 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
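  // The product was computed in 64 bits; it overflowed iff it no longer
  // equals the sign-extension of its low 32 bits, which the compare below
  // checks.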
3717 // cmp xreg, wreg, sxtw
3718 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3719 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3720 /*WantResult=*/false);
3721 MulReg = MulSubReg;
3722 } else {
3723 assert(VT == MVT::i64 && "Unexpected value type.");
3724 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3725 // reused in the next instruction.
3726 MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg);
3727 unsigned SMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHS, Op0: LHSReg, Op1: RHSReg);
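  // Signed 64x64-bit multiplication overflowed iff the high half (SMULH)
  // differs from the low half's sign bit replicated across 64 bits
  // (MulReg asr #63).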
3728 emitSubs_rs(RetVT: VT, LHSReg: SMULHReg, RHSReg: MulReg, ShiftType: AArch64_AM::ASR, ShiftImm: 63,
3729 /*WantResult=*/false);
3730 }
3731 break;
3732 }
3733 case Intrinsic::umul_with_overflow: {
3734 CC = AArch64CC::NE;
3735 Register LHSReg = getRegForValue(V: LHS);
3736 if (!LHSReg)
3737 return false;
3738
3739 Register RHSReg = getRegForValue(V: RHS);
3740 if (!RHSReg)
3741 return false;
3742
3743 if (VT == MVT::i32) {
3744 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
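  // The product was computed in 64 bits; it overflowed iff any of the upper
  // 32 bits are set, which the TST below checks.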
3745 // tst xreg, #0xffffffff00000000
3746 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3747 TII.get(AArch64::ANDSXri), AArch64::XZR)
3748 .addReg(MulReg)
3749 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3750 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3751 } else {
3752 assert(VT == MVT::i64 && "Unexpected value type.");
3753 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3754 // reused in the next instruction.
3755 MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg);
3756 unsigned UMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHU, Op0: LHSReg, Op1: RHSReg);
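  // Unsigned 64x64-bit multiplication overflowed iff the high half (UMULH)
  // is nonzero.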
3757 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3758 }
3759 break;
3760 }
3761 }
3762
3763 if (MulReg) {
3764 ResultReg1 = createResultReg(RC: TLI.getRegClassFor(VT));
3765 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3766 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg1).addReg(RegNo: MulReg);
3767 }
3768
3769 if (!ResultReg1)
3770 return false;
3771
3772 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3773 AArch64::WZR, AArch64::WZR,
3774 getInvertedCondCode(CC));
3775 (void)ResultReg2;
3776 assert((ResultReg1 + 1) == ResultReg2 &&
3777 "Nonconsecutive result registers.");
3778 updateValueMap(I: II, Reg: ResultReg1, NumRegs: 2);
3779 return true;
3780 }
3781 case Intrinsic::aarch64_crc32b:
3782 case Intrinsic::aarch64_crc32h:
3783 case Intrinsic::aarch64_crc32w:
3784 case Intrinsic::aarch64_crc32x:
3785 case Intrinsic::aarch64_crc32cb:
3786 case Intrinsic::aarch64_crc32ch:
3787 case Intrinsic::aarch64_crc32cw:
3788 case Intrinsic::aarch64_crc32cx: {
3789 if (!Subtarget->hasCRC())
3790 return false;
3791
3792 unsigned Opc;
3793 switch (II->getIntrinsicID()) {
3794 default:
3795 llvm_unreachable("Unexpected intrinsic!");
3796 case Intrinsic::aarch64_crc32b:
3797 Opc = AArch64::CRC32Brr;
3798 break;
3799 case Intrinsic::aarch64_crc32h:
3800 Opc = AArch64::CRC32Hrr;
3801 break;
3802 case Intrinsic::aarch64_crc32w:
3803 Opc = AArch64::CRC32Wrr;
3804 break;
3805 case Intrinsic::aarch64_crc32x:
3806 Opc = AArch64::CRC32Xrr;
3807 break;
3808 case Intrinsic::aarch64_crc32cb:
3809 Opc = AArch64::CRC32CBrr;
3810 break;
3811 case Intrinsic::aarch64_crc32ch:
3812 Opc = AArch64::CRC32CHrr;
3813 break;
3814 case Intrinsic::aarch64_crc32cw:
3815 Opc = AArch64::CRC32CWrr;
3816 break;
3817 case Intrinsic::aarch64_crc32cx:
3818 Opc = AArch64::CRC32CXrr;
3819 break;
3820 }
3821
3822 Register LHSReg = getRegForValue(V: II->getArgOperand(i: 0));
3823 Register RHSReg = getRegForValue(V: II->getArgOperand(i: 1));
3824 if (!LHSReg || !RHSReg)
3825 return false;
3826
3827 Register ResultReg =
3828 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3829 updateValueMap(I: II, Reg: ResultReg);
3830 return true;
3831 }
3832 }
3833 return false;
3834}
3835
3836bool AArch64FastISel::selectRet(const Instruction *I) {
3837 const ReturnInst *Ret = cast<ReturnInst>(Val: I);
3838 const Function &F = *I->getParent()->getParent();
3839
3840 if (!FuncInfo.CanLowerReturn)
3841 return false;
3842
3843 if (F.isVarArg())
3844 return false;
3845
3846 if (TLI.supportSwiftError() &&
3847 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3848 return false;
3849
3850 if (TLI.supportSplitCSR(MF: FuncInfo.MF))
3851 return false;
3852
3853 // Build a list of return value registers.
3854 SmallVector<unsigned, 4> RetRegs;
3855
3856 if (Ret->getNumOperands() > 0) {
3857 CallingConv::ID CC = F.getCallingConv();
3858 SmallVector<ISD::OutputArg, 4> Outs;
3859 GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);
3860
3861 // Analyze operands of the call, assigning locations to each operand.
3862 SmallVector<CCValAssign, 16> ValLocs;
3863 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3864 CCInfo.AnalyzeReturn(Outs, Fn: RetCC_AArch64_AAPCS);
3865
3866 // Only handle a single return value for now.
3867 if (ValLocs.size() != 1)
3868 return false;
3869
3870 CCValAssign &VA = ValLocs[0];
3871 const Value *RV = Ret->getOperand(i_nocapture: 0);
3872
3873 // Don't bother handling odd stuff for now.
3874 if ((VA.getLocInfo() != CCValAssign::Full) &&
3875 (VA.getLocInfo() != CCValAssign::BCvt))
3876 return false;
3877
3878 // Only handle register returns for now.
3879 if (!VA.isRegLoc())
3880 return false;
3881
3882 Register Reg = getRegForValue(V: RV);
3883 if (Reg == 0)
3884 return false;
3885
3886 unsigned SrcReg = Reg + VA.getValNo();
3887 Register DestReg = VA.getLocReg();
3888 // Avoid a cross-class copy. This is very unlikely.
3889 if (!MRI.getRegClass(Reg: SrcReg)->contains(Reg: DestReg))
3890 return false;
3891
3892 EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
3893 if (!RVEVT.isSimple())
3894 return false;
3895
3896 // Vectors (of > 1 lane) in big endian need tricky handling.
3897 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3898 !Subtarget->isLittleEndian())
3899 return false;
3900
3901 MVT RVVT = RVEVT.getSimpleVT();
3902 if (RVVT == MVT::f128)
3903 return false;
3904
3905 MVT DestVT = VA.getValVT();
3906 // Special handling for extended integers.
3907 if (RVVT != DestVT) {
3908 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3909 return false;
3910
3911 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3912 return false;
3913
3914 bool IsZExt = Outs[0].Flags.isZExt();
3915 SrcReg = emitIntExt(SrcVT: RVVT, SrcReg, DestVT, isZExt: IsZExt);
3916 if (SrcReg == 0)
3917 return false;
3918 }
3919
3920 // "Callee" (i.e. value producer) zero extends pointers at function
3921 // boundary.
3922 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3923 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3924
3925 // Make the copy.
3926 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3927 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg).addReg(RegNo: SrcReg);
3928
3929 // Add register to return instruction.
3930 RetRegs.push_back(Elt: VA.getLocReg());
3931 }
3932
3933 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3934 TII.get(AArch64::RET_ReallyLR));
3935 for (unsigned RetReg : RetRegs)
3936 MIB.addReg(RegNo: RetReg, flags: RegState::Implicit);
3937 return true;
3938}
3939
3940bool AArch64FastISel::selectTrunc(const Instruction *I) {
3941 Type *DestTy = I->getType();
3942 Value *Op = I->getOperand(i: 0);
3943 Type *SrcTy = Op->getType();
3944
3945 EVT SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
3946 EVT DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
3947 if (!SrcEVT.isSimple())
3948 return false;
3949 if (!DestEVT.isSimple())
3950 return false;
3951
3952 MVT SrcVT = SrcEVT.getSimpleVT();
3953 MVT DestVT = DestEVT.getSimpleVT();
3954
3955 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3956 SrcVT != MVT::i8)
3957 return false;
3958 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3959 DestVT != MVT::i1)
3960 return false;
3961
3962 Register SrcReg = getRegForValue(V: Op);
3963 if (!SrcReg)
3964 return false;
3965
3966 // If we're truncating from i64 to a smaller non-legal type then generate an
3967 // AND. Otherwise, we know the high bits are undefined and a truncate only
3968 // generates a COPY. We cannot also mark the source register as the result
3969 // register, because this can incorrectly transfer the kill flag onto the
3970 // source register.
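  // For example (illustrative), trunc i64 %x to i8 is lowered to an extract
  // of the sub_32 sub-register followed by an AND with 0xff.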
3971 unsigned ResultReg;
3972 if (SrcVT == MVT::i64) {
3973 uint64_t Mask = 0;
3974 switch (DestVT.SimpleTy) {
3975 default:
3976 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3977 return false;
3978 case MVT::i1:
3979 Mask = 0x1;
3980 break;
3981 case MVT::i8:
3982 Mask = 0xff;
3983 break;
3984 case MVT::i16:
3985 Mask = 0xffff;
3986 break;
3987 }
3988 // Issue an extract_subreg to get the lower 32-bits.
3989 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3990 AArch64::sub_32);
3991 // Create the AND instruction which performs the actual truncation.
3992 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3993 assert(ResultReg && "Unexpected AND instruction emission failure.");
3994 } else {
3995 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3996 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3997 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
3998 .addReg(RegNo: SrcReg);
3999 }
4000
4001 updateValueMap(I, Reg: ResultReg);
4002 return true;
4003}
4004
4005unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4006 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4007 DestVT == MVT::i64) &&
4008 "Unexpected value type.");
4009 // Handle i8 and i16 as i32.
4010 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4011 DestVT = MVT::i32;
4012
4013 if (IsZExt) {
4014 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4015 assert(ResultReg && "Unexpected AND instruction emission failure.");
4016 if (DestVT == MVT::i64) {
4017 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4018 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4019 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4020 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4021 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4022 .addImm(0)
4023 .addReg(ResultReg)
4024 .addImm(AArch64::sub_32);
4025 ResultReg = Reg64;
4026 }
4027 return ResultReg;
4028 } else {
4029 if (DestVT == MVT::i64) {
4030 // FIXME: We're SExt i1 to i64.
4031 return 0;
4032 }
4033 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4034 0, 0);
4035 }
4036}
4037
4038unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
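  // MUL is an alias of MADD with the zero register, so the multiply is
  // emitted as MADD Rd, Rn, Rm, {W|X}ZR, i.e. Rn * Rm + 0.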
4039 unsigned Opc, ZReg;
4040 switch (RetVT.SimpleTy) {
4041 default: return 0;
4042 case MVT::i8:
4043 case MVT::i16:
4044 case MVT::i32:
4045 RetVT = MVT::i32;
4046 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4047 case MVT::i64:
4048 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4049 }
4050
4051 const TargetRegisterClass *RC =
4052 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4053 return fastEmitInst_rrr(MachineInstOpcode: Opc, RC, Op0, Op1, Op2: ZReg);
4054}
4055
4056unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4057 if (RetVT != MVT::i64)
4058 return 0;
4059
4060 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4061 Op0, Op1, AArch64::XZR);
4062}
4063
4064unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4065 if (RetVT != MVT::i64)
4066 return 0;
4067
4068 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4069 Op0, Op1, AArch64::XZR);
4070}
4071
4072unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4073 unsigned Op1Reg) {
4074 unsigned Opc = 0;
4075 bool NeedTrunc = false;
4076 uint64_t Mask = 0;
4077 switch (RetVT.SimpleTy) {
4078 default: return 0;
4079 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4080 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4081 case MVT::i32: Opc = AArch64::LSLVWr; break;
4082 case MVT::i64: Opc = AArch64::LSLVXr; break;
4083 }
4084
4085 const TargetRegisterClass *RC =
4086 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4087 if (NeedTrunc)
4088 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4089
4090 Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg);
4091 if (NeedTrunc)
4092 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4093 return ResultReg;
4094}
4095
4096unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4097 uint64_t Shift, bool IsZExt) {
4098 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4099 "Unexpected source/return type pair.");
4100 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4101 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4102 "Unexpected source value type.");
4103 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4104 RetVT == MVT::i64) && "Unexpected return value type.");
4105
4106 bool Is64Bit = (RetVT == MVT::i64);
4107 unsigned RegSize = Is64Bit ? 64 : 32;
4108 unsigned DstBits = RetVT.getSizeInBits();
4109 unsigned SrcBits = SrcVT.getSizeInBits();
4110 const TargetRegisterClass *RC =
4111 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4112
4113 // Just emit a copy for "zero" shifts.
4114 if (Shift == 0) {
4115 if (RetVT == SrcVT) {
4116 Register ResultReg = createResultReg(RC);
4117 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4118 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
4119 .addReg(RegNo: Op0);
4120 return ResultReg;
4121 } else
4122 return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4123 }
4124
4125 // Don't deal with undefined shifts.
4126 if (Shift >= DstBits)
4127 return 0;
4128
4129 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4130 // {S|U}BFM Wd, Wn, #r, #s
4131 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4132
4133 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4134 // %2 = shl i16 %1, 4
4135 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4136 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4137 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4138 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4139
4140 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4141 // %2 = shl i16 %1, 8
4142 // Wd<32+7-24,32-24> = Wn<7:0>
4143 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4144 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4145 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4146
4147 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4148 // %2 = shl i16 %1, 12
4149 // Wd<32+3-20,32-20> = Wn<3:0>
4150 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4151 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4152 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4153
4154 unsigned ImmR = RegSize - Shift;
4155 // Limit the width to the length of the source type.
4156 unsigned ImmS = std::min<unsigned>(a: SrcBits - 1, b: DstBits - 1 - Shift);
4157 static const unsigned OpcTable[2][2] = {
4158 {AArch64::SBFMWri, AArch64::SBFMXri},
4159 {AArch64::UBFMWri, AArch64::UBFMXri}
4160 };
4161 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4162 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4163 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
4164 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4165 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4166 .addImm(0)
4167 .addReg(Op0)
4168 .addImm(AArch64::sub_32);
4169 Op0 = TmpReg;
4170 }
4171 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
4172}
4173
4174unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4175 unsigned Op1Reg) {
4176 unsigned Opc = 0;
4177 bool NeedTrunc = false;
4178 uint64_t Mask = 0;
4179 switch (RetVT.SimpleTy) {
4180 default: return 0;
4181 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4182 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4183 case MVT::i32: Opc = AArch64::LSRVWr; break;
4184 case MVT::i64: Opc = AArch64::LSRVXr; break;
4185 }
4186
4187 const TargetRegisterClass *RC =
4188 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4189 if (NeedTrunc) {
4190 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4191 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4192 }
4193 Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg);
4194 if (NeedTrunc)
4195 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4196 return ResultReg;
4197}
4198
4199unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4200 uint64_t Shift, bool IsZExt) {
4201 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4202 "Unexpected source/return type pair.");
4203 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4204 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4205 "Unexpected source value type.");
4206 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4207 RetVT == MVT::i64) && "Unexpected return value type.");
4208
4209 bool Is64Bit = (RetVT == MVT::i64);
4210 unsigned RegSize = Is64Bit ? 64 : 32;
4211 unsigned DstBits = RetVT.getSizeInBits();
4212 unsigned SrcBits = SrcVT.getSizeInBits();
4213 const TargetRegisterClass *RC =
4214 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4215
4216 // Just emit a copy for "zero" shifts.
4217 if (Shift == 0) {
4218 if (RetVT == SrcVT) {
4219 Register ResultReg = createResultReg(RC);
4220 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4221 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
4222 .addReg(RegNo: Op0);
4223 return ResultReg;
4224 } else
4225 return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4226 }
4227
4228 // Don't deal with undefined shifts.
4229 if (Shift >= DstBits)
4230 return 0;
4231
4232 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4233 // {S|U}BFM Wd, Wn, #r, #s
4234 // Wd<s-r:0> = Wn<s:r> when r <= s
4235
4236 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4237 // %2 = lshr i16 %1, 4
4238 // Wd<7-4:0> = Wn<7:4>
4239 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4240 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4241 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4242
4243 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4244 // %2 = lshr i16 %1, 8
4245 // Wd<7-7,0> = Wn<7:7>
4246 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4247 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4248 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4249
4250 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4251 // %2 = lshr i16 %1, 12
4252 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4253 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4254 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4255 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4256
4257 if (Shift >= SrcBits && IsZExt)
4258 return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT);
4259
4260 // It is not possible to fold a sign-extend into the LShr instruction. In this
4261 // case emit a sign-extend.
4262 if (!IsZExt) {
4263 Op0 = emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4264 if (!Op0)
4265 return 0;
4266 SrcVT = RetVT;
4267 SrcBits = SrcVT.getSizeInBits();
4268 IsZExt = true;
4269 }
4270
4271 unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift);
4272 unsigned ImmS = SrcBits - 1;
4273 static const unsigned OpcTable[2][2] = {
4274 {AArch64::SBFMWri, AArch64::SBFMXri},
4275 {AArch64::UBFMWri, AArch64::UBFMXri}
4276 };
4277 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4278 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4279 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
4280 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4281 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4282 .addImm(0)
4283 .addReg(Op0)
4284 .addImm(AArch64::sub_32);
4285 Op0 = TmpReg;
4286 }
4287 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
4288}
4289
4290unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4291 unsigned Op1Reg) {
4292 unsigned Opc = 0;
4293 bool NeedTrunc = false;
4294 uint64_t Mask = 0;
4295 switch (RetVT.SimpleTy) {
4296 default: return 0;
4297 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4298 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4299 case MVT::i32: Opc = AArch64::ASRVWr; break;
4300 case MVT::i64: Opc = AArch64::ASRVXr; break;
4301 }
4302
4303 const TargetRegisterClass *RC =
4304 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4305 if (NeedTrunc) {
4306 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4307 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4308 }
4309 Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg);
4310 if (NeedTrunc)
4311 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4312 return ResultReg;
4313}
4314
4315unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4316 uint64_t Shift, bool IsZExt) {
4317 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4318 "Unexpected source/return type pair.");
4319 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4320 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4321 "Unexpected source value type.");
4322 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4323 RetVT == MVT::i64) && "Unexpected return value type.");
4324
4325 bool Is64Bit = (RetVT == MVT::i64);
4326 unsigned RegSize = Is64Bit ? 64 : 32;
4327 unsigned DstBits = RetVT.getSizeInBits();
4328 unsigned SrcBits = SrcVT.getSizeInBits();
4329 const TargetRegisterClass *RC =
4330 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4331
4332 // Just emit a copy for "zero" shifts.
4333 if (Shift == 0) {
4334 if (RetVT == SrcVT) {
4335 Register ResultReg = createResultReg(RC);
4336 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4337 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
4338 .addReg(RegNo: Op0);
4339 return ResultReg;
4340 } else
4341 return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4342 }
4343
4344 // Don't deal with undefined shifts.
4345 if (Shift >= DstBits)
4346 return 0;
4347
4348 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4349 // {S|U}BFM Wd, Wn, #r, #s
4350 // Wd<s-r:0> = Wn<s:r> when r <= s
4351
4352 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4353 // %2 = ashr i16 %1, 4
4354 // Wd<7-4:0> = Wn<7:4>
4355 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4356 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4357 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4358
4359 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4360 // %2 = ashr i16 %1, 8
4361 // Wd<7-7,0> = Wn<7:7>
4362 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4363 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4364 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4365
4366 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4367 // %2 = ashr i16 %1, 12
4368 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4369 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4370 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4371 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4372
4373 if (Shift >= SrcBits && IsZExt)
4374 return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT);
4375
4376 unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift);
4377 unsigned ImmS = SrcBits - 1;
4378 static const unsigned OpcTable[2][2] = {
4379 {AArch64::SBFMWri, AArch64::SBFMXri},
4380 {AArch64::UBFMWri, AArch64::UBFMXri}
4381 };
4382 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4383 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4384 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
4385 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4386 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4387 .addImm(0)
4388 .addReg(Op0)
4389 .addImm(AArch64::sub_32);
4390 Op0 = TmpReg;
4391 }
4392 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
4393}
4394
4395unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4396 bool IsZExt) {
4397 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4398
4399 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4400 // DestVT are odd things, so test to make sure that they are both types we can
4401 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4402 // bail out to SelectionDAG.
4403 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4404 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4405 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4406 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4407 return 0;
4408
4409 unsigned Opc;
4410 unsigned Imm = 0;
4411
4412 switch (SrcVT.SimpleTy) {
4413 default:
4414 return 0;
4415 case MVT::i1:
4416 return emiti1Ext(SrcReg, DestVT, IsZExt);
4417 case MVT::i8:
4418 if (DestVT == MVT::i64)
4419 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4420 else
4421 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4422 Imm = 7;
4423 break;
4424 case MVT::i16:
4425 if (DestVT == MVT::i64)
4426 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4427 else
4428 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4429 Imm = 15;
4430 break;
4431 case MVT::i32:
4432 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4433 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4434 Imm = 31;
4435 break;
4436 }
4437
4438 // Handle i8 and i16 as i32.
4439 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4440 DestVT = MVT::i32;
4441 else if (DestVT == MVT::i64) {
4442 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4444 TII.get(AArch64::SUBREG_TO_REG), Src64)
4445 .addImm(0)
4446 .addReg(SrcReg)
4447 .addImm(AArch64::sub_32);
4448 SrcReg = Src64;
4449 }
4450
4451 const TargetRegisterClass *RC =
4452 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4453 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0: SrcReg, Imm1: 0, Imm2: Imm);
4454}
4455
4456static bool isZExtLoad(const MachineInstr *LI) {
4457 switch (LI->getOpcode()) {
4458 default:
4459 return false;
4460 case AArch64::LDURBBi:
4461 case AArch64::LDURHHi:
4462 case AArch64::LDURWi:
4463 case AArch64::LDRBBui:
4464 case AArch64::LDRHHui:
4465 case AArch64::LDRWui:
4466 case AArch64::LDRBBroX:
4467 case AArch64::LDRHHroX:
4468 case AArch64::LDRWroX:
4469 case AArch64::LDRBBroW:
4470 case AArch64::LDRHHroW:
4471 case AArch64::LDRWroW:
4472 return true;
4473 }
4474}
4475
4476static bool isSExtLoad(const MachineInstr *LI) {
4477 switch (LI->getOpcode()) {
4478 default:
4479 return false;
4480 case AArch64::LDURSBWi:
4481 case AArch64::LDURSHWi:
4482 case AArch64::LDURSBXi:
4483 case AArch64::LDURSHXi:
4484 case AArch64::LDURSWi:
4485 case AArch64::LDRSBWui:
4486 case AArch64::LDRSHWui:
4487 case AArch64::LDRSBXui:
4488 case AArch64::LDRSHXui:
4489 case AArch64::LDRSWui:
4490 case AArch64::LDRSBWroX:
4491 case AArch64::LDRSHWroX:
4492 case AArch64::LDRSBXroX:
4493 case AArch64::LDRSHXroX:
4494 case AArch64::LDRSWroX:
4495 case AArch64::LDRSBWroW:
4496 case AArch64::LDRSHWroW:
4497 case AArch64::LDRSBXroW:
4498 case AArch64::LDRSHXroW:
4499 case AArch64::LDRSWroW:
4500 return true;
4501 }
4502}
4503
4504bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4505 MVT SrcVT) {
4506 const auto *LI = dyn_cast<LoadInst>(Val: I->getOperand(i: 0));
4507 if (!LI || !LI->hasOneUse())
4508 return false;
4509
4510 // Check if the load instruction has already been selected.
4511 Register Reg = lookUpRegForValue(V: LI);
4512 if (!Reg)
4513 return false;
4514
4515 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4516 if (!MI)
4517 return false;
4518
4519 // Check if the correct load instruction has been emitted - SelectionDAG might
4520 // have emitted a zero-extending load, but we need a sign-extending load.
4521 bool IsZExt = isa<ZExtInst>(Val: I);
4522 const auto *LoadMI = MI;
4523 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4524 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4525 Register LoadReg = MI->getOperand(i: 1).getReg();
4526 LoadMI = MRI.getUniqueVRegDef(Reg: LoadReg);
4527 assert(LoadMI && "Expected valid instruction");
4528 }
4529 if (!(IsZExt && isZExtLoad(LI: LoadMI)) && !(!IsZExt && isSExtLoad(LI: LoadMI)))
4530 return false;
4531
4532 // Nothing to be done.
4533 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4534 updateValueMap(I, Reg);
4535 return true;
4536 }
4537
4538 if (IsZExt) {
4539 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4540 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4541 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4542 .addImm(0)
4543 .addReg(Reg, getKillRegState(true))
4544 .addImm(AArch64::sub_32);
4545 Reg = Reg64;
4546 } else {
4547 assert((MI->getOpcode() == TargetOpcode::COPY &&
4548 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4549 "Expected copy instruction");
4550 Reg = MI->getOperand(i: 1).getReg();
4551 MachineBasicBlock::iterator I(MI);
4552 removeDeadCode(I, E: std::next(x: I));
4553 }
4554 updateValueMap(I, Reg);
4555 return true;
4556}
4557
4558bool AArch64FastISel::selectIntExt(const Instruction *I) {
4559 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4560 "Unexpected integer extend instruction.");
4561 MVT RetVT;
4562 MVT SrcVT;
4563 if (!isTypeSupported(Ty: I->getType(), VT&: RetVT))
4564 return false;
4565
4566 if (!isTypeSupported(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT))
4567 return false;
4568
4569 // Try to optimize already sign-/zero-extended values from load instructions.
4570 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4571 return true;
4572
4573 Register SrcReg = getRegForValue(V: I->getOperand(i: 0));
4574 if (!SrcReg)
4575 return false;
4576
4577 // Try to optimize already sign-/zero-extended values from function arguments.
4578 bool IsZExt = isa<ZExtInst>(Val: I);
4579 if (const auto *Arg = dyn_cast<Argument>(Val: I->getOperand(i: 0))) {
4580 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4581 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4582 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4583 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4584 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4585 .addImm(0)
4586 .addReg(SrcReg)
4587 .addImm(AArch64::sub_32);
4588 SrcReg = ResultReg;
4589 }
4590
4591 updateValueMap(I, Reg: SrcReg);
4592 return true;
4593 }
4594 }
4595
4596 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, DestVT: RetVT, IsZExt);
4597 if (!ResultReg)
4598 return false;
4599
4600 updateValueMap(I, Reg: ResultReg);
4601 return true;
4602}
4603
4604bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4605 EVT DestEVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
4606 if (!DestEVT.isSimple())
4607 return false;
4608
4609 MVT DestVT = DestEVT.getSimpleVT();
4610 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4611 return false;
4612
4613 unsigned DivOpc;
4614 bool Is64bit = (DestVT == MVT::i64);
4615 switch (ISDOpcode) {
4616 default:
4617 return false;
4618 case ISD::SREM:
4619 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4620 break;
4621 case ISD::UREM:
4622 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4623 break;
4624 }
4625 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4626 Register Src0Reg = getRegForValue(V: I->getOperand(i: 0));
4627 if (!Src0Reg)
4628 return false;
4629
4630 Register Src1Reg = getRegForValue(V: I->getOperand(i: 1));
4631 if (!Src1Reg)
4632 return false;
4633
4634 const TargetRegisterClass *RC =
4635 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4636 Register QuotReg = fastEmitInst_rr(MachineInstOpcode: DivOpc, RC, Op0: Src0Reg, Op1: Src1Reg);
4637 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4638 // The remainder is computed as numerator - (quotient * denominator) using the
4639 // MSUB instruction.
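  // Illustrative 32-bit sequence for srem (register choices are examples
  // only): sdiv w8, w0, w1 followed by msub w0, w8, w1, w0, i.e.
  // w0 - w8 * w1.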
4640 Register ResultReg = fastEmitInst_rrr(MachineInstOpcode: MSubOpc, RC, Op0: QuotReg, Op1: Src1Reg, Op2: Src0Reg);
4641 updateValueMap(I, Reg: ResultReg);
4642 return true;
4643}
4644
4645bool AArch64FastISel::selectMul(const Instruction *I) {
4646 MVT VT;
4647 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true))
4648 return false;
4649
4650 if (VT.isVector())
4651 return selectBinaryOp(I, ISDOpcode: ISD::MUL);
4652
4653 const Value *Src0 = I->getOperand(i: 0);
4654 const Value *Src1 = I->getOperand(i: 1);
4655 if (const auto *C = dyn_cast<ConstantInt>(Val: Src0))
4656 if (C->getValue().isPowerOf2())
4657 std::swap(a&: Src0, b&: Src1);
4658
4659 // Try to simplify to a shift instruction.
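  // e.g. a multiply by 8 becomes a left shift by 3; if the operand is itself
  // a free zero-/sign-extend, emitLSL_ri below folds that extend into the
  // shift.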
4660 if (const auto *C = dyn_cast<ConstantInt>(Val: Src1))
4661 if (C->getValue().isPowerOf2()) {
4662 uint64_t ShiftVal = C->getValue().logBase2();
4663 MVT SrcVT = VT;
4664 bool IsZExt = true;
4665 if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Src0)) {
4666 if (!isIntExtFree(I: ZExt)) {
4667 MVT VT;
4668 if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT)) {
4669 SrcVT = VT;
4670 IsZExt = true;
4671 Src0 = ZExt->getOperand(i_nocapture: 0);
4672 }
4673 }
4674 } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Src0)) {
4675 if (!isIntExtFree(I: SExt)) {
4676 MVT VT;
4677 if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT)) {
4678 SrcVT = VT;
4679 IsZExt = false;
4680 Src0 = SExt->getOperand(i_nocapture: 0);
4681 }
4682 }
4683 }
4684
4685 Register Src0Reg = getRegForValue(V: Src0);
4686 if (!Src0Reg)
4687 return false;
4688
4689 unsigned ResultReg =
4690 emitLSL_ri(RetVT: VT, SrcVT, Op0: Src0Reg, Shift: ShiftVal, IsZExt);
4691
4692 if (ResultReg) {
4693 updateValueMap(I, Reg: ResultReg);
4694 return true;
4695 }
4696 }
4697
4698 Register Src0Reg = getRegForValue(V: I->getOperand(i: 0));
4699 if (!Src0Reg)
4700 return false;
4701
4702 Register Src1Reg = getRegForValue(V: I->getOperand(i: 1));
4703 if (!Src1Reg)
4704 return false;
4705
4706 unsigned ResultReg = emitMul_rr(RetVT: VT, Op0: Src0Reg, Op1: Src1Reg);
4707
4708 if (!ResultReg)
4709 return false;
4710
4711 updateValueMap(I, Reg: ResultReg);
4712 return true;
4713}
4714
4715bool AArch64FastISel::selectShift(const Instruction *I) {
4716 MVT RetVT;
4717 if (!isTypeSupported(Ty: I->getType(), VT&: RetVT, /*IsVectorAllowed=*/true))
4718 return false;
4719
4720 if (RetVT.isVector())
4721 return selectOperator(I, Opcode: I->getOpcode());
4722
4723 if (const auto *C = dyn_cast<ConstantInt>(Val: I->getOperand(i: 1))) {
4724 unsigned ResultReg = 0;
4725 uint64_t ShiftVal = C->getZExtValue();
4726 MVT SrcVT = RetVT;
4727 bool IsZExt = I->getOpcode() != Instruction::AShr;
4728 const Value *Op0 = I->getOperand(i: 0);
4729 if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Op0)) {
4730 if (!isIntExtFree(I: ZExt)) {
4731 MVT TmpVT;
4732 if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT&: TmpVT)) {
4733 SrcVT = TmpVT;
4734 IsZExt = true;
4735 Op0 = ZExt->getOperand(i_nocapture: 0);
4736 }
4737 }
4738 } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Op0)) {
4739 if (!isIntExtFree(I: SExt)) {
4740 MVT TmpVT;
4741 if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT&: TmpVT)) {
4742 SrcVT = TmpVT;
4743 IsZExt = false;
4744 Op0 = SExt->getOperand(i_nocapture: 0);
4745 }
4746 }
4747 }
4748
4749 Register Op0Reg = getRegForValue(V: Op0);
4750 if (!Op0Reg)
4751 return false;
4752
4753 switch (I->getOpcode()) {
4754 default: llvm_unreachable("Unexpected instruction.");
4755 case Instruction::Shl:
4756 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
4757 break;
4758 case Instruction::AShr:
4759 ResultReg = emitASR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
4760 break;
4761 case Instruction::LShr:
4762 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
4763 break;
4764 }
4765 if (!ResultReg)
4766 return false;
4767
4768 updateValueMap(I, Reg: ResultReg);
4769 return true;
4770 }
4771
4772 Register Op0Reg = getRegForValue(V: I->getOperand(i: 0));
4773 if (!Op0Reg)
4774 return false;
4775
4776 Register Op1Reg = getRegForValue(V: I->getOperand(i: 1));
4777 if (!Op1Reg)
4778 return false;
4779
4780 unsigned ResultReg = 0;
4781 switch (I->getOpcode()) {
4782 default: llvm_unreachable("Unexpected instruction.");
4783 case Instruction::Shl:
4784 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4785 break;
4786 case Instruction::AShr:
4787 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4788 break;
4789 case Instruction::LShr:
4790 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4791 break;
4792 }
4793
4794 if (!ResultReg)
4795 return false;
4796
4797 updateValueMap(I, Reg: ResultReg);
4798 return true;
4799}
4800
4801bool AArch64FastISel::selectBitCast(const Instruction *I) {
4802 MVT RetVT, SrcVT;
4803
4804 if (!isTypeLegal(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT))
4805 return false;
4806 if (!isTypeLegal(Ty: I->getType(), VT&: RetVT))
4807 return false;
4808
4809 unsigned Opc;
4810 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4811 Opc = AArch64::FMOVWSr;
4812 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4813 Opc = AArch64::FMOVXDr;
4814 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4815 Opc = AArch64::FMOVSWr;
4816 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4817 Opc = AArch64::FMOVDXr;
4818 else
4819 return false;
4820
4821 const TargetRegisterClass *RC = nullptr;
4822 switch (RetVT.SimpleTy) {
4823 default: llvm_unreachable("Unexpected value type.");
4824 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4825 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4826 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4827 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4828 }
4829 Register Op0Reg = getRegForValue(V: I->getOperand(i: 0));
4830 if (!Op0Reg)
4831 return false;
4832
4833 Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC, Op0: Op0Reg);
4834 if (!ResultReg)
4835 return false;
4836
4837 updateValueMap(I, Reg: ResultReg);
4838 return true;
4839}
4840
4841bool AArch64FastISel::selectFRem(const Instruction *I) {
4842 MVT RetVT;
4843 if (!isTypeLegal(Ty: I->getType(), VT&: RetVT))
4844 return false;
4845
4846 RTLIB::Libcall LC;
4847 switch (RetVT.SimpleTy) {
4848 default:
4849 return false;
4850 case MVT::f32:
4851 LC = RTLIB::REM_F32;
4852 break;
4853 case MVT::f64:
4854 LC = RTLIB::REM_F64;
4855 break;
4856 }
4857
4858 ArgListTy Args;
4859 Args.reserve(n: I->getNumOperands());
4860
4861 // Populate the argument list.
4862 for (auto &Arg : I->operands()) {
4863 ArgListEntry Entry;
4864 Entry.Val = Arg;
4865 Entry.Ty = Arg->getType();
4866 Args.push_back(x: Entry);
4867 }
4868
4869 CallLoweringInfo CLI;
4870 MCContext &Ctx = MF->getContext();
4871 CLI.setCallee(DL, Ctx, CC: TLI.getLibcallCallingConv(Call: LC), ResultTy: I->getType(),
4872 Target: TLI.getLibcallName(Call: LC), ArgsList: std::move(Args));
4873 if (!lowerCallTo(CLI))
4874 return false;
4875 updateValueMap(I, Reg: CLI.ResultReg);
4876 return true;
4877}
4878
4879bool AArch64FastISel::selectSDiv(const Instruction *I) {
4880 MVT VT;
4881 if (!isTypeLegal(Ty: I->getType(), VT))
4882 return false;
4883
4884 if (!isa<ConstantInt>(Val: I->getOperand(i: 1)))
4885 return selectBinaryOp(I, ISDOpcode: ISD::SDIV);
4886
4887 const APInt &C = cast<ConstantInt>(Val: I->getOperand(i: 1))->getValue();
4888 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4889 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4890 return selectBinaryOp(I, ISDOpcode: ISD::SDIV);
4891
4892 unsigned Lg2 = C.countr_zero();
4893 Register Src0Reg = getRegForValue(V: I->getOperand(i: 0));
4894 if (!Src0Reg)
4895 return false;
4896
4897 if (cast<BinaryOperator>(Val: I)->isExact()) {
4898 unsigned ResultReg = emitASR_ri(RetVT: VT, SrcVT: VT, Op0: Src0Reg, Shift: Lg2);
4899 if (!ResultReg)
4900 return false;
4901 updateValueMap(I, Reg: ResultReg);
4902 return true;
4903 }
4904
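  // Non-exact signed division must round towards zero, so bias negative
  // dividends by Pow2 - 1 before shifting. Illustrative sequence for
  // sdiv i32 %x, 8 (registers are examples only):
  //   add  w8, w0, #7
  //   cmp  w0, #0
  //   csel w8, w8, w0, lt
  //   asr  w0, w8, #3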
4905 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4906 unsigned AddReg = emitAdd_ri_(VT, Op0: Src0Reg, Imm: Pow2MinusOne);
4907 if (!AddReg)
4908 return false;
4909
4910 // (Src0 < 0) ? Pow2 - 1 : 0;
4911 if (!emitICmp_ri(RetVT: VT, LHSReg: Src0Reg, Imm: 0))
4912 return false;
4913
4914 unsigned SelectOpc;
4915 const TargetRegisterClass *RC;
4916 if (VT == MVT::i64) {
4917 SelectOpc = AArch64::CSELXr;
4918 RC = &AArch64::GPR64RegClass;
4919 } else {
4920 SelectOpc = AArch64::CSELWr;
4921 RC = &AArch64::GPR32RegClass;
4922 }
4923 Register SelectReg = fastEmitInst_rri(MachineInstOpcode: SelectOpc, RC, Op0: AddReg, Op1: Src0Reg,
4924 Imm: AArch64CC::LT);
4925 if (!SelectReg)
4926 return false;
4927
4928 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4929 // negate the result.
4930 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4931 unsigned ResultReg;
4932 if (C.isNegative())
4933 ResultReg = emitAddSub_rs(/*UseAdd=*/false, RetVT: VT, LHSReg: ZeroReg, RHSReg: SelectReg,
4934 ShiftType: AArch64_AM::ASR, ShiftImm: Lg2);
4935 else
4936 ResultReg = emitASR_ri(RetVT: VT, SrcVT: VT, Op0: SelectReg, Shift: Lg2);
4937
4938 if (!ResultReg)
4939 return false;
4940
4941 updateValueMap(I, Reg: ResultReg);
4942 return true;
4943}
4944
/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during
/// the sign-extend emission.
unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  Register IdxN = getRegForValue(Idx);
  if (IdxN == 0)
    // Unhandled operand. Halt "fast" selection and bail.
    return 0;

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return IdxN;
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  if (Subtarget->isTargetILP32())
    return false;

  Register N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs += GTI.getSequentialElementStride(DL) *
                     cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, TotalOffs);
        if (!N)
          return false;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
      unsigned IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, C);
        if (!IdxN)
          return false;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

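/// Select cmpxchg at -O0 by emitting a CMP_SWAP_32/64 pseudo (expanded into an
/// exclusive load/store loop later in the pipeline), comparing the loaded
/// value against the expected one, and materializing the i1 success flag with
/// CSINC.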
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOptLevel::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

  const MCInstrDesc &II = TII.get(Opc);

  const Register AddrReg = constrainOperandRegClass(
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
  const Register DesiredReg = constrainOperandRegClass(
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
  const Register NewReg = constrainOperandRegClass(
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);

  const Register ResultReg1 = createResultReg(ResRC);
  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);

  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
      .addDef(ResultReg1)
      .addDef(ScratchReg)
      .addUse(AddrReg)
      .addUse(DesiredReg)
      .addUse(NewReg);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
  updateValueMap(I, ResultReg1, 2);
  return true;
}

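/// FastISel entry point: dispatch on the IR opcode to the AArch64-specific
/// selectors above, and fall back to the target-independent selectOperator
/// for anything not handled here.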
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  if (TLI.fallBackToDAGISel(*I))
    return false;
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // Fall back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}

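// FastISel is not used for functions with SME ZA/ZT0 state or a streaming
// (or streaming-compatible) interface; returning nullptr makes the caller
// fall back to the default instruction selector.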
FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) {

  SMEAttrs CallerAttrs(*FuncInfo.Fn);
  if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
      CallerAttrs.hasStreamingInterfaceOrBody() ||
      CallerAttrs.hasStreamingCompatibleInterface())
    return nullptr;
  return new AArch64FastISel(FuncInfo, LibInfo);
}
