//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

15#include "AArch64.h"
16#include "AArch64CallingConvention.h"
17#include "AArch64MachineFunctionInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/Analysis/BranchProbabilityInfo.h"
27#include "llvm/CodeGen/CallingConvLower.h"
28#include "llvm/CodeGen/FastISel.h"
29#include "llvm/CodeGen/FunctionLoweringInfo.h"
30#include "llvm/CodeGen/ISDOpcodes.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineInstr.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineMemOperand.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/RuntimeLibcalls.h"
39#include "llvm/CodeGen/ValueTypes.h"
40#include "llvm/CodeGenTypes/MachineValueType.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
48#include "llvm/IR/DerivedTypes.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/GetElementPtrTypeIterator.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
54#include "llvm/IR/Instructions.h"
55#include "llvm/IR/IntrinsicInst.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Operator.h"
59#include "llvm/IR/Type.h"
60#include "llvm/IR/User.h"
61#include "llvm/IR/Value.h"
62#include "llvm/MC/MCInstrDesc.h"
63#include "llvm/MC/MCRegisterInfo.h"
64#include "llvm/MC/MCSymbol.h"
65#include "llvm/Support/AtomicOrdering.h"
66#include "llvm/Support/Casting.h"
67#include "llvm/Support/CodeGen.h"
68#include "llvm/Support/Compiler.h"
69#include "llvm/Support/ErrorHandling.h"
70#include "llvm/Support/MathExtras.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 using BaseKind = enum {
85 RegBase,
86 FrameIndexBase
87 };
88
89 private:
90 BaseKind Kind = RegBase;
91 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
92 union {
93 unsigned Reg;
94 int FI;
95 } Base;
96 unsigned OffsetReg = 0;
97 unsigned Shift = 0;
98 int64_t Offset = 0;
99 const GlobalValue *GV = nullptr;
100
101 public:
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
108 bool isRegBase() const { return Kind == RegBase; }
109 bool isFIBase() const { return Kind == FrameIndexBase; }
110
111 void setReg(unsigned Reg) {
112 assert(isRegBase() && "Invalid base register access!");
113 Base.Reg = Reg;
114 }
115
116 unsigned getReg() const {
117 assert(isRegBase() && "Invalid base register access!");
118 return Base.Reg;
119 }
120
121 void setOffsetReg(unsigned Reg) {
122 OffsetReg = Reg;
123 }
124
125 unsigned getOffsetReg() const {
126 return OffsetReg;
127 }
128
129 void setFI(unsigned FI) {
130 assert(isFIBase() && "Invalid base frame index access!");
131 Base.FI = FI;
132 }
133
134 unsigned getFI() const {
135 assert(isFIBase() && "Invalid base frame index access!");
136 return Base.FI;
137 }
138
139 void setOffset(int64_t O) { Offset = O; }
140 int64_t getOffset() { return Offset; }
141 void setShift(unsigned S) { Shift = S; }
142 unsigned getShift() { return Shift; }
143
144 void setGlobalValue(const GlobalValue *G) { GV = G; }
145 const GlobalValue *getGlobalValue() { return GV; }
146 };
147
148 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
149 /// make the right decision when generating code for different targets.
150 const AArch64Subtarget *Subtarget;
151 LLVMContext *Context;
152
153 bool fastLowerArguments() override;
154 bool fastLowerCall(CallLoweringInfo &CLI) override;
155 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
156
157private:
158 // Selection routines.
159 bool selectAddSub(const Instruction *I);
160 bool selectLogicalOp(const Instruction *I);
161 bool selectLoad(const Instruction *I);
162 bool selectStore(const Instruction *I);
163 bool selectBranch(const Instruction *I);
164 bool selectIndirectBr(const Instruction *I);
165 bool selectCmp(const Instruction *I);
166 bool selectSelect(const Instruction *I);
167 bool selectFPExt(const Instruction *I);
168 bool selectFPTrunc(const Instruction *I);
169 bool selectFPToInt(const Instruction *I, bool Signed);
170 bool selectIntToFP(const Instruction *I, bool Signed);
171 bool selectRem(const Instruction *I, unsigned ISDOpcode);
172 bool selectRet(const Instruction *I);
173 bool selectTrunc(const Instruction *I);
174 bool selectIntExt(const Instruction *I);
175 bool selectMul(const Instruction *I);
176 bool selectShift(const Instruction *I);
177 bool selectBitCast(const Instruction *I);
178 bool selectFRem(const Instruction *I);
179 bool selectSDiv(const Instruction *I);
180 bool selectGetElementPtr(const Instruction *I);
181 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
182
183 // Utility helper routines.
184 bool isTypeLegal(Type *Ty, MVT &VT);
185 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
186 bool isValueAvailable(const Value *V) const;
187 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
188 bool computeCallAddress(const Value *V, Address &Addr);
189 bool simplifyAddress(Address &Addr, MVT VT);
190 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
191 MachineMemOperand::Flags Flags,
192 unsigned ScaleFactor, MachineMemOperand *MMO);
193 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
194 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
195 MaybeAlign Alignment);
196 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
197 const Value *Cond);
198 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
199 bool optimizeSelect(const SelectInst *SI);
200 unsigned getRegForGEPIndex(const Value *Idx);
201
202 // Emit helper routines.
203 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
204 const Value *RHS, bool SetFlags = false,
205 bool WantResult = true, bool IsZExt = false);
206 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
207 unsigned RHSReg, bool SetFlags = false,
208 bool WantResult = true);
209 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
210 uint64_t Imm, bool SetFlags = false,
211 bool WantResult = true);
212 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
213 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
214 uint64_t ShiftImm, bool SetFlags = false,
215 bool WantResult = true);
216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
218 uint64_t ShiftImm, bool SetFlags = false,
219 bool WantResult = true);
220
221 // Emit functions.
222 bool emitCompareAndBranch(const BranchInst *BI);
223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228 MachineMemOperand *MMO = nullptr);
229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230 MachineMemOperand *MMO = nullptr);
231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232 MachineMemOperand *MMO = nullptr);
233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236 bool SetFlags = false, bool WantResult = true,
237 bool IsZExt = false);
238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240 bool SetFlags = false, bool WantResult = true,
241 bool IsZExt = false);
242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243 bool WantResult = true);
244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
245 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
246 bool WantResult = true);
247 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
248 const Value *RHS);
249 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250 uint64_t Imm);
251 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252 unsigned RHSReg, uint64_t ShiftImm);
253 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
254 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
258 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
259 bool IsZExt = true);
260 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
261 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
262 bool IsZExt = true);
263 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
264 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
265 bool IsZExt = false);
266
267 unsigned materializeInt(const ConstantInt *CI, MVT VT);
268 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
269 unsigned materializeGV(const GlobalValue *GV);
270
271 // Call handling routines.
272private:
273 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
274 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
275 unsigned &NumBytes);
276 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
277
278public:
279 // Backend specific FastISel code.
280 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
281 unsigned fastMaterializeConstant(const Constant *C) override;
282 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
283
284 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
285 const TargetLibraryInfo *LibInfo)
286 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
287 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
288 Context = &FuncInfo.Fn->getContext();
289 }
290
291 bool fastSelectInstruction(const Instruction *I) override;
292
293#include "AArch64GenFastISel.inc"
294};
295
296} // end anonymous namespace
297
/// Check if the sign-/zero-extend will be a noop.
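/// For example, a zero-extend of a single-use load can be folded into the
/// load itself, and an argument that already carries a matching zext/sext
/// attribute needs no extra instruction.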
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}
316
/// Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
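/// For example, an i32 or f32 access is implicitly scaled by 4, so the
/// scaled-immediate load/store forms encode Offset / 4.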
319static unsigned getImplicitScaleFactor(MVT VT) {
320 switch (VT.SimpleTy) {
321 default:
322 return 0; // invalid
323 case MVT::i1: // fall-through
324 case MVT::i8:
325 return 1;
326 case MVT::i16:
327 return 2;
328 case MVT::i32: // fall-through
329 case MVT::f32:
330 return 4;
331 case MVT::i64: // fall-through
332 case MVT::f64:
333 return 8;
334 }
335}
336
337CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338 if (CC == CallingConv::GHC)
339 return CC_AArch64_GHC;
340 if (CC == CallingConv::CFGuard_Check)
341 return CC_AArch64_Win64_CFGuard_Check;
342 if (Subtarget->isTargetDarwin())
343 return CC_AArch64_DarwinPCS;
344 if (Subtarget->isTargetWindows())
345 return CC_AArch64_Win64PCS;
346 return CC_AArch64_AAPCS;
347}
348
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}
372
unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
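  // (That is, an FMOVWSr/FMOVXDr copy from WZR/XZR, as done in
  // fastMaterializeFloatZero below, rather than an FMOVSi/FMOVDi immediate.)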
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
  if (Imm != -1) {
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the large code model materialize the FP constant in code.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}
444
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                                     AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64 bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    if (OpFlags & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      //
      // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
      // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
      // are not exactly 1:1 with FastISel so we cannot easily abstract this
      // out. At some point, it would be nice to find a way to not have this
      // duplicate code.
      unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
              DstReg)
          .addReg(ADRPReg)
          .addGlobalAddress(GV, /*Offset=*/0x100000000,
                            AArch64II::MO_PREL | AArch64II::MO_G3)
          .addImm(48);
      ADRPReg = DstReg;
    }

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}
536
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need to have a somewhat special treatment.
  if (isa<ConstantPointerNull>(C)) {
    assert(VT == MVT::i64 && "Expected 64-bit pointers");
    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
  }

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}
560
unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
}
576
/// Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}
589
// Computes the address to get to an object.
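// On success, Addr holds either a frame index or a base register, optionally
// combined with a shifted/extended offset register and/or a constant offset.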
591bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
592{
593 const User *U = nullptr;
594 unsigned Opcode = Instruction::UserOp1;
595 if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
596 // Don't walk into other basic blocks unless the object is an alloca from
597 // another block, otherwise it may not have a virtual register assigned.
598 if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) ||
599 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
600 Opcode = I->getOpcode();
601 U = I;
602 }
603 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
604 Opcode = C->getOpcode();
605 U = C;
606 }
607
608 if (auto *Ty = dyn_cast<PointerType>(Val: Obj->getType()))
609 if (Ty->getAddressSpace() > 255)
610 // Fast instruction selection doesn't support the special
611 // address spaces.
612 return false;
613
614 switch (Opcode) {
615 default:
616 break;
617 case Instruction::BitCast:
618 // Look through bitcasts.
619 return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty);
620
621 case Instruction::IntToPtr:
622 // Look past no-op inttoptrs.
623 if (TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) ==
624 TLI.getPointerTy(DL))
625 return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty);
626 break;
627
628 case Instruction::PtrToInt:
629 // Look past no-op ptrtoints.
630 if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
631 return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty);
632 break;
633
634 case Instruction::GetElementPtr: {
635 Address SavedAddr = Addr;
636 uint64_t TmpOffset = Addr.getOffset();
637
638 // Iterate through the GEP folding the constants into offsets where
639 // we can.
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = GTI.getSequentialElementStride(DL);
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
668 }
669
670 // Try to grab the base operand now.
671 Addr.setOffset(TmpOffset);
672 if (computeAddress(Obj: U->getOperand(i: 0), Addr, Ty))
673 return true;
674
675 // We failed, restore everything and try the other options.
676 Addr = SavedAddr;
677
678 unsupported_gep:
679 break;
680 }
681 case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
712 case Instruction::Sub: {
713 // Subs of constants are common and easy enough.
714 const Value *LHS = U->getOperand(i: 0);
715 const Value *RHS = U->getOperand(i: 1);
716
717 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS)) {
718 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
719 return computeAddress(Obj: LHS, Addr, Ty);
720 }
721 break;
722 }
723 case Instruction::Shl: {
724 if (Addr.getOffsetReg())
725 break;
726
727 const auto *CI = dyn_cast<ConstantInt>(Val: U->getOperand(i: 1));
728 if (!CI)
729 break;
730
731 unsigned Val = CI->getZExtValue();
732 if (Val < 1 || Val > 3)
733 break;
734
735 uint64_t NumBytes = 0;
736 if (Ty && Ty->isSized()) {
737 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
738 NumBytes = NumBits / 8;
739 if (!isPowerOf2_64(Value: NumBits))
740 NumBytes = 0;
741 }
742
743 if (NumBytes != (1ULL << Val))
744 break;
745
746 Addr.setShift(Val);
747 Addr.setExtendType(AArch64_AM::LSL);
748
    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Register Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
796 case Instruction::Mul: {
797 if (Addr.getOffsetReg())
798 break;
799
800 if (!isMulPowOf2(I: U))
801 break;
802
803 const Value *LHS = U->getOperand(i: 0);
804 const Value *RHS = U->getOperand(i: 1);
805
806 // Canonicalize power-of-2 value to the RHS.
807 if (const auto *C = dyn_cast<ConstantInt>(Val: LHS))
808 if (C->getValue().isPowerOf2())
809 std::swap(a&: LHS, b&: RHS);
810
    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
812 const auto *C = cast<ConstantInt>(Val: RHS);
813 unsigned Val = C->getValue().logBase2();
814 if (Val < 1 || Val > 3)
815 break;
816
817 uint64_t NumBytes = 0;
818 if (Ty && Ty->isSized()) {
819 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
820 NumBytes = NumBits / 8;
821 if (!isPowerOf2_64(Value: NumBits))
822 NumBytes = 0;
823 }
824
825 if (NumBytes != (1ULL << Val))
826 break;
827
828 Addr.setShift(Val);
829 Addr.setExtendType(AArch64_AM::LSL);
830
831 const Value *Src = LHS;
832 if (const auto *I = dyn_cast<Instruction>(Val: Src)) {
833 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
834 // Fold the zext or sext when it won't become a noop.
835 if (const auto *ZE = dyn_cast<ZExtInst>(Val: I)) {
836 if (!isIntExtFree(I: ZE) &&
837 ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
838 Addr.setExtendType(AArch64_AM::UXTW);
839 Src = ZE->getOperand(i_nocapture: 0);
840 }
841 } else if (const auto *SE = dyn_cast<SExtInst>(Val: I)) {
842 if (!isIntExtFree(I: SE) &&
843 SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
844 Addr.setExtendType(AArch64_AM::SXTW);
845 Src = SE->getOperand(i_nocapture: 0);
846 }
847 }
848 }
849 }
850
851 Register Reg = getRegForValue(V: Src);
852 if (!Reg)
853 return false;
854 Addr.setOffsetReg(Reg);
855 return true;
856 }
857 case Instruction::And: {
858 if (Addr.getOffsetReg())
859 break;
860
861 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
862 break;
863
864 const Value *LHS = U->getOperand(i: 0);
865 const Value *RHS = U->getOperand(i: 1);
866
867 if (const auto *C = dyn_cast<ConstantInt>(Val: LHS))
868 if (C->getValue() == 0xffffffff)
869 std::swap(a&: LHS, b&: RHS);
870
871 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
872 if (C->getValue() == 0xffffffff) {
873 Addr.setShift(0);
874 Addr.setExtendType(AArch64_AM::LSL);
875 Addr.setExtendType(AArch64_AM::UXTW);
876
877 Register Reg = getRegForValue(V: LHS);
878 if (!Reg)
879 return false;
880 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
881 Addr.setOffsetReg(Reg);
882 return true;
883 }
884 break;
885 }
886 case Instruction::SExt:
887 case Instruction::ZExt: {
888 if (!Addr.getReg() || Addr.getOffsetReg())
889 break;
890
891 const Value *Src = nullptr;
892 // Fold the zext or sext when it won't become a noop.
893 if (const auto *ZE = dyn_cast<ZExtInst>(Val: U)) {
894 if (!isIntExtFree(I: ZE) && ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
895 Addr.setExtendType(AArch64_AM::UXTW);
896 Src = ZE->getOperand(i_nocapture: 0);
897 }
898 } else if (const auto *SE = dyn_cast<SExtInst>(Val: U)) {
899 if (!isIntExtFree(I: SE) && SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
900 Addr.setExtendType(AArch64_AM::SXTW);
901 Src = SE->getOperand(i_nocapture: 0);
902 }
903 }
904
905 if (!Src)
906 break;
907
908 Addr.setShift(0);
909 Register Reg = getRegForValue(V: Src);
910 if (!Reg)
911 return false;
912 Addr.setOffsetReg(Reg);
913 return true;
914 }
915 } // end switch
916
917 if (Addr.isRegBase() && !Addr.getReg()) {
918 Register Reg = getRegForValue(V: Obj);
919 if (!Reg)
920 return false;
921 Addr.setReg(Reg);
922 return true;
923 }
924
925 if (!Addr.getOffsetReg()) {
926 Register Reg = getRegForValue(V: Obj);
927 if (!Reg)
928 return false;
929 Addr.setOffsetReg(Reg);
930 return true;
931 }
932
933 return false;
934}
935
936bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
937 const User *U = nullptr;
938 unsigned Opcode = Instruction::UserOp1;
939 bool InMBB = true;
940
941 if (const auto *I = dyn_cast<Instruction>(Val: V)) {
942 Opcode = I->getOpcode();
943 U = I;
944 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
945 } else if (const auto *C = dyn_cast<ConstantExpr>(Val: V)) {
946 Opcode = C->getOpcode();
947 U = C;
948 }
949
950 switch (Opcode) {
951 default: break;
952 case Instruction::BitCast:
953 // Look past bitcasts if its operand is in the same BB.
954 if (InMBB)
955 return computeCallAddress(V: U->getOperand(i: 0), Addr);
956 break;
957 case Instruction::IntToPtr:
958 // Look past no-op inttoptrs if its operand is in the same BB.
959 if (InMBB &&
960 TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) ==
961 TLI.getPointerTy(DL))
962 return computeCallAddress(V: U->getOperand(i: 0), Addr);
963 break;
964 case Instruction::PtrToInt:
965 // Look past no-op ptrtoints if its operand is in the same BB.
966 if (InMBB && TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
967 return computeCallAddress(V: U->getOperand(i: 0), Addr);
968 break;
969 }
970
971 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: V)) {
972 Addr.setGlobalValue(GV);
973 return true;
974 }
975
976 // If all else fails, try to materialize the value in a register.
977 if (!Addr.getGlobalValue()) {
978 Addr.setReg(getRegForValue(V));
979 return Addr.getReg() != 0;
980 }
981
982 return false;
983}
984
985bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);
987
988 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
989 return false;
990
991 // Only handle simple types.
992 if (evt == MVT::Other || !evt.isSimple())
993 return false;
994 VT = evt.getSimpleVT();
995
996 // This is a legal type, but it's not something we handle in fast-isel.
997 if (VT == MVT::f128)
998 return false;
999
1000 // Handle all other legal types, i.e. a register that will directly hold this
1001 // value.
1002 return TLI.isTypeLegal(VT);
1003}
1004
/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
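/// Such types are legalized by the individual emitters, typically by
/// sign- or zero-extending the operands to a 32-bit operation.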
1009bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1010 if (Ty->isVectorTy() && !IsVectorAllowed)
1011 return false;
1012
1013 if (isTypeLegal(Ty, VT))
1014 return true;
1015
  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
1018 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1019 return true;
1020
1021 return false;
1022}
1023
1024bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1030}
1031
1032bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1033 if (Subtarget->isTargetILP32())
1034 return false;
1035
1036 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1037 if (!ScaleFactor)
1038 return false;
1039
1040 bool ImmediateOffsetNeedsLowering = false;
1041 bool RegisterOffsetNeedsLowering = false;
1042 int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
1047 ImmediateOffsetNeedsLowering = true;
1048
1049 // Cannot encode an offset register and an immediate offset in the same
1050 // instruction. Fold the immediate offset into the load/store instruction and
1051 // emit an additional add to take care of the offset register.
1052 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1053 RegisterOffsetNeedsLowering = true;
1054
1055 // Cannot encode zero register as base.
1056 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1057 RegisterOffsetNeedsLowering = true;
1058
1059 // If this is a stack pointer and the offset needs to be simplified then put
1060 // the alloca address into a register, set the base type back to register and
1061 // continue. This should almost never happen.
1062 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1063 {
1064 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1065 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1066 ResultReg)
1067 .addFrameIndex(Addr.getFI())
1068 .addImm(0)
1069 .addImm(0);
1070 Addr.setKind(Address::RegBase);
1071 Addr.setReg(ResultReg);
1072 }
1073
1074 if (RegisterOffsetNeedsLowering) {
1075 unsigned ResultReg = 0;
1076 if (Addr.getReg()) {
1077 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1078 Addr.getExtendType() == AArch64_AM::UXTW )
1079 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1080 Addr.getOffsetReg(), Addr.getExtendType(),
1081 Addr.getShift());
1082 else
1083 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1084 Addr.getOffsetReg(), AArch64_AM::LSL,
1085 Addr.getShift());
1086 } else {
1087 if (Addr.getExtendType() == AArch64_AM::UXTW)
1088 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1089 Addr.getShift(), /*IsZExt=*/true);
1090 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1091 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1092 Addr.getShift(), /*IsZExt=*/false);
1093 else
1094 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1095 Addr.getShift());
1096 }
1097 if (!ResultReg)
1098 return false;
1099
1100 Addr.setReg(ResultReg);
1101 Addr.setOffsetReg(0);
1102 Addr.setShift(0);
1103 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1104 }
1105
1106 // Since the offset is too large for the load/store instruction get the
1107 // reg+offset into a register.
1108 if (ImmediateOffsetNeedsLowering) {
1109 unsigned ResultReg;
1110 if (Addr.getReg())
1111 // Try to fold the immediate into the add instruction.
1112 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1113 else
1114 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1115
1116 if (!ResultReg)
1117 return false;
1118 Addr.setReg(ResultReg);
1119 Addr.setOffset(0);
1120 }
1121 return true;
1122}
1123
1124void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1125 const MachineInstrBuilder &MIB,
1126 MachineMemOperand::Flags Flags,
1127 unsigned ScaleFactor,
1128 MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
        constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs() + Idx));
    Addr.setOffsetReg(constrainOperandRegClass(II, Addr.getOffsetReg(),
                                               II.getNumDefs() + Idx + 1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
1162}
1163
1164unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1165 const Value *RHS, bool SetFlags,
1166 bool WantResult, bool IsZExt) {
1167 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1168 bool NeedExtend = false;
1169 switch (RetVT.SimpleTy) {
1170 default:
1171 return 0;
1172 case MVT::i1:
1173 NeedExtend = true;
1174 break;
1175 case MVT::i8:
1176 NeedExtend = true;
1177 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1178 break;
1179 case MVT::i16:
1180 NeedExtend = true;
1181 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1182 break;
1183 case MVT::i32: // fall-through
1184 case MVT::i64:
1185 break;
1186 }
1187 MVT SrcVT = RetVT;
1188 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1189
  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
                                WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1227
1228 if (ResultReg)
1229 return ResultReg;
1230
1231 // Only extend the RHS within the instruction if there is a valid extend type.
1232 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1233 isValueAvailable(V: RHS)) {
1234 Register RHSReg = getRegForValue(V: RHS);
1235 if (!RHSReg)
1236 return 0;
1237 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtType: ExtendType, ShiftImm: 0,
1238 SetFlags, WantResult);
1239 }
1240
1241 // Check if the mul can be folded into the instruction.
1242 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1243 if (isMulPowOf2(I: RHS)) {
1244 const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0);
1245 const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1);
1246
1247 if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS))
1248 if (C->getValue().isPowerOf2())
1249 std::swap(a&: MulLHS, b&: MulRHS);
1250
1251 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1252 uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2();
1253 Register RHSReg = getRegForValue(V: MulLHS);
1254 if (!RHSReg)
1255 return 0;
1256 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType: AArch64_AM::LSL,
1257 ShiftImm: ShiftVal, SetFlags, WantResult);
1258 if (ResultReg)
1259 return ResultReg;
1260 }
1261 }
1262
1263 // Check if the shift can be folded into the instruction.
1264 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1265 if (const auto *SI = dyn_cast<BinaryOperator>(Val: RHS)) {
1266 if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) {
1267 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1268 switch (SI->getOpcode()) {
1269 default: break;
1270 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1271 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1272 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1273 }
1274 uint64_t ShiftVal = C->getZExtValue();
1275 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1276 Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0));
1277 if (!RHSReg)
1278 return 0;
1279 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1280 ShiftImm: ShiftVal, SetFlags, WantResult);
1281 if (ResultReg)
1282 return ResultReg;
1283 }
1284 }
1285 }
1286 }
1287
1288 Register RHSReg = getRegForValue(V: RHS);
1289 if (!RHSReg)
1290 return 0;
1291
1292 if (NeedExtend)
1293 RHSReg = emitIntExt(SrcVT, SrcReg: RHSReg, DestVT: RetVT, isZExt: IsZExt);
1294
1295 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1296}
1297
1298unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1299 unsigned RHSReg, bool SetFlags,
1300 bool WantResult) {
1301 assert(LHSReg && RHSReg && "Invalid register number.");
1302
1303 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1304 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1305 return 0;
1306
1307 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1308 return 0;
1309
1310 static const unsigned OpcTable[2][2][2] = {
1311 { { AArch64::SUBWrr, AArch64::SUBXrr },
1312 { AArch64::ADDWrr, AArch64::ADDXrr } },
1313 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1314 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1315 };
1316 bool Is64Bit = RetVT == MVT::i64;
1317 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1318 const TargetRegisterClass *RC =
1319 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1320 unsigned ResultReg;
1321 if (WantResult)
1322 ResultReg = createResultReg(RC);
1323 else
1324 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1325
1326 const MCInstrDesc &II = TII.get(Opcode: Opc);
1327 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1328 RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1);
1329 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1330 .addReg(RegNo: LHSReg)
1331 .addReg(RegNo: RHSReg);
1332 return ResultReg;
1333}
1334
1335unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1336 uint64_t Imm, bool SetFlags,
1337 bool WantResult) {
1338 assert(LHSReg && "Invalid register number.");
1339
1340 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1341 return 0;
1342
1343 unsigned ShiftImm;
1344 if (isUInt<12>(x: Imm))
1345 ShiftImm = 0;
1346 else if ((Imm & 0xfff000) == Imm) {
1347 ShiftImm = 12;
1348 Imm >>= 12;
1349 } else
1350 return 0;
1351
1352 static const unsigned OpcTable[2][2][2] = {
1353 { { AArch64::SUBWri, AArch64::SUBXri },
1354 { AArch64::ADDWri, AArch64::ADDXri } },
1355 { { AArch64::SUBSWri, AArch64::SUBSXri },
1356 { AArch64::ADDSWri, AArch64::ADDSXri } }
1357 };
1358 bool Is64Bit = RetVT == MVT::i64;
1359 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1360 const TargetRegisterClass *RC;
1361 if (SetFlags)
1362 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1363 else
1364 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1365 unsigned ResultReg;
1366 if (WantResult)
1367 ResultReg = createResultReg(RC);
1368 else
1369 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1370
1371 const MCInstrDesc &II = TII.get(Opcode: Opc);
1372 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1373 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1374 .addReg(RegNo: LHSReg)
1375 .addImm(Val: Imm)
1376 .addImm(Val: getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm));
1377 return ResultReg;
1378}
1379
1380unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1381 unsigned RHSReg,
1382 AArch64_AM::ShiftExtendType ShiftType,
1383 uint64_t ShiftImm, bool SetFlags,
1384 bool WantResult) {
1385 assert(LHSReg && RHSReg && "Invalid register number.");
1386 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1387 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1388
1389 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1390 return 0;
1391
1392 // Don't deal with undefined shifts.
1393 if (ShiftImm >= RetVT.getSizeInBits())
1394 return 0;
1395
1396 static const unsigned OpcTable[2][2][2] = {
1397 { { AArch64::SUBWrs, AArch64::SUBXrs },
1398 { AArch64::ADDWrs, AArch64::ADDXrs } },
1399 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1400 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1401 };
1402 bool Is64Bit = RetVT == MVT::i64;
1403 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1404 const TargetRegisterClass *RC =
1405 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1406 unsigned ResultReg;
1407 if (WantResult)
1408 ResultReg = createResultReg(RC);
1409 else
1410 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1411
1412 const MCInstrDesc &II = TII.get(Opcode: Opc);
1413 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1414 RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1);
1415 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1416 .addReg(RegNo: LHSReg)
1417 .addReg(RegNo: RHSReg)
1418 .addImm(Val: getShifterImm(ST: ShiftType, Imm: ShiftImm));
1419 return ResultReg;
1420}
1421
1422unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1423 unsigned RHSReg,
1424 AArch64_AM::ShiftExtendType ExtType,
1425 uint64_t ShiftImm, bool SetFlags,
1426 bool WantResult) {
1427 assert(LHSReg && RHSReg && "Invalid register number.");
1428 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1429 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1430
1431 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1432 return 0;
1433
1434 if (ShiftImm >= 4)
1435 return 0;
1436
1437 static const unsigned OpcTable[2][2][2] = {
1438 { { AArch64::SUBWrx, AArch64::SUBXrx },
1439 { AArch64::ADDWrx, AArch64::ADDXrx } },
1440 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1441 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1442 };
1443 bool Is64Bit = RetVT == MVT::i64;
1444 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1445 const TargetRegisterClass *RC = nullptr;
1446 if (SetFlags)
1447 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1448 else
1449 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1450 unsigned ResultReg;
1451 if (WantResult)
1452 ResultReg = createResultReg(RC);
1453 else
1454 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1455
1456 const MCInstrDesc &II = TII.get(Opcode: Opc);
1457 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1458 RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1);
1459 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1460 .addReg(RegNo: LHSReg)
1461 .addReg(RegNo: RHSReg)
1462 .addImm(Val: getArithExtendImm(ET: ExtType, Imm: ShiftImm));
1463 return ResultReg;
1464}
1465
1466bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1467 Type *Ty = LHS->getType();
1468 EVT EVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
1469 if (!EVT.isSimple())
1470 return false;
1471 MVT VT = EVT.getSimpleVT();
1472
1473 switch (VT.SimpleTy) {
1474 default:
1475 return false;
1476 case MVT::i1:
1477 case MVT::i8:
1478 case MVT::i16:
1479 case MVT::i32:
1480 case MVT::i64:
1481 return emitICmp(RetVT: VT, LHS, RHS, IsZExt);
1482 case MVT::f32:
1483 case MVT::f64:
1484 return emitFCmp(RetVT: VT, LHS, RHS);
1485 }
1486}
1487
1488bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1489 bool IsZExt) {
1490 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1491 IsZExt) != 0;
1492}
1493
1494bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1495 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1496 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1497}
1498
1499bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1500 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1501 return false;
1502
  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
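  // (The immediate forms FCMPSri/FCMPDri compare against +0.0.)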
1505 bool UseImm = false;
1506 if (const auto *CFP = dyn_cast<ConstantFP>(Val: RHS))
1507 if (CFP->isZero() && !CFP->isNegative())
1508 UseImm = true;
1509
1510 Register LHSReg = getRegForValue(V: LHS);
1511 if (!LHSReg)
1512 return false;
1513
1514 if (UseImm) {
1515 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1516 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
1517 .addReg(RegNo: LHSReg);
1518 return true;
1519 }
1520
1521 Register RHSReg = getRegForValue(V: RHS);
1522 if (!RHSReg)
1523 return false;
1524
1525 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1526 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
1527 .addReg(RegNo: LHSReg)
1528 .addReg(RegNo: RHSReg);
1529 return true;
1530}
1531
1532unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1533 bool SetFlags, bool WantResult, bool IsZExt) {
1534 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1535 IsZExt);
1536}
1537
/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
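/// For example, emitAdd_ri_(MVT::i64, Reg, -16) is emitted as a single SUBXri
/// by 16 rather than materializing -16 into a register first.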
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
  return ResultReg;
}
1560
1561unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1562 bool SetFlags, bool WantResult, bool IsZExt) {
1563 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1564 IsZExt);
1565}
1566
1567unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1568 unsigned RHSReg, bool WantResult) {
1569 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1570 /*SetFlags=*/true, WantResult);
1571}
1572
1573unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1574 unsigned RHSReg,
1575 AArch64_AM::ShiftExtendType ShiftType,
1576 uint64_t ShiftImm, bool WantResult) {
1577 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1578 ShiftImm, /*SetFlags=*/true, WantResult);
1579}
1580
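/// Emit a bitwise logical operation (AND/OR/XOR), canonicalizing constants,
/// multiplies by a power of two, and shifts by an immediate to the RHS so they
/// can be folded into the immediate or shifted-register instruction forms.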
1581unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1582 const Value *LHS, const Value *RHS) {
1583 // Canonicalize immediates to the RHS first.
1584 if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS))
1585 std::swap(a&: LHS, b&: RHS);
1586
1587 // Canonicalize mul by power-of-2 to the RHS.
1588 if (LHS->hasOneUse() && isValueAvailable(V: LHS))
1589 if (isMulPowOf2(I: LHS))
1590 std::swap(a&: LHS, b&: RHS);
1591
1592 // Canonicalize shift immediate to the RHS.
1593 if (LHS->hasOneUse() && isValueAvailable(V: LHS))
1594 if (const auto *SI = dyn_cast<ShlOperator>(Val: LHS))
1595 if (isa<ConstantInt>(Val: SI->getOperand(i_nocapture: 1)))
1596 std::swap(a&: LHS, b&: RHS);
1597
1598 Register LHSReg = getRegForValue(V: LHS);
1599 if (!LHSReg)
1600 return 0;
1601
1602 unsigned ResultReg = 0;
1603 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) {
1604 uint64_t Imm = C->getZExtValue();
1605 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1606 }
1607 if (ResultReg)
1608 return ResultReg;
1609
1610 // Check if the mul can be folded into the instruction.
1611 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1612 if (isMulPowOf2(I: RHS)) {
1613 const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0);
1614 const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1);
1615
1616 if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS))
1617 if (C->getValue().isPowerOf2())
1618 std::swap(a&: MulLHS, b&: MulRHS);
1619
1620 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1621 uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2();
1622
1623 Register RHSReg = getRegForValue(V: MulLHS);
1624 if (!RHSReg)
1625 return 0;
1626 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal);
1627 if (ResultReg)
1628 return ResultReg;
1629 }
1630 }
1631
1632 // Check if the shift can be folded into the instruction.
1633 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1634 if (const auto *SI = dyn_cast<ShlOperator>(Val: RHS))
1635 if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) {
1636 uint64_t ShiftVal = C->getZExtValue();
1637 Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0));
1638 if (!RHSReg)
1639 return 0;
1640 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal);
1641 if (ResultReg)
1642 return ResultReg;
1643 }
1644 }
1645
1646 Register RHSReg = getRegForValue(V: RHS);
1647 if (!RHSReg)
1648 return 0;
1649
1650 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1651 ResultReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISDOpc, Op0: LHSReg, Op1: RHSReg);
1652 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1653 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1654 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1655 }
1656 return ResultReg;
1657}
1658
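/// Emit a logical operation with an immediate operand, provided the immediate
/// can be encoded as an AArch64 logical immediate for the given register size.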
1659unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1660 unsigned LHSReg, uint64_t Imm) {
1661 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662 "ISD nodes are not consecutive!");
1663 static const unsigned OpcTable[3][2] = {
1664 { AArch64::ANDWri, AArch64::ANDXri },
1665 { AArch64::ORRWri, AArch64::ORRXri },
1666 { AArch64::EORWri, AArch64::EORXri }
1667 };
1668 const TargetRegisterClass *RC;
1669 unsigned Opc;
1670 unsigned RegSize;
1671 switch (RetVT.SimpleTy) {
1672 default:
1673 return 0;
1674 case MVT::i1:
1675 case MVT::i8:
1676 case MVT::i16:
1677 case MVT::i32: {
1678 unsigned Idx = ISDOpc - ISD::AND;
1679 Opc = OpcTable[Idx][0];
1680 RC = &AArch64::GPR32spRegClass;
1681 RegSize = 32;
1682 break;
1683 }
1684 case MVT::i64:
1685 Opc = OpcTable[ISDOpc - ISD::AND][1];
1686 RC = &AArch64::GPR64spRegClass;
1687 RegSize = 64;
1688 break;
1689 }
1690
1691 if (!AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize))
1692 return 0;
1693
1694 Register ResultReg =
1695 fastEmitInst_ri(MachineInstOpcode: Opc, RC, Op0: LHSReg,
1696 Imm: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
1697 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1700 }
1701 return ResultReg;
1702}
1703
1704unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705 unsigned LHSReg, unsigned RHSReg,
1706 uint64_t ShiftImm) {
1707 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1708 "ISD nodes are not consecutive!");
1709 static const unsigned OpcTable[3][2] = {
1710 { AArch64::ANDWrs, AArch64::ANDXrs },
1711 { AArch64::ORRWrs, AArch64::ORRXrs },
1712 { AArch64::EORWrs, AArch64::EORXrs }
1713 };
1714
1715 // Don't deal with undefined shifts.
1716 if (ShiftImm >= RetVT.getSizeInBits())
1717 return 0;
1718
1719 const TargetRegisterClass *RC;
1720 unsigned Opc;
1721 switch (RetVT.SimpleTy) {
1722 default:
1723 return 0;
1724 case MVT::i1:
1725 case MVT::i8:
1726 case MVT::i16:
1727 case MVT::i32:
1728 Opc = OpcTable[ISDOpc - ISD::AND][0];
1729 RC = &AArch64::GPR32RegClass;
1730 break;
1731 case MVT::i64:
1732 Opc = OpcTable[ISDOpc - ISD::AND][1];
1733 RC = &AArch64::GPR64RegClass;
1734 break;
1735 }
1736 Register ResultReg =
1737 fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: LHSReg, Op1: RHSReg,
1738 Imm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm));
1739 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1740 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1741 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1742 }
1743 return ResultReg;
1744}
1745
1746unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1747 uint64_t Imm) {
1748 return emitLogicalOp_ri(ISDOpc: ISD::AND, RetVT, LHSReg, Imm);
1749}
1750
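/// Emit a load of type VT, optionally sign- or zero-extending the result to
/// RetVT. The opcode tables below cover the unscaled, scaled, and
/// register-offset addressing forms for both GPR and FPR destinations.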
1751unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1752 bool WantZExt, MachineMemOperand *MMO) {
1753 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1754 return 0;
1755
1756 // Simplify this down to something we can handle.
1757 if (!simplifyAddress(Addr, VT))
1758 return 0;
1759
1760 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1761 if (!ScaleFactor)
1762 llvm_unreachable("Unexpected value type.");
1763
1764 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1765 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1766 bool UseScaled = true;
1767 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1768 UseScaled = false;
1769 ScaleFactor = 1;
1770 }
1771
1772 static const unsigned GPOpcTable[2][8][4] = {
1773 // Sign-extend.
1774 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1775 AArch64::LDURXi },
1776 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1777 AArch64::LDURXi },
1778 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1779 AArch64::LDRXui },
1780 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1781 AArch64::LDRXui },
1782 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1783 AArch64::LDRXroX },
1784 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1785 AArch64::LDRXroX },
1786 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1787 AArch64::LDRXroW },
1788 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1789 AArch64::LDRXroW }
1790 },
1791 // Zero-extend.
1792 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1793 AArch64::LDURXi },
1794 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1795 AArch64::LDURXi },
1796 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1797 AArch64::LDRXui },
1798 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1799 AArch64::LDRXui },
1800 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1801 AArch64::LDRXroX },
1802 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1803 AArch64::LDRXroX },
1804 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1805 AArch64::LDRXroW },
1806 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1807 AArch64::LDRXroW }
1808 }
1809 };
1810
1811 static const unsigned FPOpcTable[4][2] = {
1812 { AArch64::LDURSi, AArch64::LDURDi },
1813 { AArch64::LDRSui, AArch64::LDRDui },
1814 { AArch64::LDRSroX, AArch64::LDRDroX },
1815 { AArch64::LDRSroW, AArch64::LDRDroW }
1816 };
1817
1818 unsigned Opc;
1819 const TargetRegisterClass *RC;
1820 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1821 Addr.getOffsetReg();
1822 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1823 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1824 Addr.getExtendType() == AArch64_AM::SXTW)
1825 Idx++;
1826
1827 bool IsRet64Bit = RetVT == MVT::i64;
1828 switch (VT.SimpleTy) {
1829 default:
1830 llvm_unreachable("Unexpected value type.");
1831 case MVT::i1: // Intentional fall-through.
1832 case MVT::i8:
1833 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1834 RC = (IsRet64Bit && !WantZExt) ?
1835 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1836 break;
1837 case MVT::i16:
1838 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1839 RC = (IsRet64Bit && !WantZExt) ?
1840 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1841 break;
1842 case MVT::i32:
1843 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1844 RC = (IsRet64Bit && !WantZExt) ?
1845 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1846 break;
1847 case MVT::i64:
1848 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1849 RC = &AArch64::GPR64RegClass;
1850 break;
1851 case MVT::f32:
1852 Opc = FPOpcTable[Idx][0];
1853 RC = &AArch64::FPR32RegClass;
1854 break;
1855 case MVT::f64:
1856 Opc = FPOpcTable[Idx][1];
1857 RC = &AArch64::FPR64RegClass;
1858 break;
1859 }
1860
1861 // Create the base instruction, then add the operands.
1862 Register ResultReg = createResultReg(RC);
1863 MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1864 MCID: TII.get(Opcode: Opc), DestReg: ResultReg);
1865 addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOLoad, ScaleFactor, MMO);
1866
1867 // Loading an i1 requires special handling.
1868 if (VT == MVT::i1) {
1869 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1870 assert(ANDReg && "Unexpected AND instruction emission failure.");
1871 ResultReg = ANDReg;
1872 }
1873
1874  // For zero-extending loads to 64 bits we emit a 32-bit load and then
1875  // convert the 32-bit register into a 64-bit register.
1876 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1877 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1878 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1879 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1880 .addImm(0)
1881 .addReg(ResultReg, getKillRegState(true))
1882 .addImm(AArch64::sub_32);
1883 ResultReg = Reg64;
1884 }
1885 return ResultReg;
1886}
1887
1888bool AArch64FastISel::selectAddSub(const Instruction *I) {
1889 MVT VT;
1890 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true))
1891 return false;
1892
1893 if (VT.isVector())
1894 return selectOperator(I, Opcode: I->getOpcode());
1895
1896 unsigned ResultReg;
1897 switch (I->getOpcode()) {
1898 default:
1899 llvm_unreachable("Unexpected instruction.");
1900 case Instruction::Add:
1901 ResultReg = emitAdd(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1902 break;
1903 case Instruction::Sub:
1904 ResultReg = emitSub(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1905 break;
1906 }
1907 if (!ResultReg)
1908 return false;
1909
1910 updateValueMap(I, Reg: ResultReg);
1911 return true;
1912}
1913
1914bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1915 MVT VT;
1916 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true))
1917 return false;
1918
1919 if (VT.isVector())
1920 return selectOperator(I, Opcode: I->getOpcode());
1921
1922 unsigned ResultReg;
1923 switch (I->getOpcode()) {
1924 default:
1925 llvm_unreachable("Unexpected instruction.");
1926 case Instruction::And:
1927 ResultReg = emitLogicalOp(ISDOpc: ISD::AND, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1928 break;
1929 case Instruction::Or:
1930 ResultReg = emitLogicalOp(ISDOpc: ISD::OR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1931 break;
1932 case Instruction::Xor:
1933 ResultReg = emitLogicalOp(ISDOpc: ISD::XOR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1934 break;
1935 }
1936 if (!ResultReg)
1937 return false;
1938
1939 updateValueMap(I, Reg: ResultReg);
1940 return true;
1941}
1942
1943bool AArch64FastISel::selectLoad(const Instruction *I) {
1944 MVT VT;
1945 // Verify we have a legal type before going any further. Currently, we handle
1946 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1947  // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
1948 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true) ||
1949 cast<LoadInst>(Val: I)->isAtomic())
1950 return false;
1951
1952 const Value *SV = I->getOperand(i: 0);
1953 if (TLI.supportSwiftError()) {
1954 // Swifterror values can come from either a function parameter with
1955 // swifterror attribute or an alloca with swifterror attribute.
1956 if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) {
1957 if (Arg->hasSwiftErrorAttr())
1958 return false;
1959 }
1960
1961 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) {
1962 if (Alloca->isSwiftError())
1963 return false;
1964 }
1965 }
1966
1967 // See if we can handle this address.
1968 Address Addr;
1969 if (!computeAddress(Obj: I->getOperand(i: 0), Addr, Ty: I->getType()))
1970 return false;
1971
1972 // Fold the following sign-/zero-extend into the load instruction.
1973 bool WantZExt = true;
1974 MVT RetVT = VT;
1975 const Value *IntExtVal = nullptr;
1976 if (I->hasOneUse()) {
1977 if (const auto *ZE = dyn_cast<ZExtInst>(Val: I->use_begin()->getUser())) {
1978 if (isTypeSupported(Ty: ZE->getType(), VT&: RetVT))
1979 IntExtVal = ZE;
1980 else
1981 RetVT = VT;
1982 } else if (const auto *SE = dyn_cast<SExtInst>(Val: I->use_begin()->getUser())) {
1983 if (isTypeSupported(Ty: SE->getType(), VT&: RetVT))
1984 IntExtVal = SE;
1985 else
1986 RetVT = VT;
1987 WantZExt = false;
1988 }
1989 }
1990
1991 unsigned ResultReg =
1992 emitLoad(VT, RetVT, Addr, WantZExt, MMO: createMachineMemOperandFor(I));
1993 if (!ResultReg)
1994 return false;
1995
1996 // There are a few different cases we have to handle, because the load or the
1997  // sign-/zero-extend might not be selected by FastISel if we fall back to
1998 // SelectionDAG. There is also an ordering issue when both instructions are in
1999 // different basic blocks.
2000 // 1.) The load instruction is selected by FastISel, but the integer extend
2001  //     is not. This usually happens when the integer extend is in a different
2002 // basic block and SelectionDAG took over for that basic block.
2003 // 2.) The load instruction is selected before the integer extend. This only
2004 // happens when the integer extend is in a different basic block.
2005 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2006 // by FastISel. This happens if there are instructions between the load
2007 // and the integer extend that couldn't be selected by FastISel.
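  // For example (illustrative IR), cases 1 and 2 arise when the load and the
  // extend live in different basic blocks:
  //   bb1:  %v = load i32, ptr %p
  //         br label %bb2
  //   bb2:  %e = zext i32 %v to i64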
2008 if (IntExtVal) {
2009 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2010 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2011 // it when it selects the integer extend.
2012 Register Reg = lookUpRegForValue(V: IntExtVal);
2013 auto *MI = MRI.getUniqueVRegDef(Reg);
2014 if (!MI) {
2015 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2016 if (WantZExt) {
2017 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2018 MachineBasicBlock::iterator I(std::prev(x: FuncInfo.InsertPt));
2019 ResultReg = std::prev(x: I)->getOperand(i: 0).getReg();
2020 removeDeadCode(I, E: std::next(x: I));
2021 } else
2022 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023 AArch64::sub_32);
2024 }
2025 updateValueMap(I, Reg: ResultReg);
2026 return true;
2027 }
2028
2029    // The integer extend has already been emitted; delete all the instructions
2030 // that have been emitted by the integer extend lowering code and use the
2031 // result from the load instruction directly.
2032 while (MI) {
2033 Reg = 0;
2034 for (auto &Opnd : MI->uses()) {
2035 if (Opnd.isReg()) {
2036 Reg = Opnd.getReg();
2037 break;
2038 }
2039 }
2040 MachineBasicBlock::iterator I(MI);
2041 removeDeadCode(I, E: std::next(x: I));
2042 MI = nullptr;
2043 if (Reg)
2044 MI = MRI.getUniqueVRegDef(Reg);
2045 }
2046 updateValueMap(I: IntExtVal, Reg: ResultReg);
2047 return true;
2048 }
2049
2050 updateValueMap(I, Reg: ResultReg);
2051 return true;
2052}
2053
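/// Emit a store-release (STLR*) for an atomic store with release or stronger
/// ordering. Only the base-register addressing form is supported.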
2054bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055 unsigned AddrReg,
2056 MachineMemOperand *MMO) {
2057 unsigned Opc;
2058 switch (VT.SimpleTy) {
2059 default: return false;
2060 case MVT::i8: Opc = AArch64::STLRB; break;
2061 case MVT::i16: Opc = AArch64::STLRH; break;
2062 case MVT::i32: Opc = AArch64::STLRW; break;
2063 case MVT::i64: Opc = AArch64::STLRX; break;
2064 }
2065
2066 const MCInstrDesc &II = TII.get(Opcode: Opc);
2067 SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: 0);
2068 AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: 1);
2069 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
2070 .addReg(RegNo: SrcReg)
2071 .addReg(RegNo: AddrReg)
2072 .addMemOperand(MMO);
2073 return true;
2074}
2075
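/// Emit a store of SrcReg to the given address, choosing between unscaled,
/// scaled, and register-offset store opcodes. i1 values are masked down to a
/// single bit before being stored.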
2076bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077 MachineMemOperand *MMO) {
2078 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079 return false;
2080
2081 // Simplify this down to something we can handle.
2082 if (!simplifyAddress(Addr, VT))
2083 return false;
2084
2085 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086 if (!ScaleFactor)
2087 llvm_unreachable("Unexpected value type.");
2088
2089 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091 bool UseScaled = true;
2092 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093 UseScaled = false;
2094 ScaleFactor = 1;
2095 }
2096
2097 static const unsigned OpcTable[4][6] = {
2098 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2099 AArch64::STURSi, AArch64::STURDi },
2100 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2101 AArch64::STRSui, AArch64::STRDui },
2102 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103 AArch64::STRSroX, AArch64::STRDroX },
2104 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105 AArch64::STRSroW, AArch64::STRDroW }
2106 };
2107
2108 unsigned Opc;
2109 bool VTIsi1 = false;
2110 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111 Addr.getOffsetReg();
2112 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114 Addr.getExtendType() == AArch64_AM::SXTW)
2115 Idx++;
2116
2117 switch (VT.SimpleTy) {
2118 default: llvm_unreachable("Unexpected value type.");
2119 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2120 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2121 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126 }
2127
2128 // Storing an i1 requires special handling.
2129 if (VTIsi1 && SrcReg != AArch64::WZR) {
2130 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2131 assert(ANDReg && "Unexpected AND instruction emission failure.");
2132 SrcReg = ANDReg;
2133 }
2134 // Create the base instruction, then add the operands.
2135 const MCInstrDesc &II = TII.get(Opcode: Opc);
2136 SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs());
2137 MachineInstrBuilder MIB =
2138 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: SrcReg);
2139 addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOStore, ScaleFactor, MMO);
2140
2141 return true;
2142}
2143
2144bool AArch64FastISel::selectStore(const Instruction *I) {
2145 MVT VT;
2146 const Value *Op0 = I->getOperand(i: 0);
2147 // Verify we have a legal type before going any further. Currently, we handle
2148 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149  // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
2150 if (!isTypeSupported(Ty: Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151 return false;
2152
2153 const Value *PtrV = I->getOperand(i: 1);
2154 if (TLI.supportSwiftError()) {
2155 // Swifterror values can come from either a function parameter with
2156 // swifterror attribute or an alloca with swifterror attribute.
2157 if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) {
2158 if (Arg->hasSwiftErrorAttr())
2159 return false;
2160 }
2161
2162 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) {
2163 if (Alloca->isSwiftError())
2164 return false;
2165 }
2166 }
2167
2168 // Get the value to be stored into a register. Use the zero register directly
2169 // when possible to avoid an unnecessary copy and a wasted register.
2170 unsigned SrcReg = 0;
2171 if (const auto *CI = dyn_cast<ConstantInt>(Val: Op0)) {
2172 if (CI->isZero())
2173 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174 } else if (const auto *CF = dyn_cast<ConstantFP>(Val: Op0)) {
2175 if (CF->isZero() && !CF->isNegative()) {
2176 VT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits());
2177 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178 }
2179 }
2180
2181 if (!SrcReg)
2182 SrcReg = getRegForValue(V: Op0);
2183
2184 if (!SrcReg)
2185 return false;
2186
2187 auto *SI = cast<StoreInst>(Val: I);
2188
2189 // Try to emit a STLR for seq_cst/release.
2190 if (SI->isAtomic()) {
2191 AtomicOrdering Ord = SI->getOrdering();
2192 // The non-atomic instructions are sufficient for relaxed stores.
2193 if (isReleaseOrStronger(AO: Ord)) {
2194 // The STLR addressing mode only supports a base reg; pass that directly.
2195 Register AddrReg = getRegForValue(V: PtrV);
2196 return emitStoreRelease(VT, SrcReg, AddrReg,
2197 MMO: createMachineMemOperandFor(I));
2198 }
2199 }
2200
2201 // See if we can handle this address.
2202 Address Addr;
2203 if (!computeAddress(Obj: PtrV, Addr, Ty: Op0->getType()))
2204 return false;
2205
2206 if (!emitStore(VT, SrcReg, Addr, MMO: createMachineMemOperandFor(I)))
2207 return false;
2208 return true;
2209}
2210
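/// Map an IR integer or floating-point predicate onto the AArch64 condition
/// code that tests it after a CMP/FCMP; AL is returned for predicates that
/// need more than one compare or are otherwise unhandled.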
2211static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2212 switch (Pred) {
2213 case CmpInst::FCMP_ONE:
2214 case CmpInst::FCMP_UEQ:
2215 default:
2216 // AL is our "false" for now. The other two need more compares.
2217 return AArch64CC::AL;
2218 case CmpInst::ICMP_EQ:
2219 case CmpInst::FCMP_OEQ:
2220 return AArch64CC::EQ;
2221 case CmpInst::ICMP_SGT:
2222 case CmpInst::FCMP_OGT:
2223 return AArch64CC::GT;
2224 case CmpInst::ICMP_SGE:
2225 case CmpInst::FCMP_OGE:
2226 return AArch64CC::GE;
2227 case CmpInst::ICMP_UGT:
2228 case CmpInst::FCMP_UGT:
2229 return AArch64CC::HI;
2230 case CmpInst::FCMP_OLT:
2231 return AArch64CC::MI;
2232 case CmpInst::ICMP_ULE:
2233 case CmpInst::FCMP_OLE:
2234 return AArch64CC::LS;
2235 case CmpInst::FCMP_ORD:
2236 return AArch64CC::VC;
2237 case CmpInst::FCMP_UNO:
2238 return AArch64CC::VS;
2239 case CmpInst::FCMP_UGE:
2240 return AArch64CC::PL;
2241 case CmpInst::ICMP_SLT:
2242 case CmpInst::FCMP_ULT:
2243 return AArch64CC::LT;
2244 case CmpInst::ICMP_SLE:
2245 case CmpInst::FCMP_ULE:
2246 return AArch64CC::LE;
2247 case CmpInst::FCMP_UNE:
2248 case CmpInst::ICMP_NE:
2249 return AArch64CC::NE;
2250 case CmpInst::ICMP_UGE:
2251 return AArch64CC::HS;
2252 case CmpInst::ICMP_ULT:
2253 return AArch64CC::LO;
2254 }
2255}
2256
2257/// Try to emit a combined compare-and-branch instruction.
2258bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2260 // will not be produced, as they are conditional branch instructions that do
2261 // not set flags.
2262 if (FuncInfo.MF->getFunction().hasFnAttribute(
2263 Attribute::SpeculativeLoadHardening))
2264 return false;
2265
2266 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2267 const CmpInst *CI = cast<CmpInst>(Val: BI->getCondition());
2268 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2269
2270 const Value *LHS = CI->getOperand(i_nocapture: 0);
2271 const Value *RHS = CI->getOperand(i_nocapture: 1);
2272
2273 MVT VT;
2274 if (!isTypeSupported(Ty: LHS->getType(), VT))
2275 return false;
2276
2277 unsigned BW = VT.getSizeInBits();
2278 if (BW > 64)
2279 return false;
2280
2281 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(i: 0)];
2282 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(i: 1)];
2283
2284 // Try to take advantage of fallthrough opportunities.
2285 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
2286 std::swap(a&: TBB, b&: FBB);
2287 Predicate = CmpInst::getInversePredicate(pred: Predicate);
2288 }
2289
2290 int TestBit = -1;
2291 bool IsCmpNE;
2292 switch (Predicate) {
2293 default:
2294 return false;
2295 case CmpInst::ICMP_EQ:
2296 case CmpInst::ICMP_NE:
2297 if (isa<Constant>(Val: LHS) && cast<Constant>(Val: LHS)->isNullValue())
2298 std::swap(a&: LHS, b&: RHS);
2299
2300 if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue())
2301 return false;
2302
2303 if (const auto *AI = dyn_cast<BinaryOperator>(Val: LHS))
2304 if (AI->getOpcode() == Instruction::And && isValueAvailable(V: AI)) {
2305 const Value *AndLHS = AI->getOperand(i_nocapture: 0);
2306 const Value *AndRHS = AI->getOperand(i_nocapture: 1);
2307
2308 if (const auto *C = dyn_cast<ConstantInt>(Val: AndLHS))
2309 if (C->getValue().isPowerOf2())
2310 std::swap(a&: AndLHS, b&: AndRHS);
2311
2312 if (const auto *C = dyn_cast<ConstantInt>(Val: AndRHS))
2313 if (C->getValue().isPowerOf2()) {
2314 TestBit = C->getValue().logBase2();
2315 LHS = AndLHS;
2316 }
2317 }
2318
2319 if (VT == MVT::i1)
2320 TestBit = 0;
2321
2322 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2323 break;
2324 case CmpInst::ICMP_SLT:
2325 case CmpInst::ICMP_SGE:
2326 if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue())
2327 return false;
2328
2329 TestBit = BW - 1;
2330 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2331 break;
2332 case CmpInst::ICMP_SGT:
2333 case CmpInst::ICMP_SLE:
2334 if (!isa<ConstantInt>(Val: RHS))
2335 return false;
2336
2337 if (cast<ConstantInt>(Val: RHS)->getValue() != APInt(BW, -1, true))
2338 return false;
2339
2340 TestBit = BW - 1;
2341 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2342 break;
2343 } // end switch
2344
2345 static const unsigned OpcTable[2][2][2] = {
2346 { {AArch64::CBZW, AArch64::CBZX },
2347 {AArch64::CBNZW, AArch64::CBNZX} },
2348 { {AArch64::TBZW, AArch64::TBZX },
2349 {AArch64::TBNZW, AArch64::TBNZX} }
2350 };
2351
2352 bool IsBitTest = TestBit != -1;
2353 bool Is64Bit = BW == 64;
2354 if (TestBit < 32 && TestBit >= 0)
2355 Is64Bit = false;
2356
2357 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2358 const MCInstrDesc &II = TII.get(Opcode: Opc);
2359
2360 Register SrcReg = getRegForValue(V: LHS);
2361 if (!SrcReg)
2362 return false;
2363
2364 if (BW == 64 && !Is64Bit)
2365 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2366
2367 if ((BW < 32) && !IsBitTest)
2368 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2369
2370 // Emit the combined compare and branch instruction.
2371 SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs());
2372 MachineInstrBuilder MIB =
2373 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
2374 .addReg(RegNo: SrcReg);
2375 if (IsBitTest)
2376 MIB.addImm(Val: TestBit);
2377 MIB.addMBB(MBB: TBB);
2378
2379 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2380 return true;
2381}
2382
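/// Select a branch. Conditional branches on a compare first try the fused
/// compare-and-branch forms, then an explicit compare followed by B.cc; other
/// conditions fall back to testing bit 0 of the i1 value with TB(N)Z.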
2383bool AArch64FastISel::selectBranch(const Instruction *I) {
2384 const BranchInst *BI = cast<BranchInst>(Val: I);
2385 if (BI->isUnconditional()) {
2386 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(i: 0)];
2387 fastEmitBranch(MSucc, DbgLoc: BI->getDebugLoc());
2388 return true;
2389 }
2390
2391 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(i: 0)];
2392 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(i: 1)];
2393
2394 if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
2395 if (CI->hasOneUse() && isValueAvailable(V: CI)) {
2396 // Try to optimize or fold the cmp.
2397 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398 switch (Predicate) {
2399 default:
2400 break;
2401 case CmpInst::FCMP_FALSE:
2402 fastEmitBranch(MSucc: FBB, DbgLoc: MIMD.getDL());
2403 return true;
2404 case CmpInst::FCMP_TRUE:
2405 fastEmitBranch(MSucc: TBB, DbgLoc: MIMD.getDL());
2406 return true;
2407 }
2408
2409 // Try to emit a combined compare-and-branch first.
2410 if (emitCompareAndBranch(BI))
2411 return true;
2412
2413 // Try to take advantage of fallthrough opportunities.
2414 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
2415 std::swap(a&: TBB, b&: FBB);
2416 Predicate = CmpInst::getInversePredicate(pred: Predicate);
2417 }
2418
2419 // Emit the cmp.
2420 if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned()))
2421 return false;
2422
2423 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424 // instruction.
2425 AArch64CC::CondCode CC = getCompareCC(Pred: Predicate);
2426 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427 switch (Predicate) {
2428 default:
2429 break;
2430 case CmpInst::FCMP_UEQ:
2431 ExtraCC = AArch64CC::EQ;
2432 CC = AArch64CC::VS;
2433 break;
2434 case CmpInst::FCMP_ONE:
2435 ExtraCC = AArch64CC::MI;
2436 CC = AArch64CC::GT;
2437 break;
2438 }
2439 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440
2441 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442 if (ExtraCC != AArch64CC::AL) {
2443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2444 .addImm(ExtraCC)
2445 .addMBB(TBB);
2446 }
2447
2448 // Emit the branch.
2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2450 .addImm(CC)
2451 .addMBB(TBB);
2452
2453 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2454 return true;
2455 }
2456 } else if (const auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
2457 uint64_t Imm = CI->getZExtValue();
2458 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2460 .addMBB(Target);
2461
2462 // Obtain the branch probability and add the target to the successor list.
2463 if (FuncInfo.BPI) {
2464 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465 Src: BI->getParent(), Dst: Target->getBasicBlock());
2466 FuncInfo.MBB->addSuccessor(Succ: Target, Prob: BranchProbability);
2467 } else
2468 FuncInfo.MBB->addSuccessorWithoutProb(Succ: Target);
2469 return true;
2470 } else {
2471 AArch64CC::CondCode CC = AArch64CC::NE;
2472 if (foldXALUIntrinsic(CC, I, Cond: BI->getCondition())) {
2473      // Fake-request the condition; otherwise the intrinsic might be completely
2474      // optimized away.
2475 Register CondReg = getRegForValue(V: BI->getCondition());
2476 if (!CondReg)
2477 return false;
2478
2479 // Emit the branch.
2480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2481 .addImm(CC)
2482 .addMBB(TBB);
2483
2484 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2485 return true;
2486 }
2487 }
2488
2489 Register CondReg = getRegForValue(V: BI->getCondition());
2490 if (CondReg == 0)
2491 return false;
2492
2493  // i1 conditions come as i32 values; test the lowest bit with tb(n)z.
2494 unsigned Opcode = AArch64::TBNZW;
2495 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
2496 std::swap(a&: TBB, b&: FBB);
2497 Opcode = AArch64::TBZW;
2498 }
2499
2500 const MCInstrDesc &II = TII.get(Opcode);
2501 Register ConstrainedCondReg
2502 = constrainOperandRegClass(II, Op: CondReg, OpNum: II.getNumDefs());
2503 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
2504 .addReg(RegNo: ConstrainedCondReg)
2505 .addImm(Val: 0)
2506 .addMBB(MBB: TBB);
2507
2508 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2509 return true;
2510}
2511
2512bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2513 const IndirectBrInst *BI = cast<IndirectBrInst>(Val: I);
2514 Register AddrReg = getRegForValue(V: BI->getOperand(i_nocapture: 0));
2515 if (AddrReg == 0)
2516 return false;
2517
2518 // Emit the indirect branch.
2519 const MCInstrDesc &II = TII.get(AArch64::BR);
2520 AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: II.getNumDefs());
2521 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: AddrReg);
2522
2523 // Make sure the CFG is up-to-date.
2524 for (const auto *Succ : BI->successors())
2525 FuncInfo.MBB->addSuccessor(Succ: FuncInfo.MBBMap[Succ]);
2526
2527 return true;
2528}
2529
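/// Select an integer or floating-point compare whose result is used as a
/// value, materializing the boolean with CSINC on the inverted condition (two
/// CSINCs for FCMP_UEQ and FCMP_ONE).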
2530bool AArch64FastISel::selectCmp(const Instruction *I) {
2531 const CmpInst *CI = cast<CmpInst>(Val: I);
2532
2533 // Vectors of i1 are weird: bail out.
2534 if (CI->getType()->isVectorTy())
2535 return false;
2536
2537 // Try to optimize or fold the cmp.
2538 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2539 unsigned ResultReg = 0;
2540 switch (Predicate) {
2541 default:
2542 break;
2543 case CmpInst::FCMP_FALSE:
2544 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2545 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2546 TII.get(TargetOpcode::COPY), ResultReg)
2547 .addReg(AArch64::WZR, getKillRegState(true));
2548 break;
2549 case CmpInst::FCMP_TRUE:
2550 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2551 break;
2552 }
2553
2554 if (ResultReg) {
2555 updateValueMap(I, Reg: ResultReg);
2556 return true;
2557 }
2558
2559 // Emit the cmp.
2560 if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned()))
2561 return false;
2562
2563 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2564
2565 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2566  // condition codes are inverted because they are used by CSINC.
2567 static unsigned CondCodeTable[2][2] = {
2568 { AArch64CC::NE, AArch64CC::VC },
2569 { AArch64CC::PL, AArch64CC::LE }
2570 };
2571 unsigned *CondCodes = nullptr;
2572 switch (Predicate) {
2573 default:
2574 break;
2575 case CmpInst::FCMP_UEQ:
2576 CondCodes = &CondCodeTable[0][0];
2577 break;
2578 case CmpInst::FCMP_ONE:
2579 CondCodes = &CondCodeTable[1][0];
2580 break;
2581 }
2582
2583 if (CondCodes) {
2584 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2585 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2586 TmpReg1)
2587 .addReg(AArch64::WZR, getKillRegState(true))
2588 .addReg(AArch64::WZR, getKillRegState(true))
2589 .addImm(CondCodes[0]);
2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591 ResultReg)
2592 .addReg(TmpReg1, getKillRegState(true))
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addImm(CondCodes[1]);
2595
2596 updateValueMap(I, Reg: ResultReg);
2597 return true;
2598 }
2599
2600 // Now set a register based on the comparison.
2601 AArch64CC::CondCode CC = getCompareCC(Pred: Predicate);
2602 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2603 AArch64CC::CondCode invertedCC = getInvertedCondCode(Code: CC);
2604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2605 ResultReg)
2606 .addReg(AArch64::WZR, getKillRegState(true))
2607 .addReg(AArch64::WZR, getKillRegState(true))
2608 .addImm(invertedCC);
2609
2610 updateValueMap(I, Reg: ResultReg);
2611 return true;
2612}
2613
2614/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2615/// value.
2616bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2617 if (!SI->getType()->isIntegerTy(Bitwidth: 1))
2618 return false;
2619
2620 const Value *Src1Val, *Src2Val;
2621 unsigned Opc = 0;
2622 bool NeedExtraOp = false;
2623 if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getTrueValue())) {
2624 if (CI->isOne()) {
2625 Src1Val = SI->getCondition();
2626 Src2Val = SI->getFalseValue();
2627 Opc = AArch64::ORRWrr;
2628 } else {
2629 assert(CI->isZero());
2630 Src1Val = SI->getFalseValue();
2631 Src2Val = SI->getCondition();
2632 Opc = AArch64::BICWrr;
2633 }
2634 } else if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getFalseValue())) {
2635 if (CI->isOne()) {
2636 Src1Val = SI->getCondition();
2637 Src2Val = SI->getTrueValue();
2638 Opc = AArch64::ORRWrr;
2639 NeedExtraOp = true;
2640 } else {
2641 assert(CI->isZero());
2642 Src1Val = SI->getCondition();
2643 Src2Val = SI->getTrueValue();
2644 Opc = AArch64::ANDWrr;
2645 }
2646 }
2647
2648 if (!Opc)
2649 return false;
2650
2651 Register Src1Reg = getRegForValue(V: Src1Val);
2652 if (!Src1Reg)
2653 return false;
2654
2655 Register Src2Reg = getRegForValue(V: Src2Val);
2656 if (!Src2Reg)
2657 return false;
2658
2659 if (NeedExtraOp)
2660 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2661
2662 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2663 Src2Reg);
2664 updateValueMap(I: SI, Reg: ResultReg);
2665 return true;
2666}
2667
2668bool AArch64FastISel::selectSelect(const Instruction *I) {
2669 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2670 MVT VT;
2671 if (!isTypeSupported(Ty: I->getType(), VT))
2672 return false;
2673
2674 unsigned Opc;
2675 const TargetRegisterClass *RC;
2676 switch (VT.SimpleTy) {
2677 default:
2678 return false;
2679 case MVT::i1:
2680 case MVT::i8:
2681 case MVT::i16:
2682 case MVT::i32:
2683 Opc = AArch64::CSELWr;
2684 RC = &AArch64::GPR32RegClass;
2685 break;
2686 case MVT::i64:
2687 Opc = AArch64::CSELXr;
2688 RC = &AArch64::GPR64RegClass;
2689 break;
2690 case MVT::f32:
2691 Opc = AArch64::FCSELSrrr;
2692 RC = &AArch64::FPR32RegClass;
2693 break;
2694 case MVT::f64:
2695 Opc = AArch64::FCSELDrrr;
2696 RC = &AArch64::FPR64RegClass;
2697 break;
2698 }
2699
2700 const SelectInst *SI = cast<SelectInst>(Val: I);
2701 const Value *Cond = SI->getCondition();
2702 AArch64CC::CondCode CC = AArch64CC::NE;
2703 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2704
2705 if (optimizeSelect(SI))
2706 return true;
2707
2708  // Try to pick up the flags so we don't have to emit another compare.
2709 if (foldXALUIntrinsic(CC, I, Cond)) {
2710    // Fake-request the condition to force emission of the XALU intrinsic.
2711 Register CondReg = getRegForValue(V: Cond);
2712 if (!CondReg)
2713 return false;
2714 } else if (isa<CmpInst>(Val: Cond) && cast<CmpInst>(Val: Cond)->hasOneUse() &&
2715 isValueAvailable(V: Cond)) {
2716 const auto *Cmp = cast<CmpInst>(Val: Cond);
2717 // Try to optimize or fold the cmp.
2718 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI: Cmp);
2719 const Value *FoldSelect = nullptr;
2720 switch (Predicate) {
2721 default:
2722 break;
2723 case CmpInst::FCMP_FALSE:
2724 FoldSelect = SI->getFalseValue();
2725 break;
2726 case CmpInst::FCMP_TRUE:
2727 FoldSelect = SI->getTrueValue();
2728 break;
2729 }
2730
2731 if (FoldSelect) {
2732 Register SrcReg = getRegForValue(V: FoldSelect);
2733 if (!SrcReg)
2734 return false;
2735
2736 updateValueMap(I, Reg: SrcReg);
2737 return true;
2738 }
2739
2740 // Emit the cmp.
2741 if (!emitCmp(LHS: Cmp->getOperand(i_nocapture: 0), RHS: Cmp->getOperand(i_nocapture: 1), IsZExt: Cmp->isUnsigned()))
2742 return false;
2743
2744 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2745 CC = getCompareCC(Pred: Predicate);
2746 switch (Predicate) {
2747 default:
2748 break;
2749 case CmpInst::FCMP_UEQ:
2750 ExtraCC = AArch64CC::EQ;
2751 CC = AArch64CC::VS;
2752 break;
2753 case CmpInst::FCMP_ONE:
2754 ExtraCC = AArch64CC::MI;
2755 CC = AArch64CC::GT;
2756 break;
2757 }
2758 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2759 } else {
2760 Register CondReg = getRegForValue(V: Cond);
2761 if (!CondReg)
2762 return false;
2763
2764 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2765 CondReg = constrainOperandRegClass(II, Op: CondReg, OpNum: 1);
2766
2767 // Emit a TST instruction (ANDS wzr, reg, #imm).
2768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2769 AArch64::WZR)
2770 .addReg(CondReg)
2771 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2772 }
2773
2774 Register Src1Reg = getRegForValue(V: SI->getTrueValue());
2775 Register Src2Reg = getRegForValue(V: SI->getFalseValue());
2776
2777 if (!Src1Reg || !Src2Reg)
2778 return false;
2779
2780 if (ExtraCC != AArch64CC::AL)
2781 Src2Reg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: ExtraCC);
2782
2783 Register ResultReg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: CC);
2784 updateValueMap(I, Reg: ResultReg);
2785 return true;
2786}
2787
2788bool AArch64FastISel::selectFPExt(const Instruction *I) {
2789 Value *V = I->getOperand(i: 0);
2790 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2791 return false;
2792
2793 Register Op = getRegForValue(V);
2794 if (Op == 0)
2795 return false;
2796
2797 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2798 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2799 ResultReg).addReg(Op);
2800 updateValueMap(I, Reg: ResultReg);
2801 return true;
2802}
2803
2804bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2805 Value *V = I->getOperand(i: 0);
2806 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2807 return false;
2808
2809 Register Op = getRegForValue(V);
2810 if (Op == 0)
2811 return false;
2812
2813 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2814 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2815 ResultReg).addReg(Op);
2816 updateValueMap(I, Reg: ResultReg);
2817 return true;
2818}
2819
2820// FPToUI and FPToSI
2821bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2822 MVT DestVT;
2823 if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector())
2824 return false;
2825
2826 Register SrcReg = getRegForValue(V: I->getOperand(i: 0));
2827 if (SrcReg == 0)
2828 return false;
2829
2830 EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true);
2831 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2832 return false;
2833
2834 unsigned Opc;
2835 if (SrcVT == MVT::f64) {
2836 if (Signed)
2837 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2838 else
2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2840 } else {
2841 if (Signed)
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2843 else
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2845 }
2846 Register ResultReg = createResultReg(
2847 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2848 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
2849 .addReg(RegNo: SrcReg);
2850 updateValueMap(I, Reg: ResultReg);
2851 return true;
2852}
2853
2854bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2855 MVT DestVT;
2856 if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector())
2857 return false;
2858  // Let regular ISel handle FP16 and BF16.
2859 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2860 return false;
2861
2862 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2863 "Unexpected value type.");
2864
2865 Register SrcReg = getRegForValue(V: I->getOperand(i: 0));
2866 if (!SrcReg)
2867 return false;
2868
2869 EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true);
2870
2871 // Handle sign-extension.
2872 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2873 SrcReg =
2874 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2875 if (!SrcReg)
2876 return false;
2877 }
2878
2879 unsigned Opc;
2880 if (SrcVT == MVT::i64) {
2881 if (Signed)
2882 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2883 else
2884 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2885 } else {
2886 if (Signed)
2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2888 else
2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2890 }
2891
2892 Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT: DestVT), Op0: SrcReg);
2893 updateValueMap(I, Reg: ResultReg);
2894 return true;
2895}
2896
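/// Fast-path lowering for incoming arguments: handles up to eight GPR and
/// eight FPR/vector arguments of simple types for the C and Swift calling
/// conventions, copying each argument out of its live-in physical register.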
2897bool AArch64FastISel::fastLowerArguments() {
2898 if (!FuncInfo.CanLowerReturn)
2899 return false;
2900
2901 const Function *F = FuncInfo.Fn;
2902 if (F->isVarArg())
2903 return false;
2904
2905 CallingConv::ID CC = F->getCallingConv();
2906 if (CC != CallingConv::C && CC != CallingConv::Swift)
2907 return false;
2908
2909 if (Subtarget->hasCustomCallingConv())
2910 return false;
2911
2912 // Only handle simple cases of up to 8 GPR and FPR each.
2913 unsigned GPRCnt = 0;
2914 unsigned FPRCnt = 0;
2915 for (auto const &Arg : F->args()) {
2916 if (Arg.hasAttribute(Attribute::ByVal) ||
2917 Arg.hasAttribute(Attribute::InReg) ||
2918 Arg.hasAttribute(Attribute::StructRet) ||
2919 Arg.hasAttribute(Attribute::SwiftSelf) ||
2920 Arg.hasAttribute(Attribute::SwiftAsync) ||
2921 Arg.hasAttribute(Attribute::SwiftError) ||
2922 Arg.hasAttribute(Attribute::Nest))
2923 return false;
2924
2925 Type *ArgTy = Arg.getType();
2926 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2927 return false;
2928
2929 EVT ArgVT = TLI.getValueType(DL, Ty: ArgTy);
2930 if (!ArgVT.isSimple())
2931 return false;
2932
2933 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2934 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2935 return false;
2936
2937 if (VT.isVector() &&
2938 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2939 return false;
2940
2941 if (VT >= MVT::i1 && VT <= MVT::i64)
2942 ++GPRCnt;
2943 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2944 VT.is128BitVector())
2945 ++FPRCnt;
2946 else
2947 return false;
2948
2949 if (GPRCnt > 8 || FPRCnt > 8)
2950 return false;
2951 }
2952
2953 static const MCPhysReg Registers[6][8] = {
2954 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2955 AArch64::W5, AArch64::W6, AArch64::W7 },
2956 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2957 AArch64::X5, AArch64::X6, AArch64::X7 },
2958 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2959 AArch64::H5, AArch64::H6, AArch64::H7 },
2960 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2961 AArch64::S5, AArch64::S6, AArch64::S7 },
2962 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2963 AArch64::D5, AArch64::D6, AArch64::D7 },
2964 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2965 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2966 };
2967
2968 unsigned GPRIdx = 0;
2969 unsigned FPRIdx = 0;
2970 for (auto const &Arg : F->args()) {
2971 MVT VT = TLI.getSimpleValueType(DL, Ty: Arg.getType());
2972 unsigned SrcReg;
2973 const TargetRegisterClass *RC;
2974 if (VT >= MVT::i1 && VT <= MVT::i32) {
2975 SrcReg = Registers[0][GPRIdx++];
2976 RC = &AArch64::GPR32RegClass;
2977 VT = MVT::i32;
2978 } else if (VT == MVT::i64) {
2979 SrcReg = Registers[1][GPRIdx++];
2980 RC = &AArch64::GPR64RegClass;
2981 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2982 SrcReg = Registers[2][FPRIdx++];
2983 RC = &AArch64::FPR16RegClass;
2984 } else if (VT == MVT::f32) {
2985 SrcReg = Registers[3][FPRIdx++];
2986 RC = &AArch64::FPR32RegClass;
2987 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2988 SrcReg = Registers[4][FPRIdx++];
2989 RC = &AArch64::FPR64RegClass;
2990 } else if (VT.is128BitVector()) {
2991 SrcReg = Registers[5][FPRIdx++];
2992 RC = &AArch64::FPR128RegClass;
2993 } else
2994 llvm_unreachable("Unexpected value type.");
2995
2996 Register DstReg = FuncInfo.MF->addLiveIn(PReg: SrcReg, RC);
2997 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2998 // Without this, EmitLiveInCopies may eliminate the livein if its only
2999 // use is a bitcast (which isn't turned into an instruction).
3000 Register ResultReg = createResultReg(RC);
3001 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3002 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
3003 .addReg(RegNo: DstReg, flags: getKillRegState(B: true));
3004 updateValueMap(I: &Arg, Reg: ResultReg);
3005 }
3006 return true;
3007}
3008
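/// Lower outgoing call arguments: analyze the operands against the calling
/// convention, emit CALLSEQ_START, and either copy each argument into its
/// assigned register or store it to its stack slot relative to SP.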
3009bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3010 SmallVectorImpl<MVT> &OutVTs,
3011 unsigned &NumBytes) {
3012 CallingConv::ID CC = CLI.CallConv;
3013 SmallVector<CCValAssign, 16> ArgLocs;
3014 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3015 CCInfo.AnalyzeCallOperands(ArgVTs&: OutVTs, Flags&: CLI.OutFlags, Fn: CCAssignFnForCall(CC));
3016
3017 // Get a count of how many bytes are to be pushed on the stack.
3018 NumBytes = CCInfo.getStackSize();
3019
3020 // Issue CALLSEQ_START
3021 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3022 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackDown))
3023 .addImm(Val: NumBytes).addImm(Val: 0);
3024
3025 // Process the args.
3026 for (CCValAssign &VA : ArgLocs) {
3027 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3028 MVT ArgVT = OutVTs[VA.getValNo()];
3029
3030 Register ArgReg = getRegForValue(V: ArgVal);
3031 if (!ArgReg)
3032 return false;
3033
3034 // Handle arg promotion: SExt, ZExt, AExt.
3035 switch (VA.getLocInfo()) {
3036 case CCValAssign::Full:
3037 break;
3038 case CCValAssign::SExt: {
3039 MVT DestVT = VA.getLocVT();
3040 MVT SrcVT = ArgVT;
3041 ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/false);
3042 if (!ArgReg)
3043 return false;
3044 break;
3045 }
3046 case CCValAssign::AExt:
3047 // Intentional fall-through.
3048 case CCValAssign::ZExt: {
3049 MVT DestVT = VA.getLocVT();
3050 MVT SrcVT = ArgVT;
3051 ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/true);
3052 if (!ArgReg)
3053 return false;
3054 break;
3055 }
3056 default:
3057 llvm_unreachable("Unknown arg promotion!");
3058 }
3059
3060 // Now copy/store arg to correct locations.
3061 if (VA.isRegLoc() && !VA.needsCustom()) {
3062 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3063 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VA.getLocReg()).addReg(RegNo: ArgReg);
3064 CLI.OutRegs.push_back(Elt: VA.getLocReg());
3065 } else if (VA.needsCustom()) {
3066 // FIXME: Handle custom args.
3067 return false;
3068 } else {
3069 assert(VA.isMemLoc() && "Assuming store on stack.");
3070
3071 // Don't emit stores for undef values.
3072 if (isa<UndefValue>(Val: ArgVal))
3073 continue;
3074
3075 // Need to store on the stack.
3076 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3077
3078 unsigned BEAlign = 0;
3079 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3080 BEAlign = 8 - ArgSize;
3081
3082 Address Addr;
3083 Addr.setKind(Address::RegBase);
3084 Addr.setReg(AArch64::SP);
3085 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3086
3087 Align Alignment = DL.getABITypeAlign(Ty: ArgVal->getType());
3088 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3089 PtrInfo: MachinePointerInfo::getStack(MF&: *FuncInfo.MF, Offset: Addr.getOffset()),
3090 F: MachineMemOperand::MOStore, Size: ArgVT.getStoreSize(), BaseAlignment: Alignment);
3091
3092 if (!emitStore(VT: ArgVT, SrcReg: ArgReg, Addr, MMO))
3093 return false;
3094 }
3095 }
3096 return true;
3097}
3098
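/// Finish a lowered call: emit CALLSEQ_END and copy each return value out of
/// its assigned physical register into the virtual registers created for the
/// call result.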
3099bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3100 CallingConv::ID CC = CLI.CallConv;
3101
3102 // Issue CALLSEQ_END
3103 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3104 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackUp))
3105 .addImm(Val: NumBytes).addImm(Val: 0);
3106
3107 // Now the return values.
3108 SmallVector<CCValAssign, 16> RVLocs;
3109 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3110 CCInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: CCAssignFnForCall(CC));
3111
3112 Register ResultReg = FuncInfo.CreateRegs(Ty: CLI.RetTy);
3113 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3114 CCValAssign &VA = RVLocs[i];
3115 MVT CopyVT = VA.getValVT();
3116 unsigned CopyReg = ResultReg + i;
3117
3118 // TODO: Handle big-endian results
3119 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3120 return false;
3121
3122    // Copy the result out of its specified physreg.
3123 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TargetOpcode::COPY),
3124 DestReg: CopyReg)
3125 .addReg(RegNo: VA.getLocReg());
3126 CLI.InRegs.push_back(Elt: VA.getLocReg());
3127 }
3128
3129 CLI.ResultReg = ResultReg;
3130 CLI.NumResultRegs = RVLocs.size();
3131
3132 return true;
3133}
3134
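/// Fast-path call lowering. Bails out to SelectionDAG for tail calls, varargs,
/// and other cases it does not handle (e.g. KCFI-checked indirect calls, ILP32,
/// or the large code model on non-MachO targets), then lowers the arguments,
/// emits the BL/BLR, and finishes the call.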
3135bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3136 CallingConv::ID CC = CLI.CallConv;
3137 bool IsTailCall = CLI.IsTailCall;
3138 bool IsVarArg = CLI.IsVarArg;
3139 const Value *Callee = CLI.Callee;
3140 MCSymbol *Symbol = CLI.Symbol;
3141
3142 if (!Callee && !Symbol)
3143 return false;
3144
3145 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3146 // a bti instruction following the call.
3147 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3148 !Subtarget->noBTIAtReturnTwice() &&
3149 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3150 return false;
3151
3152 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3153 if (CLI.CB && CLI.CB->isIndirectCall() &&
3154 CLI.CB->getOperandBundle(ID: LLVMContext::OB_kcfi))
3155 return false;
3156
3157 // Allow SelectionDAG isel to handle tail calls.
3158 if (IsTailCall)
3159 return false;
3160
3161 // FIXME: we could and should support this, but for now correctness at -O0 is
3162 // more important.
3163 if (Subtarget->isTargetILP32())
3164 return false;
3165
3166 CodeModel::Model CM = TM.getCodeModel();
3167 // Only support the small-addressing and large code models.
3168 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3169 return false;
3170
3171 // FIXME: Add large code model support for ELF.
3172 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3173 return false;
3174
3175 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3176 // attribute. Check "RtLibUseGOT" instead.
3177 if (MF->getFunction().getParent()->getRtLibUseGOT())
3178 return false;
3179
3180 // Let SDISel handle vararg functions.
3181 if (IsVarArg)
3182 return false;
3183
3184 if (Subtarget->isWindowsArm64EC())
3185 return false;
3186
3187 for (auto Flag : CLI.OutFlags)
3188 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3189 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3190 return false;
3191
3192 // Set up the argument vectors.
3193 SmallVector<MVT, 16> OutVTs;
3194 OutVTs.reserve(N: CLI.OutVals.size());
3195
3196 for (auto *Val : CLI.OutVals) {
3197 MVT VT;
3198 if (!isTypeLegal(Val->getType(), VT) &&
3199 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3200 return false;
3201
3202 // We don't handle vector parameters yet.
3203 if (VT.isVector() || VT.getSizeInBits() > 64)
3204 return false;
3205
3206 OutVTs.push_back(Elt: VT);
3207 }
3208
3209 Address Addr;
3210 if (Callee && !computeCallAddress(V: Callee, Addr))
3211 return false;
3212
3213 // The weak function target may be zero; in that case we must use indirect
3214  // addressing via a stub on Windows, as it may be out of range for a
3215 // PC-relative jump.
3216 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3217 Addr.getGlobalValue()->hasExternalWeakLinkage())
3218 return false;
3219
3220 // Handle the arguments now that we've gotten them.
3221 unsigned NumBytes;
3222 if (!processCallArgs(CLI, OutVTs, NumBytes))
3223 return false;
3224
3225 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3226 if (RegInfo->isAnyArgRegReserved(MF: *MF))
3227 RegInfo->emitReservedArgRegCallError(MF: *MF);
3228
3229 // Issue the call.
3230 MachineInstrBuilder MIB;
3231 if (Subtarget->useSmallAddressing()) {
3232 const MCInstrDesc &II =
3233 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3234 MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II);
3235 if (Symbol)
3236 MIB.addSym(Sym: Symbol, TargetFlags: 0);
3237 else if (Addr.getGlobalValue())
3238 MIB.addGlobalAddress(GV: Addr.getGlobalValue(), Offset: 0, TargetFlags: 0);
3239 else if (Addr.getReg()) {
3240 Register Reg = constrainOperandRegClass(II, Op: Addr.getReg(), OpNum: 0);
3241 MIB.addReg(RegNo: Reg);
3242 } else
3243 return false;
3244 } else {
3245 unsigned CallReg = 0;
3246 if (Symbol) {
3247 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3248 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3249 ADRPReg)
3250 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3251
3252 CallReg = createResultReg(&AArch64::GPR64RegClass);
3253 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3254 TII.get(AArch64::LDRXui), CallReg)
3255 .addReg(ADRPReg)
3256 .addSym(Symbol,
3257 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3258 } else if (Addr.getGlobalValue())
3259 CallReg = materializeGV(GV: Addr.getGlobalValue());
3260 else if (Addr.getReg())
3261 CallReg = Addr.getReg();
3262
3263 if (!CallReg)
3264 return false;
3265
3266 const MCInstrDesc &II = TII.get(Opcode: getBLRCallOpcode(MF: *MF));
3267 CallReg = constrainOperandRegClass(II, Op: CallReg, OpNum: 0);
3268 MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: CallReg);
3269 }
3270
3271 // Add implicit physical register uses to the call.
3272 for (auto Reg : CLI.OutRegs)
3273 MIB.addReg(RegNo: Reg, flags: RegState::Implicit);
3274
3275 // Add a register mask with the call-preserved registers.
3276 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3277 MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));
3278
3279 CLI.Call = MIB;
3280
3281 // Finish off the call including any return values.
3282 return finishCall(CLI, NumBytes);
3283}
3284
3285bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
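  // Heuristic sketch: with a known alignment, the copy is inlined only if it
  // fits in roughly four alignment-sized chunks (integer division, so e.g. up
  // to 39 bytes at 8-byte alignment); with unknown alignment the cap is 31
  // bytes.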
3286 if (Alignment)
3287 return Len / Alignment->value() <= 4;
3288 else
3289 return Len < 32;
3290}
3291
3292bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3293 uint64_t Len, MaybeAlign Alignment) {
3294 // Make sure we don't bloat code by inlining very large memcpy's.
3295 if (!isMemCpySmall(Len, Alignment))
3296 return false;
3297
3298 int64_t UnscaledOffset = 0;
3299 Address OrigDest = Dest;
3300 Address OrigSrc = Src;
3301
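  // Emit the copy as a run of progressively smaller loads and stores. For
  // example (illustrative), a 15-byte copy at 8-byte alignment becomes an
  // i64, an i32, an i16 and an i8 load/store pair at increasing offsets.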
3302 while (Len) {
3303 MVT VT;
3304 if (!Alignment || *Alignment >= 8) {
3305 if (Len >= 8)
3306 VT = MVT::i64;
3307 else if (Len >= 4)
3308 VT = MVT::i32;
3309 else if (Len >= 2)
3310 VT = MVT::i16;
3311 else {
3312 VT = MVT::i8;
3313 }
3314 } else {
3315 assert(Alignment && "Alignment is set in this branch");
3316 // Bound based on alignment.
3317 if (Len >= 4 && *Alignment == 4)
3318 VT = MVT::i32;
3319 else if (Len >= 2 && *Alignment == 2)
3320 VT = MVT::i16;
3321 else {
3322 VT = MVT::i8;
3323 }
3324 }
3325
3326 unsigned ResultReg = emitLoad(VT, RetVT: VT, Addr: Src);
3327 if (!ResultReg)
3328 return false;
3329
3330 if (!emitStore(VT, SrcReg: ResultReg, Addr: Dest))
3331 return false;
3332
3333 int64_t Size = VT.getSizeInBits() / 8;
3334 Len -= Size;
3335 UnscaledOffset += Size;
3336
3337 // We need to recompute the unscaled offset for each iteration.
3338 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3339 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3340 }
3341
3342 return true;
3343}
3344
3345/// Check if it is possible to fold the condition from the XALU intrinsic
3346/// into the user. The condition code will only be updated on success.
3347bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3348 const Instruction *I,
3349 const Value *Cond) {
3350 if (!isa<ExtractValueInst>(Val: Cond))
3351 return false;
3352
3353 const auto *EV = cast<ExtractValueInst>(Val: Cond);
3354 if (!isa<IntrinsicInst>(Val: EV->getAggregateOperand()))
3355 return false;
3356
3357 const auto *II = cast<IntrinsicInst>(Val: EV->getAggregateOperand());
3358 MVT RetVT;
3359 const Function *Callee = II->getCalledFunction();
3360 Type *RetTy =
3361 cast<StructType>(Val: Callee->getReturnType())->getTypeAtIndex(N: 0U);
3362 if (!isTypeLegal(Ty: RetTy, VT&: RetVT))
3363 return false;
3364
3365 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3366 return false;
3367
3368 const Value *LHS = II->getArgOperand(i: 0);
3369 const Value *RHS = II->getArgOperand(i: 1);
3370
3371 // Canonicalize immediate to the RHS.
3372 if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative())
3373 std::swap(a&: LHS, b&: RHS);
3374
3375 // Simplify multiplies.
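  // A multiply by 2 overflows exactly when adding the value to itself
  // overflows, so {s|u}mul.with.overflow(x, 2) can reuse the condition code
  // of the corresponding {s|u}add.with.overflow.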
3376 Intrinsic::ID IID = II->getIntrinsicID();
3377 switch (IID) {
3378 default:
3379 break;
3380 case Intrinsic::smul_with_overflow:
3381 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3382 if (C->getValue() == 2)
3383 IID = Intrinsic::sadd_with_overflow;
3384 break;
3385 case Intrinsic::umul_with_overflow:
3386 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3387 if (C->getValue() == 2)
3388 IID = Intrinsic::uadd_with_overflow;
3389 break;
3390 }
3391
3392 AArch64CC::CondCode TmpCC;
3393 switch (IID) {
3394 default:
3395 return false;
3396 case Intrinsic::sadd_with_overflow:
3397 case Intrinsic::ssub_with_overflow:
3398 TmpCC = AArch64CC::VS;
3399 break;
3400 case Intrinsic::uadd_with_overflow:
3401 TmpCC = AArch64CC::HS;
3402 break;
3403 case Intrinsic::usub_with_overflow:
3404 TmpCC = AArch64CC::LO;
3405 break;
3406 case Intrinsic::smul_with_overflow:
3407 case Intrinsic::umul_with_overflow:
3408 TmpCC = AArch64CC::NE;
3409 break;
3410 }
3411
3412 // Check if both instructions are in the same basic block.
3413 if (!isValueAvailable(V: II))
3414 return false;
3415
3416 // Make sure nothing is in the way between the intrinsic and its user.
3417 BasicBlock::const_iterator Start(I);
3418 BasicBlock::const_iterator End(II);
3419 for (auto Itr = std::prev(x: Start); Itr != End; --Itr) {
3420 // We only expect extractvalue instructions between the intrinsic and the
3421 // instruction to be selected.
3422 if (!isa<ExtractValueInst>(Val: Itr))
3423 return false;
3424
3425 // Check that the extractvalue operand comes from the intrinsic.
3426 const auto *EVI = cast<ExtractValueInst>(Val&: Itr);
3427 if (EVI->getAggregateOperand() != II)
3428 return false;
3429 }
3430
3431 CC = TmpCC;
3432 return true;
3433}
3434
3435bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3436 // FIXME: Handle more intrinsics.
3437 switch (II->getIntrinsicID()) {
3438 default: return false;
3439 case Intrinsic::frameaddress: {
3440 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3441 MFI.setFrameAddressIsTaken(true);
3442
3443 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3444 Register FramePtr = RegInfo->getFrameRegister(MF: *(FuncInfo.MF));
3445 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3446 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3447 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: SrcReg).addReg(RegNo: FramePtr);
3448 // Recursively load frame address
3449 // ldr x0, [fp]
3450 // ldr x0, [x0]
3451 // ldr x0, [x0]
3452 // ...
3453 unsigned DestReg;
3454 unsigned Depth = cast<ConstantInt>(Val: II->getOperand(i_nocapture: 0))->getZExtValue();
3455 while (Depth--) {
3456 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3457 SrcReg, 0);
3458 assert(DestReg && "Unexpected LDR instruction emission failure.");
3459 SrcReg = DestReg;
3460 }
3461
3462 updateValueMap(I: II, Reg: SrcReg);
3463 return true;
3464 }
3465 case Intrinsic::sponentry: {
3466 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3467
3468 // SP = FP + Fixed Object + 16
3469 int FI = MFI.CreateFixedObject(Size: 4, SPOffset: 0, IsImmutable: false);
3470 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3472 TII.get(AArch64::ADDXri), ResultReg)
3473 .addFrameIndex(FI)
3474 .addImm(0)
3475 .addImm(0);
3476
3477 updateValueMap(I: II, Reg: ResultReg);
3478 return true;
3479 }
3480 case Intrinsic::memcpy:
3481 case Intrinsic::memmove: {
3482 const auto *MTI = cast<MemTransferInst>(Val: II);
3483 // Don't handle volatile.
3484 if (MTI->isVolatile())
3485 return false;
3486
3487 // Disable inlining for memmove before calls to computeAddress. Otherwise,
3488 // we would emit dead code because we don't currently handle memmoves.
3489 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3490 if (isa<ConstantInt>(Val: MTI->getLength()) && IsMemCpy) {
3491 // Small memcpy's are common enough that we want to do them without a call
3492 // if possible.
3493 uint64_t Len = cast<ConstantInt>(Val: MTI->getLength())->getZExtValue();
3494 MaybeAlign Alignment;
3495 if (MTI->getDestAlign() || MTI->getSourceAlign())
3496 Alignment = std::min(a: MTI->getDestAlign().valueOrOne(),
3497 b: MTI->getSourceAlign().valueOrOne());
3498 if (isMemCpySmall(Len, Alignment)) {
3499 Address Dest, Src;
3500 if (!computeAddress(Obj: MTI->getRawDest(), Addr&: Dest) ||
3501 !computeAddress(Obj: MTI->getRawSource(), Addr&: Src))
3502 return false;
3503 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3504 return true;
3505 }
3506 }
3507
3508 if (!MTI->getLength()->getType()->isIntegerTy(Bitwidth: 64))
3509 return false;
3510
3511 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3512 // Fast instruction selection doesn't support the special
3513 // address spaces.
3514 return false;
3515
3516 const char *IntrMemName = isa<MemCpyInst>(Val: II) ? "memcpy" : "memmove";
3517 return lowerCallTo(CI: II, SymName: IntrMemName, NumArgs: II->arg_size() - 1);
3518 }
3519 case Intrinsic::memset: {
3520 const MemSetInst *MSI = cast<MemSetInst>(Val: II);
3521 // Don't handle volatile.
3522 if (MSI->isVolatile())
3523 return false;
3524
3525 if (!MSI->getLength()->getType()->isIntegerTy(Bitwidth: 64))
3526 return false;
3527
3528 if (MSI->getDestAddressSpace() > 255)
3529 // Fast instruction selection doesn't support the special
3530 // address spaces.
3531 return false;
3532
3533 return lowerCallTo(CI: II, SymName: "memset", NumArgs: II->arg_size() - 1);
3534 }
3535 case Intrinsic::sin:
3536 case Intrinsic::cos:
3537 case Intrinsic::pow: {
3538 MVT RetVT;
3539 if (!isTypeLegal(Ty: II->getType(), VT&: RetVT))
3540 return false;
3541
3542 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3543 return false;
3544
3545 static const RTLIB::Libcall LibCallTable[3][2] = {
3546 { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3547 { RTLIB::COS_F32, RTLIB::COS_F64 },
3548 { RTLIB::POW_F32, RTLIB::POW_F64 }
3549 };
3550 RTLIB::Libcall LC;
3551 bool Is64Bit = RetVT == MVT::f64;
3552 switch (II->getIntrinsicID()) {
3553 default:
3554 llvm_unreachable("Unexpected intrinsic.");
3555 case Intrinsic::sin:
3556 LC = LibCallTable[0][Is64Bit];
3557 break;
3558 case Intrinsic::cos:
3559 LC = LibCallTable[1][Is64Bit];
3560 break;
3561 case Intrinsic::pow:
3562 LC = LibCallTable[2][Is64Bit];
3563 break;
3564 }
3565
3566 ArgListTy Args;
3567 Args.reserve(n: II->arg_size());
3568
3569 // Populate the argument list.
3570 for (auto &Arg : II->args()) {
3571 ArgListEntry Entry;
3572 Entry.Val = Arg;
3573 Entry.Ty = Arg->getType();
3574 Args.push_back(x: Entry);
3575 }
3576
3577 CallLoweringInfo CLI;
3578 MCContext &Ctx = MF->getContext();
3579 CLI.setCallee(DL, Ctx, CC: TLI.getLibcallCallingConv(Call: LC), ResultTy: II->getType(),
3580 Target: TLI.getLibcallName(Call: LC), ArgsList: std::move(Args));
3581 if (!lowerCallTo(CLI))
3582 return false;
3583 updateValueMap(I: II, Reg: CLI.ResultReg);
3584 return true;
3585 }
3586 case Intrinsic::fabs: {
3587 MVT VT;
3588 if (!isTypeLegal(Ty: II->getType(), VT))
3589 return false;
3590
3591 unsigned Opc;
3592 switch (VT.SimpleTy) {
3593 default:
3594 return false;
3595 case MVT::f32:
3596 Opc = AArch64::FABSSr;
3597 break;
3598 case MVT::f64:
3599 Opc = AArch64::FABSDr;
3600 break;
3601 }
3602 Register SrcReg = getRegForValue(V: II->getOperand(i_nocapture: 0));
3603 if (!SrcReg)
3604 return false;
3605 Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
3606 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
3607 .addReg(RegNo: SrcReg);
3608 updateValueMap(I: II, Reg: ResultReg);
3609 return true;
3610 }
3611 case Intrinsic::trap:
3612 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3613 .addImm(1);
3614 return true;
3615 case Intrinsic::debugtrap:
3616 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3617 .addImm(0xF000);
3618 return true;
3619
3620 case Intrinsic::sqrt: {
3621 Type *RetTy = II->getCalledFunction()->getReturnType();
3622
3623 MVT VT;
3624 if (!isTypeLegal(Ty: RetTy, VT))
3625 return false;
3626
3627 Register Op0Reg = getRegForValue(V: II->getOperand(i_nocapture: 0));
3628 if (!Op0Reg)
3629 return false;
3630
3631 unsigned ResultReg = fastEmit_r(VT, RetVT: VT, Opcode: ISD::FSQRT, Op0: Op0Reg);
3632 if (!ResultReg)
3633 return false;
3634
3635 updateValueMap(I: II, Reg: ResultReg);
3636 return true;
3637 }
3638 case Intrinsic::sadd_with_overflow:
3639 case Intrinsic::uadd_with_overflow:
3640 case Intrinsic::ssub_with_overflow:
3641 case Intrinsic::usub_with_overflow:
3642 case Intrinsic::smul_with_overflow:
3643 case Intrinsic::umul_with_overflow: {
3644 // This implements the basic lowering of the xalu with overflow intrinsics.
3645 const Function *Callee = II->getCalledFunction();
3646 auto *Ty = cast<StructType>(Val: Callee->getReturnType());
3647 Type *RetTy = Ty->getTypeAtIndex(N: 0U);
3648
3649 MVT VT;
3650 if (!isTypeLegal(Ty: RetTy, VT))
3651 return false;
3652
3653 if (VT != MVT::i32 && VT != MVT::i64)
3654 return false;
3655
3656 const Value *LHS = II->getArgOperand(i: 0);
3657 const Value *RHS = II->getArgOperand(i: 1);
3658 // Canonicalize immediate to the RHS.
3659 if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative())
3660 std::swap(a&: LHS, b&: RHS);
3661
3662 // Simplify multiplies.
3663 Intrinsic::ID IID = II->getIntrinsicID();
3664 switch (IID) {
3665 default:
3666 break;
3667 case Intrinsic::smul_with_overflow:
3668 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
3669 if (C->getValue() == 2) {
3670 IID = Intrinsic::sadd_with_overflow;
3671 RHS = LHS;
3672 }
3673 break;
3674 case Intrinsic::umul_with_overflow:
3675 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
3676 if (C->getValue() == 2) {
3677 IID = Intrinsic::uadd_with_overflow;
3678 RHS = LHS;
3679 }
3680 break;
3681 }
3682
3683 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3684 AArch64CC::CondCode CC = AArch64CC::Invalid;
3685 switch (IID) {
3686 default: llvm_unreachable("Unexpected intrinsic!");
3687 case Intrinsic::sadd_with_overflow:
3688 ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3689 CC = AArch64CC::VS;
3690 break;
3691 case Intrinsic::uadd_with_overflow:
3692 ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3693 CC = AArch64CC::HS;
3694 break;
3695 case Intrinsic::ssub_with_overflow:
3696 ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3697 CC = AArch64CC::VS;
3698 break;
3699 case Intrinsic::usub_with_overflow:
3700 ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3701 CC = AArch64CC::LO;
3702 break;
3703 case Intrinsic::smul_with_overflow: {
3704 CC = AArch64CC::NE;
3705 Register LHSReg = getRegForValue(V: LHS);
3706 if (!LHSReg)
3707 return false;
3708
3709 Register RHSReg = getRegForValue(V: RHS);
3710 if (!RHSReg)
3711 return false;
3712
3713 if (VT == MVT::i32) {
3714 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3715 Register MulSubReg =
3716 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
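  // The product was computed in 64 bits; it overflowed iff it no longer
  // equals the sign-extension of its low 32 bits, which the compare below
  // checks.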
3717 // cmp xreg, wreg, sxtw
3718 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3719 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3720 /*WantResult=*/false);
3721 MulReg = MulSubReg;
3722 } else {
3723 assert(VT == MVT::i64 && "Unexpected value type.");
3724 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3725 // reused in the next instruction.
3726 MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg);
3727 unsigned SMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHS, Op0: LHSReg, Op1: RHSReg);
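  // Signed 64x64-bit multiplication overflowed iff the high half (SMULH)
  // differs from the low half's sign bit replicated across 64 bits
  // (MulReg asr #63).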
3728 emitSubs_rs(RetVT: VT, LHSReg: SMULHReg, RHSReg: MulReg, ShiftType: AArch64_AM::ASR, ShiftImm: 63,
3729 /*WantResult=*/false);
3730 }
3731 break;
3732 }
3733 case Intrinsic::umul_with_overflow: {
3734 CC = AArch64CC::NE;
3735 Register LHSReg = getRegForValue(V: LHS);
3736 if (!LHSReg)
3737 return false;
3738
3739 Register RHSReg = getRegForValue(V: RHS);
3740 if (!RHSReg)
3741 return false;
3742
3743 if (VT == MVT::i32) {
3744 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
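  // The product was computed in 64 bits; it overflowed iff any of the upper
  // 32 bits are set, which the TST below checks.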
3745 // tst xreg, #0xffffffff00000000
3746 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3747 TII.get(AArch64::ANDSXri), AArch64::XZR)
3748 .addReg(MulReg)
3749 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3750 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3751 } else {
3752 assert(VT == MVT::i64 && "Unexpected value type.");
3753 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3754 // reused in the next instruction.
3755 MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg);
3756 unsigned UMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHU, Op0: LHSReg, Op1: RHSReg);
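  // Unsigned 64x64-bit multiplication overflowed iff the high half (UMULH)
  // is nonzero.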
3757 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3758 }
3759 break;
3760 }
3761 }
3762
3763 if (MulReg) {
3764 ResultReg1 = createResultReg(RC: TLI.getRegClassFor(VT));
3765 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3766 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg1).addReg(RegNo: MulReg);
3767 }
3768
3769 if (!ResultReg1)
3770 return false;
3771
3772 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3773 AArch64::WZR, AArch64::WZR,
3774 getInvertedCondCode(CC));
3775 (void)ResultReg2;
3776 assert((ResultReg1 + 1) == ResultReg2 &&
3777 "Nonconsecutive result registers.");
3778 updateValueMap(I: II, Reg: ResultReg1, NumRegs: 2);
3779 return true;
3780 }
3781 case Intrinsic::aarch64_crc32b:
3782 case Intrinsic::aarch64_crc32h:
3783 case Intrinsic::aarch64_crc32w:
3784 case Intrinsic::aarch64_crc32x:
3785 case Intrinsic::aarch64_crc32cb:
3786 case Intrinsic::aarch64_crc32ch:
3787 case Intrinsic::aarch64_crc32cw:
3788 case Intrinsic::aarch64_crc32cx: {
3789 if (!Subtarget->hasCRC())
3790 return false;
3791
3792 unsigned Opc;
3793 switch (II->getIntrinsicID()) {
3794 default:
3795 llvm_unreachable("Unexpected intrinsic!");
3796 case Intrinsic::aarch64_crc32b:
3797 Opc = AArch64::CRC32Brr;
3798 break;
3799 case Intrinsic::aarch64_crc32h:
3800 Opc = AArch64::CRC32Hrr;
3801 break;
3802 case Intrinsic::aarch64_crc32w:
3803 Opc = AArch64::CRC32Wrr;
3804 break;
3805 case Intrinsic::aarch64_crc32x:
3806 Opc = AArch64::CRC32Xrr;
3807 break;
3808 case Intrinsic::aarch64_crc32cb:
3809 Opc = AArch64::CRC32CBrr;
3810 break;
3811 case Intrinsic::aarch64_crc32ch:
3812 Opc = AArch64::CRC32CHrr;
3813 break;
3814 case Intrinsic::aarch64_crc32cw:
3815 Opc = AArch64::CRC32CWrr;
3816 break;
3817 case Intrinsic::aarch64_crc32cx:
3818 Opc = AArch64::CRC32CXrr;
3819 break;
3820 }
3821
3822 Register LHSReg = getRegForValue(V: II->getArgOperand(i: 0));
3823 Register RHSReg = getRegForValue(V: II->getArgOperand(i: 1));
3824 if (!LHSReg || !RHSReg)
3825 return false;
3826
3827 Register ResultReg =
3828 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3829 updateValueMap(I: II, Reg: ResultReg);
3830 return true;
3831 }
3832 }
3833 return false;
3834}
3835
3836bool AArch64FastISel::selectRet(const Instruction *I) {
3837 const ReturnInst *Ret = cast<ReturnInst>(Val: I);
3838 const Function &F = *I->getParent()->getParent();
3839
3840 if (!FuncInfo.CanLowerReturn)
3841 return false;
3842
3843 if (F.isVarArg())
3844 return false;
3845
3846 if (TLI.supportSwiftError() &&
3847 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3848 return false;
3849
3850 if (TLI.supportSplitCSR(MF: FuncInfo.MF))
3851 return false;
3852
3853 // Build a list of return value registers.
3854 SmallVector<unsigned, 4> RetRegs;
3855
3856 if (Ret->getNumOperands() > 0) {
3857 CallingConv::ID CC = F.getCallingConv();
3858 SmallVector<ISD::OutputArg, 4> Outs;
3859 GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);
3860
3861 // Analyze operands of the call, assigning locations to each operand.
3862 SmallVector<CCValAssign, 16> ValLocs;
3863 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3864 CCInfo.AnalyzeReturn(Outs, Fn: RetCC_AArch64_AAPCS);
3865
3866 // Only handle a single return value for now.
3867 if (ValLocs.size() != 1)
3868 return false;
3869
3870 CCValAssign &VA = ValLocs[0];
3871 const Value *RV = Ret->getOperand(i_nocapture: 0);
3872
3873 // Don't bother handling odd stuff for now.
3874 if ((VA.getLocInfo() != CCValAssign::Full) &&
3875 (VA.getLocInfo() != CCValAssign::BCvt))
3876 return false;
3877
3878 // Only handle register returns for now.
3879 if (!VA.isRegLoc())
3880 return false;
3881
3882 Register Reg = getRegForValue(V: RV);
3883 if (Reg == 0)
3884 return false;
3885
3886 unsigned SrcReg = Reg + VA.getValNo();
3887 Register DestReg = VA.getLocReg();
3888 // Avoid a cross-class copy. This is very unlikely.
3889 if (!MRI.getRegClass(Reg: SrcReg)->contains(Reg: DestReg))
3890 return false;
3891
3892 EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
3893 if (!RVEVT.isSimple())
3894 return false;
3895
3896 // Vectors (of > 1 lane) in big endian need tricky handling.
3897 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3898 !Subtarget->isLittleEndian())
3899 return false;
3900
3901 MVT RVVT = RVEVT.getSimpleVT();
3902 if (RVVT == MVT::f128)
3903 return false;
3904
3905 MVT DestVT = VA.getValVT();
3906 // Special handling for extended integers.
3907 if (RVVT != DestVT) {
3908 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3909 return false;
3910
3911 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3912 return false;
3913
3914 bool IsZExt = Outs[0].Flags.isZExt();
3915 SrcReg = emitIntExt(SrcVT: RVVT, SrcReg, DestVT, isZExt: IsZExt);
3916 if (SrcReg == 0)
3917 return false;
3918 }
3919
3920 // "Callee" (i.e. value producer) zero extends pointers at function
3921 // boundary.
3922 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3923 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3924
3925 // Make the copy.
3926 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3927 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg).addReg(RegNo: SrcReg);
3928
3929 // Add register to return instruction.
3930 RetRegs.push_back(Elt: VA.getLocReg());
3931 }
3932
3933 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3934 TII.get(AArch64::RET_ReallyLR));
3935 for (unsigned RetReg : RetRegs)
3936 MIB.addReg(RegNo: RetReg, flags: RegState::Implicit);
3937 return true;
3938}
3939
3940bool AArch64FastISel::selectTrunc(const Instruction *I) {
3941 Type *DestTy = I->getType();
3942 Value *Op = I->getOperand(i: 0);
3943 Type *SrcTy = Op->getType();
3944
3945 EVT SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
3946 EVT DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
3947 if (!SrcEVT.isSimple())
3948 return false;
3949 if (!DestEVT.isSimple())
3950 return false;
3951
3952 MVT SrcVT = SrcEVT.getSimpleVT();
3953 MVT DestVT = DestEVT.getSimpleVT();
3954
3955 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3956 SrcVT != MVT::i8)
3957 return false;
3958 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3959 DestVT != MVT::i1)
3960 return false;
3961
3962 Register SrcReg = getRegForValue(V: Op);
3963 if (!SrcReg)
3964 return false;
3965
3966 // If we're truncating from i64 to a smaller non-legal type then generate an
3967 // AND. Otherwise, we know the high bits are undefined and a truncate only
3968 // generates a COPY. We cannot also mark the source register as the result
3969 // register, because this can incorrectly transfer the kill flag onto the
3970 // source register.
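  // For example (illustrative), trunc i64 %x to i8 is lowered to an extract
  // of the sub_32 sub-register followed by an AND with 0xff.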
3971 unsigned ResultReg;
3972 if (SrcVT == MVT::i64) {
3973 uint64_t Mask = 0;
3974 switch (DestVT.SimpleTy) {
3975 default:
3976 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3977 return false;
3978 case MVT::i1:
3979 Mask = 0x1;
3980 break;
3981 case MVT::i8:
3982 Mask = 0xff;
3983 break;
3984 case MVT::i16:
3985 Mask = 0xffff;
3986 break;
3987 }
3988 // Issue an extract_subreg to get the lower 32-bits.
3989 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3990 AArch64::sub_32);
3991 // Create the AND instruction which performs the actual truncation.
3992 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3993 assert(ResultReg && "Unexpected AND instruction emission failure.");
3994 } else {
3995 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3996 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3997 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
3998 .addReg(RegNo: SrcReg);
3999 }
4000
4001 updateValueMap(I, Reg: ResultReg);
4002 return true;
4003}
4004
4005unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4006 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4007 DestVT == MVT::i64) &&
4008 "Unexpected value type.");
4009 // Handle i8 and i16 as i32.
4010 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4011 DestVT = MVT::i32;
4012
4013 if (IsZExt) {
4014 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4015 assert(ResultReg && "Unexpected AND instruction emission failure.");
4016 if (DestVT == MVT::i64) {
4017 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4018 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4019 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4020 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4021 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4022 .addImm(0)
4023 .addReg(ResultReg)
4024 .addImm(AArch64::sub_32);
4025 ResultReg = Reg64;
4026 }
4027 return ResultReg;
4028 } else {
4029 if (DestVT == MVT::i64) {
4030 // FIXME: We're SExt i1 to i64.
4031 return 0;
4032 }
4033 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4034 0, 0);
4035 }
4036}
4037
4038unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
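  // MUL is an alias of MADD with the zero register, so the multiply is
  // emitted as MADD Rd, Rn, Rm, {W|X}ZR, i.e. Rn * Rm + 0.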
4039 unsigned Opc, ZReg;
4040 switch (RetVT.SimpleTy) {
4041 default: return 0;
4042 case MVT::i8:
4043 case MVT::i16:
4044 case MVT::i32:
4045 RetVT = MVT::i32;
4046 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4047 case MVT::i64:
4048 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4049 }
4050
4051 const TargetRegisterClass *RC =
4052 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4053 return fastEmitInst_rrr(MachineInstOpcode: Opc, RC, Op0, Op1, Op2: ZReg);
4054}
4055
4056unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4057 if (RetVT != MVT::i64)
4058 return 0;
4059
4060 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4061 Op0, Op1, AArch64::XZR);
4062}
4063
4064unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4065 if (RetVT != MVT::i64)
4066 return 0;
4067
4068 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4069 Op0, Op1, AArch64::XZR);
4070}
4071
4072unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4073 unsigned Op1Reg) {
4074 unsigned Opc = 0;
4075 bool NeedTrunc = false;
4076 uint64_t Mask = 0;
4077 switch (RetVT.SimpleTy) {
4078 default: return 0;
4079 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4080 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4081 case MVT::i32: Opc = AArch64::LSLVWr; break;
4082 case MVT::i64: Opc = AArch64::LSLVXr; break;
4083 }
4084
4085 const TargetRegisterClass *RC =
4086 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4087 if (NeedTrunc)
4088 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4089
4090 Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg);
4091 if (NeedTrunc)
4092 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4093 return ResultReg;
4094}
4095
4096unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4097 uint64_t Shift, bool IsZExt) {
4098 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4099 "Unexpected source/return type pair.");
4100 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4101 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4102 "Unexpected source value type.");
4103 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4104 RetVT == MVT::i64) && "Unexpected return value type.");
4105
4106 bool Is64Bit = (RetVT == MVT::i64);
4107 unsigned RegSize = Is64Bit ? 64 : 32;
4108 unsigned DstBits = RetVT.getSizeInBits();
4109 unsigned SrcBits = SrcVT.getSizeInBits();
4110 const TargetRegisterClass *RC =
4111 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4112
4113 // Just emit a copy for "zero" shifts.
4114 if (Shift == 0) {
4115 if (RetVT == SrcVT) {
4116 Register ResultReg = createResultReg(RC);
4117 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4118 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
4119 .addReg(RegNo: Op0);
4120 return ResultReg;
4121 } else
4122 return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4123 }
4124
4125 // Don't deal with undefined shifts.
4126 if (Shift >= DstBits)
4127 return 0;
4128
4129 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4130 // {S|U}BFM Wd, Wn, #r, #s
4131 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4132
4133 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4134 // %2 = shl i16 %1, 4
4135 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4136 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4137 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4138 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4139
4140 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4141 // %2 = shl i16 %1, 8
4142 // Wd<32+7-24,32-24> = Wn<7:0>
4143 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4144 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4145 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4146
4147 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4148 // %2 = shl i16 %1, 12
4149 // Wd<32+3-20,32-20> = Wn<3:0>
4150 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4151 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4152 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4153
4154 unsigned ImmR = RegSize - Shift;
4155 // Limit the width to the length of the source type.
4156 unsigned ImmS = std::min<unsigned>(a: SrcBits - 1, b: DstBits - 1 - Shift);
4157 static const unsigned OpcTable[2][2] = {
4158 {AArch64::SBFMWri, AArch64::SBFMXri},
4159 {AArch64::UBFMWri, AArch64::UBFMXri}
4160 };
4161 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4162 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4163 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
4164 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4165 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4166 .addImm(0)
4167 .addReg(Op0)
4168 .addImm(AArch64::sub_32);
4169 Op0 = TmpReg;
4170 }
4171 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
4172}
4173
4174unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4175 unsigned Op1Reg) {
4176 unsigned Opc = 0;
4177 bool NeedTrunc = false;
4178 uint64_t Mask = 0;
4179 switch (RetVT.SimpleTy) {
4180 default: return 0;
4181 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4182 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4183 case MVT::i32: Opc = AArch64::LSRVWr; break;
4184 case MVT::i64: Opc = AArch64::LSRVXr; break;
4185 }
4186
4187 const TargetRegisterClass *RC =
4188 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4189 if (NeedTrunc) {
4190 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4191 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4192 }
4193 Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg);
4194 if (NeedTrunc)
4195 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4196 return ResultReg;
4197}
4198
4199unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4200 uint64_t Shift, bool IsZExt) {
4201 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4202 "Unexpected source/return type pair.");
4203 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4204 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4205 "Unexpected source value type.");
4206 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4207 RetVT == MVT::i64) && "Unexpected return value type.");
4208
4209 bool Is64Bit = (RetVT == MVT::i64);
4210 unsigned RegSize = Is64Bit ? 64 : 32;
4211 unsigned DstBits = RetVT.getSizeInBits();
4212 unsigned SrcBits = SrcVT.getSizeInBits();
4213 const TargetRegisterClass *RC =
4214 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4215
4216 // Just emit a copy for "zero" shifts.
4217 if (Shift == 0) {
4218 if (RetVT == SrcVT) {
4219 Register ResultReg = createResultReg(RC);
4220 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4221 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
4222 .addReg(RegNo: Op0);
4223 return ResultReg;
4224 } else
4225 return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4226 }
4227
4228 // Don't deal with undefined shifts.
4229 if (Shift >= DstBits)
4230 return 0;
4231
4232 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4233 // {S|U}BFM Wd, Wn, #r, #s
4234 // Wd<s-r:0> = Wn<s:r> when r <= s
4235
4236 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4237 // %2 = lshr i16 %1, 4
4238 // Wd<7-4:0> = Wn<7:4>
4239 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4240 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4241 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4242
4243 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4244 // %2 = lshr i16 %1, 8
4245 // Wd<7-7,0> = Wn<7:7>
4246 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4247 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4248 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4249
4250 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4251 // %2 = lshr i16 %1, 12
4252 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4253 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4254 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4255 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4256
4257 if (Shift >= SrcBits && IsZExt)
4258 return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT);
4259
4260 // It is not possible to fold a sign-extend into the LShr instruction. In this
4261 // case emit a sign-extend.
4262 if (!IsZExt) {
4263 Op0 = emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4264 if (!Op0)
4265 return 0;
4266 SrcVT = RetVT;
4267 SrcBits = SrcVT.getSizeInBits();
4268 IsZExt = true;
4269 }
4270
4271 unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift);
4272 unsigned ImmS = SrcBits - 1;
4273 static const unsigned OpcTable[2][2] = {
4274 {AArch64::SBFMWri, AArch64::SBFMXri},
4275 {AArch64::UBFMWri, AArch64::UBFMXri}
4276 };
4277 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4278 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4279 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
4280 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4281 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4282 .addImm(0)
4283 .addReg(Op0)
4284 .addImm(AArch64::sub_32);
4285 Op0 = TmpReg;
4286 }
4287 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
4288}
4289
4290unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4291 unsigned Op1Reg) {
4292 unsigned Opc = 0;
4293 bool NeedTrunc = false;
4294 uint64_t Mask = 0;
4295 switch (RetVT.SimpleTy) {
4296 default: return 0;
4297 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4298 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4299 case MVT::i32: Opc = AArch64::ASRVWr; break;
4300 case MVT::i64: Opc = AArch64::ASRVXr; break;
4301 }
4302
4303 const TargetRegisterClass *RC =
4304 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4305 if (NeedTrunc) {
4306 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4307 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4308 }
4309 Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg);
4310 if (NeedTrunc)
4311 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4312 return ResultReg;
4313}
4314
4315unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4316 uint64_t Shift, bool IsZExt) {
4317 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4318 "Unexpected source/return type pair.");
4319 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4320 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4321 "Unexpected source value type.");
4322 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4323 RetVT == MVT::i64) && "Unexpected return value type.");
4324
4325 bool Is64Bit = (RetVT == MVT::i64);
4326 unsigned RegSize = Is64Bit ? 64 : 32;
4327 unsigned DstBits = RetVT.getSizeInBits();
4328 unsigned SrcBits = SrcVT.getSizeInBits();
4329 const TargetRegisterClass *RC =
4330 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4331
4332 // Just emit a copy for "zero" shifts.
4333 if (Shift == 0) {
4334 if (RetVT == SrcVT) {
4335 Register ResultReg = createResultReg(RC);
4336 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4337 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
4338 .addReg(RegNo: Op0);
4339 return ResultReg;
4340 } else
4341 return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4342 }
4343
4344 // Don't deal with undefined shifts.
4345 if (Shift >= DstBits)
4346 return 0;
4347
4348 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4349 // {S|U}BFM Wd, Wn, #r, #s
4350 // Wd<s-r:0> = Wn<s:r> when r <= s
4351
4352 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4353 // %2 = ashr i16 %1, 4
4354 // Wd<7-4:0> = Wn<7:4>
4355 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4356 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4357 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4358
4359 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4360 // %2 = ashr i16 %1, 8
4361 // Wd<7-7,0> = Wn<7:7>
4362 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4363 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4364 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4365
4366 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4367 // %2 = ashr i16 %1, 12
4368 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4369 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4370 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4371 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4372
4373 if (Shift >= SrcBits && IsZExt)
4374 return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT);
4375
4376 unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift);
4377 unsigned ImmS = SrcBits - 1;
4378 static const unsigned OpcTable[2][2] = {
4379 {AArch64::SBFMWri, AArch64::SBFMXri},
4380 {AArch64::UBFMWri, AArch64::UBFMXri}
4381 };
4382 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4383 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4384 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
4385 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4386 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4387 .addImm(0)
4388 .addReg(Op0)
4389 .addImm(AArch64::sub_32);
4390 Op0 = TmpReg;
4391 }
4392 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
4393}
4394
4395unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4396 bool IsZExt) {
4397 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4398
4399 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4400 // DestVT are odd things, so test to make sure that they are both types we can
4401 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4402 // bail out to SelectionDAG.
4403 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4404 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4405 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4406 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4407 return 0;
4408
4409 unsigned Opc;
4410 unsigned Imm = 0;
4411
4412 switch (SrcVT.SimpleTy) {
4413 default:
4414 return 0;
4415 case MVT::i1:
4416 return emiti1Ext(SrcReg, DestVT, IsZExt);
4417 case MVT::i8:
4418 if (DestVT == MVT::i64)
4419 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4420 else
4421 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4422 Imm = 7;
4423 break;
4424 case MVT::i16:
4425 if (DestVT == MVT::i64)
4426 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4427 else
4428 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4429 Imm = 15;
4430 break;
4431 case MVT::i32:
4432 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4433 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4434 Imm = 31;
4435 break;
4436 }
4437
4438 // Handle i8 and i16 as i32.
4439 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4440 DestVT = MVT::i32;
4441 else if (DestVT == MVT::i64) {
4442 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4444 TII.get(AArch64::SUBREG_TO_REG), Src64)
4445 .addImm(0)
4446 .addReg(SrcReg)
4447 .addImm(AArch64::sub_32);
4448 SrcReg = Src64;
4449 }
4450
4451 const TargetRegisterClass *RC =
4452 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4453 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0: SrcReg, Imm1: 0, Imm2: Imm);
4454}
4455
4456static bool isZExtLoad(const MachineInstr *LI) {
4457 switch (LI->getOpcode()) {
4458 default:
4459 return false;
4460 case AArch64::LDURBBi:
4461 case AArch64::LDURHHi:
4462 case AArch64::LDURWi:
4463 case AArch64::LDRBBui:
4464 case AArch64::LDRHHui:
4465 case AArch64::LDRWui:
4466 case AArch64::LDRBBroX:
4467 case AArch64::LDRHHroX:
4468 case AArch64::LDRWroX:
4469 case AArch64::LDRBBroW:
4470 case AArch64::LDRHHroW:
4471 case AArch64::LDRWroW:
4472 return true;
4473 }
4474}
4475
4476static bool isSExtLoad(const MachineInstr *LI) {
4477 switch (LI->getOpcode()) {
4478 default:
4479 return false;
4480 case AArch64::LDURSBWi:
4481 case AArch64::LDURSHWi:
4482 case AArch64::LDURSBXi:
4483 case AArch64::LDURSHXi:
4484 case AArch64::LDURSWi:
4485 case AArch64::LDRSBWui:
4486 case AArch64::LDRSHWui:
4487 case AArch64::LDRSBXui:
4488 case AArch64::LDRSHXui:
4489 case AArch64::LDRSWui:
4490 case AArch64::LDRSBWroX:
4491 case AArch64::LDRSHWroX:
4492 case AArch64::LDRSBXroX:
4493 case AArch64::LDRSHXroX:
4494 case AArch64::LDRSWroX:
4495 case AArch64::LDRSBWroW:
4496 case AArch64::LDRSHWroW:
4497 case AArch64::LDRSBXroW:
4498 case AArch64::LDRSHXroW:
4499 case AArch64::LDRSWroW:
4500 return true;
4501 }
4502}
4503
4504bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4505 MVT SrcVT) {
4506 const auto *LI = dyn_cast<LoadInst>(Val: I->getOperand(i: 0));
4507 if (!LI || !LI->hasOneUse())
4508 return false;
4509
4510 // Check if the load instruction has already been selected.
4511 Register Reg = lookUpRegForValue(V: LI);
4512 if (!Reg)
4513 return false;
4514
4515 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4516 if (!MI)
4517 return false;
4518
4519 // Check if the correct load instruction has been emitted - SelectionDAG might
4520 // have emitted a zero-extending load, but we need a sign-extending load.
4521 bool IsZExt = isa<ZExtInst>(Val: I);
4522 const auto *LoadMI = MI;
4523 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4524 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4525 Register LoadReg = MI->getOperand(i: 1).getReg();
4526 LoadMI = MRI.getUniqueVRegDef(Reg: LoadReg);
4527 assert(LoadMI && "Expected valid instruction");
4528 }
4529 if (!(IsZExt && isZExtLoad(LI: LoadMI)) && !(!IsZExt && isSExtLoad(LI: LoadMI)))
4530 return false;
4531
4532 // Nothing to be done.
4533 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4534 updateValueMap(I, Reg);
4535 return true;
4536 }
4537
4538 if (IsZExt) {
4539 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4540 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4541 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4542 .addImm(0)
4543 .addReg(Reg, getKillRegState(true))
4544 .addImm(AArch64::sub_32);
4545 Reg = Reg64;
4546 } else {
4547 assert((MI->getOpcode() == TargetOpcode::COPY &&
4548 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4549 "Expected copy instruction");
4550 Reg = MI->getOperand(i: 1).getReg();
4551 MachineBasicBlock::iterator I(MI);
4552 removeDeadCode(I, E: std::next(x: I));
4553 }
4554 updateValueMap(I, Reg);
4555 return true;
4556}
4557
4558bool AArch64FastISel::selectIntExt(const Instruction *I) {
4559 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4560 "Unexpected integer extend instruction.");
4561 MVT RetVT;
4562 MVT SrcVT;
4563 if (!isTypeSupported(Ty: I->getType(), VT&: RetVT))
4564 return false;
4565
4566 if (!isTypeSupported(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT))
4567 return false;
4568
4569 // Try to optimize already sign-/zero-extended values from load instructions.
4570 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4571 return true;
4572
4573 Register SrcReg = getRegForValue(V: I->getOperand(i: 0));
4574 if (!SrcReg)
4575 return false;
4576
4577 // Try to optimize already sign-/zero-extended values from function arguments.
4578 bool IsZExt = isa<ZExtInst>(Val: I);
4579 if (const auto *Arg = dyn_cast<Argument>(Val: I->getOperand(i: 0))) {
4580 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4581 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4582 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4583 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4584 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4585 .addImm(0)
4586 .addReg(SrcReg)
4587 .addImm(AArch64::sub_32);
4588 SrcReg = ResultReg;
4589 }
4590
4591 updateValueMap(I, Reg: SrcReg);
4592 return true;
4593 }
4594 }
4595
4596 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, DestVT: RetVT, IsZExt);
4597 if (!ResultReg)
4598 return false;
4599
4600 updateValueMap(I, Reg: ResultReg);
4601 return true;
4602}
4603
4604bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4605 EVT DestEVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
4606 if (!DestEVT.isSimple())
4607 return false;
4608
4609 MVT DestVT = DestEVT.getSimpleVT();
4610 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4611 return false;
4612
4613 unsigned DivOpc;
4614 bool Is64bit = (DestVT == MVT::i64);
4615 switch (ISDOpcode) {
4616 default:
4617 return false;
4618 case ISD::SREM:
4619 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4620 break;
4621 case ISD::UREM:
4622 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4623 break;
4624 }
4625 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4626 Register Src0Reg = getRegForValue(V: I->getOperand(i: 0));
4627 if (!Src0Reg)
4628 return false;
4629
4630 Register Src1Reg = getRegForValue(V: I->getOperand(i: 1));
4631 if (!Src1Reg)
4632 return false;
4633
4634 const TargetRegisterClass *RC =
4635 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4636 Register QuotReg = fastEmitInst_rr(MachineInstOpcode: DivOpc, RC, Op0: Src0Reg, Op1: Src1Reg);
4637 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4638 // The remainder is computed as numerator - (quotient * denominator) using the
4639 // MSUB instruction.
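  // Illustrative 32-bit sequence for srem (register choices are examples
  // only): sdiv w8, w0, w1 followed by msub w0, w8, w1, w0, i.e.
  // w0 - w8 * w1.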
4640 Register ResultReg = fastEmitInst_rrr(MachineInstOpcode: MSubOpc, RC, Op0: QuotReg, Op1: Src1Reg, Op2: Src0Reg);
4641 updateValueMap(I, Reg: ResultReg);
4642 return true;
4643}
4644
4645bool AArch64FastISel::selectMul(const Instruction *I) {
4646 MVT VT;
4647 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true))
4648 return false;
4649
4650 if (VT.isVector())
4651 return selectBinaryOp(I, ISDOpcode: ISD::MUL);
4652
4653 const Value *Src0 = I->getOperand(i: 0);
4654 const Value *Src1 = I->getOperand(i: 1);
4655 if (const auto *C = dyn_cast<ConstantInt>(Val: Src0))
4656 if (C->getValue().isPowerOf2())
4657 std::swap(a&: Src0, b&: Src1);
4658
4659 // Try to simplify to a shift instruction.
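  // e.g. a multiply by 8 becomes a left shift by 3; if the operand is itself
  // a free zero-/sign-extend, emitLSL_ri below folds that extend into the
  // shift.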
4660 if (const auto *C = dyn_cast<ConstantInt>(Val: Src1))
4661 if (C->getValue().isPowerOf2()) {
4662 uint64_t ShiftVal = C->getValue().logBase2();
4663 MVT SrcVT = VT;
4664 bool IsZExt = true;
4665 if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Src0)) {
4666 if (!isIntExtFree(I: ZExt)) {
4667 MVT VT;
4668 if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT)) {
4669 SrcVT = VT;
4670 IsZExt = true;
4671 Src0 = ZExt->getOperand(i_nocapture: 0);
4672 }
4673 }
4674 } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Src0)) {
4675 if (!isIntExtFree(I: SExt)) {
4676 MVT VT;
4677 if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT)) {
4678 SrcVT = VT;
4679 IsZExt = false;
4680 Src0 = SExt->getOperand(i_nocapture: 0);
4681 }
4682 }
4683 }
4684
4685 Register Src0Reg = getRegForValue(V: Src0);
4686 if (!Src0Reg)
4687 return false;
4688
4689 unsigned ResultReg =
4690 emitLSL_ri(RetVT: VT, SrcVT, Op0: Src0Reg, Shift: ShiftVal, IsZExt);
4691
4692 if (ResultReg) {
4693 updateValueMap(I, Reg: ResultReg);
4694 return true;
4695 }
4696 }
4697
4698 Register Src0Reg = getRegForValue(V: I->getOperand(i: 0));
4699 if (!Src0Reg)
4700 return false;
4701
4702 Register Src1Reg = getRegForValue(V: I->getOperand(i: 1));
4703 if (!Src1Reg)
4704 return false;
4705
4706 unsigned ResultReg = emitMul_rr(RetVT: VT, Op0: Src0Reg, Op1: Src1Reg);
4707
4708 if (!ResultReg)
4709 return false;
4710
4711 updateValueMap(I, Reg: ResultReg);
4712 return true;
4713}
4714
4715bool AArch64FastISel::selectShift(const Instruction *I) {
4716 MVT RetVT;
4717 if (!isTypeSupported(Ty: I->getType(), VT&: RetVT, /*IsVectorAllowed=*/true))
4718 return false;
4719
4720 if (RetVT.isVector())
4721 return selectOperator(I, Opcode: I->getOpcode());
4722
4723 if (const auto *C = dyn_cast<ConstantInt>(Val: I->getOperand(i: 1))) {
4724 unsigned ResultReg = 0;
4725 uint64_t ShiftVal = C->getZExtValue();
4726 MVT SrcVT = RetVT;
4727 bool IsZExt = I->getOpcode() != Instruction::AShr;
4728 const Value *Op0 = I->getOperand(i: 0);
4729 if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Op0)) {
4730 if (!isIntExtFree(I: ZExt)) {
4731 MVT TmpVT;
4732 if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT&: TmpVT)) {
4733 SrcVT = TmpVT;
4734 IsZExt = true;
4735 Op0 = ZExt->getOperand(i_nocapture: 0);
4736 }
4737 }
4738 } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Op0)) {
4739 if (!isIntExtFree(I: SExt)) {
4740 MVT TmpVT;
4741 if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT&: TmpVT)) {
4742 SrcVT = TmpVT;
4743 IsZExt = false;
4744 Op0 = SExt->getOperand(i_nocapture: 0);
4745 }
4746 }
4747 }
4748
4749 Register Op0Reg = getRegForValue(V: Op0);
4750 if (!Op0Reg)
4751 return false;
4752
4753 switch (I->getOpcode()) {
4754 default: llvm_unreachable("Unexpected instruction.");
4755 case Instruction::Shl:
4756 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
4757 break;
4758 case Instruction::AShr:
4759 ResultReg = emitASR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
4760 break;
4761 case Instruction::LShr:
4762 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
4763 break;
4764 }
4765 if (!ResultReg)
4766 return false;
4767
4768 updateValueMap(I, Reg: ResultReg);
4769 return true;
4770 }
4771
4772 Register Op0Reg = getRegForValue(V: I->getOperand(i: 0));
4773 if (!Op0Reg)
4774 return false;
4775
4776 Register Op1Reg = getRegForValue(V: I->getOperand(i: 1));
4777 if (!Op1Reg)
4778 return false;
4779
4780 unsigned ResultReg = 0;
4781 switch (I->getOpcode()) {
4782 default: llvm_unreachable("Unexpected instruction.");
4783 case Instruction::Shl:
4784 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4785 break;
4786 case Instruction::AShr:
4787 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4788 break;
4789 case Instruction::LShr:
4790 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4791 break;
4792 }
4793
4794 if (!ResultReg)
4795 return false;
4796
4797 updateValueMap(I, Reg: ResultReg);
4798 return true;
4799}
4800
4801bool AArch64FastISel::selectBitCast(const Instruction *I) {
4802 MVT RetVT, SrcVT;
4803
4804 if (!isTypeLegal(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT))
4805 return false;
4806 if (!isTypeLegal(Ty: I->getType(), VT&: RetVT))
4807 return false;
4808
4809 unsigned Opc;
4810 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4811 Opc = AArch64::FMOVWSr;
4812 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4813 Opc = AArch64::FMOVXDr;
4814 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4815 Opc = AArch64::FMOVSWr;
4816 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4817 Opc = AArch64::FMOVDXr;
4818 else
4819 return false;
4820
4821 const TargetRegisterClass *RC = nullptr;
4822 switch (RetVT.SimpleTy) {
4823 default: llvm_unreachable("Unexpected value type.");
4824 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4825 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4826 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4827 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4828 }
4829 Register Op0Reg = getRegForValue(V: I->getOperand(i: 0));
4830 if (!Op0Reg)
4831 return false;
4832
4833 Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC, Op0: Op0Reg);
4834 if (!ResultReg)
4835 return false;
4836
4837 updateValueMap(I, Reg: ResultReg);
4838 return true;
4839}
4840
4841bool AArch64FastISel::selectFRem(const Instruction *I) {
4842 MVT RetVT;
4843 if (!isTypeLegal(Ty: I->getType(), VT&: RetVT))
4844 return false;
4845
4846 RTLIB::Libcall LC;
4847 switch (RetVT.SimpleTy) {
4848 default:
4849 return false;
4850 case MVT::f32:
4851 LC = RTLIB::REM_F32;
4852 break;
4853 case MVT::f64:
4854 LC = RTLIB::REM_F64;
4855 break;
4856 }
4857
4858 ArgListTy Args;
4859 Args.reserve(n: I->getNumOperands());
4860
4861 // Populate the argument list.
4862 for (auto &Arg : I->operands()) {
4863 ArgListEntry Entry;
4864 Entry.Val = Arg;
4865 Entry.Ty = Arg->getType();
4866 Args.push_back(x: Entry);
4867 }
4868
4869 CallLoweringInfo CLI;
4870 MCContext &Ctx = MF->getContext();
4871 CLI.setCallee(DL, Ctx, CC: TLI.getLibcallCallingConv(Call: LC), ResultTy: I->getType(),
4872 Target: TLI.getLibcallName(Call: LC), ArgsList: std::move(Args));
4873 if (!lowerCallTo(CLI))
4874 return false;
4875 updateValueMap(I, Reg: CLI.ResultReg);
4876 return true;
4877}
4878
4879bool AArch64FastISel::selectSDiv(const Instruction *I) {
4880 MVT VT;
4881 if (!isTypeLegal(Ty: I->getType(), VT))
4882 return false;
4883
4884 if (!isa<ConstantInt>(Val: I->getOperand(i: 1)))
4885 return selectBinaryOp(I, ISDOpcode: ISD::SDIV);
4886
4887 const APInt &C = cast<ConstantInt>(Val: I->getOperand(i: 1))->getValue();
4888 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4889 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4890 return selectBinaryOp(I, ISDOpcode: ISD::SDIV);
4891
4892 unsigned Lg2 = C.countr_zero();
4893 Register Src0Reg = getRegForValue(V: I->getOperand(i: 0));
4894 if (!Src0Reg)
4895 return false;
4896
4897 if (cast<BinaryOperator>(Val: I)->isExact()) {
4898 unsigned ResultReg = emitASR_ri(RetVT: VT, SrcVT: VT, Op0: Src0Reg, Shift: Lg2);
4899 if (!ResultReg)
4900 return false;
4901 updateValueMap(I, Reg: ResultReg);
4902 return true;
4903 }
4904
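  // Non-exact signed division must round towards zero, so bias negative
  // dividends by Pow2 - 1 before shifting. Illustrative sequence for
  // sdiv i32 %x, 8 (registers are examples only):
  //   add  w8, w0, #7
  //   cmp  w0, #0
  //   csel w8, w8, w0, lt
  //   asr  w0, w8, #3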
4905 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4906 unsigned AddReg = emitAdd_ri_(VT, Op0: Src0Reg, Imm: Pow2MinusOne);
4907 if (!AddReg)
4908 return false;
4909
4910 // (Src0 < 0) ? Pow2 - 1 : 0;
4911 if (!emitICmp_ri(RetVT: VT, LHSReg: Src0Reg, Imm: 0))
4912 return false;
4913
4914 unsigned SelectOpc;
4915 const TargetRegisterClass *RC;
4916 if (VT == MVT::i64) {
4917 SelectOpc = AArch64::CSELXr;
4918 RC = &AArch64::GPR64RegClass;
4919 } else {
4920 SelectOpc = AArch64::CSELWr;
4921 RC = &AArch64::GPR32RegClass;
4922 }
4923 Register SelectReg = fastEmitInst_rri(MachineInstOpcode: SelectOpc, RC, Op0: AddReg, Op1: Src0Reg,
4924 Imm: AArch64CC::LT);
4925 if (!SelectReg)
4926 return false;
4927
4928 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4929 // negate the result.
4930 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4931 unsigned ResultReg;
4932 if (C.isNegative())
4933 ResultReg = emitAddSub_rs(/*UseAdd=*/false, RetVT: VT, LHSReg: ZeroReg, RHSReg: SelectReg,
4934 ShiftType: AArch64_AM::ASR, ShiftImm: Lg2);
4935 else
4936 ResultReg = emitASR_ri(RetVT: VT, SrcVT: VT, Op0: SelectReg, Shift: Lg2);
4937
4938 if (!ResultReg)
4939 return false;
4940
4941 updateValueMap(I, Reg: ResultReg);
4942 return true;
4943}
4944
/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during
/// the sign-extend emission.
unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  Register IdxN = getRegForValue(Idx);
  if (IdxN == 0)
    // Unhandled operand. Halt "fast" selection and bail.
    return 0;

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return IdxN;
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  if (Subtarget->isTargetILP32())
    return false;

  Register N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs += GTI.getSequentialElementStride(DL) *
                     cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, TotalOffs);
        if (!N)
          return false;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
      unsigned IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, C);
        if (!IdxN)
          return false;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

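/// Select cmpxchg at -O0 by emitting a CMP_SWAP_32/64 pseudo (expanded into an
/// exclusive load/store loop later in the pipeline), comparing the loaded
/// value against the expected one, and materializing the i1 success flag with
/// CSINC.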
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOptLevel::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

  const MCInstrDesc &II = TII.get(Opc);

  const Register AddrReg = constrainOperandRegClass(
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
  const Register DesiredReg = constrainOperandRegClass(
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
  const Register NewReg = constrainOperandRegClass(
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);

  const Register ResultReg1 = createResultReg(ResRC);
  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);

  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
      .addDef(ResultReg1)
      .addDef(ScratchReg)
      .addUse(AddrReg)
      .addUse(DesiredReg)
      .addUse(NewReg);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
  updateValueMap(I, ResultReg1, 2);
  return true;
}

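/// FastISel entry point: dispatch on the IR opcode to the AArch64-specific
/// selectors above, and fall back to the target-independent selectOperator
/// for anything not handled here.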
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  if (TLI.fallBackToDAGISel(*I))
    return false;
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // Fall back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}

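// FastISel is not used for functions with SME ZA/ZT0 state or a streaming
// (or streaming-compatible) interface; returning nullptr makes the caller
// fall back to the default instruction selector.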
FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) {

  SMEAttrs CallerAttrs(*FuncInfo.Fn);
  if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
      CallerAttrs.hasStreamingInterfaceOrBody() ||
      CallerAttrs.hasStreamingCompatibleInterface())
    return nullptr;
  return new AArch64FastISel(FuncInfo, LibInfo);
}
