//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the AArch64-specific support for the FastISel class. Some |
10 | // of the target-specific code is generated by tablegen in the file |
11 | // AArch64GenFastISel.inc, which is #included here. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AArch64.h" |
16 | #include "AArch64CallingConvention.h" |
17 | #include "AArch64MachineFunctionInfo.h" |
18 | #include "AArch64RegisterInfo.h" |
19 | #include "AArch64Subtarget.h" |
20 | #include "MCTargetDesc/AArch64AddressingModes.h" |
21 | #include "Utils/AArch64BaseInfo.h" |
22 | #include "llvm/ADT/APFloat.h" |
23 | #include "llvm/ADT/APInt.h" |
24 | #include "llvm/ADT/DenseMap.h" |
25 | #include "llvm/ADT/SmallVector.h" |
26 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
27 | #include "llvm/CodeGen/CallingConvLower.h" |
28 | #include "llvm/CodeGen/FastISel.h" |
29 | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
30 | #include "llvm/CodeGen/ISDOpcodes.h" |
31 | #include "llvm/CodeGen/MachineBasicBlock.h" |
32 | #include "llvm/CodeGen/MachineConstantPool.h" |
33 | #include "llvm/CodeGen/MachineFrameInfo.h" |
34 | #include "llvm/CodeGen/MachineInstr.h" |
35 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
36 | #include "llvm/CodeGen/MachineMemOperand.h" |
37 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
38 | #include "llvm/CodeGen/RuntimeLibcalls.h" |
39 | #include "llvm/CodeGen/ValueTypes.h" |
40 | #include "llvm/CodeGenTypes/MachineValueType.h" |
41 | #include "llvm/IR/Argument.h" |
42 | #include "llvm/IR/Attributes.h" |
43 | #include "llvm/IR/BasicBlock.h" |
44 | #include "llvm/IR/CallingConv.h" |
45 | #include "llvm/IR/Constant.h" |
46 | #include "llvm/IR/Constants.h" |
47 | #include "llvm/IR/DataLayout.h" |
48 | #include "llvm/IR/DerivedTypes.h" |
49 | #include "llvm/IR/Function.h" |
50 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
51 | #include "llvm/IR/GlobalValue.h" |
52 | #include "llvm/IR/InstrTypes.h" |
53 | #include "llvm/IR/Instruction.h" |
54 | #include "llvm/IR/Instructions.h" |
55 | #include "llvm/IR/IntrinsicInst.h" |
56 | #include "llvm/IR/Intrinsics.h" |
57 | #include "llvm/IR/IntrinsicsAArch64.h" |
58 | #include "llvm/IR/Operator.h" |
59 | #include "llvm/IR/Type.h" |
60 | #include "llvm/IR/User.h" |
61 | #include "llvm/IR/Value.h" |
62 | #include "llvm/MC/MCInstrDesc.h" |
63 | #include "llvm/MC/MCRegisterInfo.h" |
64 | #include "llvm/MC/MCSymbol.h" |
65 | #include "llvm/Support/AtomicOrdering.h" |
66 | #include "llvm/Support/Casting.h" |
67 | #include "llvm/Support/CodeGen.h" |
68 | #include "llvm/Support/Compiler.h" |
69 | #include "llvm/Support/ErrorHandling.h" |
70 | #include "llvm/Support/MathExtras.h" |
71 | #include <algorithm> |
72 | #include <cassert> |
73 | #include <cstdint> |
74 | #include <iterator> |
75 | #include <utility> |
76 | |
77 | using namespace llvm; |
78 | |
79 | namespace { |
80 | |
class AArch64FastISel final : public FastISel {
  // Describes an addressing mode computed for a memory operand: either a
  // register base or a frame-index base, plus an optional offset register
  // (with shift/extend), a constant byte offset, and an optional global.
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    // Discriminated by Kind: Reg is valid for RegBase, FI for FrameIndexBase.
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!" );
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!" );
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }

    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!" );
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!" );
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  // FastISel target hooks (overridden entry points).
  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines: one per IR instruction kind this FastISel handles.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          MaybeAlign Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  unsigned getRegForGEPIndex(const Value *Idx);

  // Emit helper routines. The _rr/_ri/_rs/_rx suffixes name the operand
  // forms: register-register, register-immediate, register-shifted-register,
  // and register-extended-register respectively.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            unsigned RHSReg, uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  // Constant materialization helpers; return 0 on failure.
  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

// Pull in the tablegen-generated pattern-matching helpers (fastEmit_*).
#include "AArch64GenFastISel.inc"
};
295 | |
296 | } // end anonymous namespace |
297 | |
298 | /// Check if the sign-/zero-extend will be a noop. |
299 | static bool isIntExtFree(const Instruction *I) { |
300 | assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
301 | "Unexpected integer extend instruction." ); |
302 | assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && |
303 | "Unexpected value type." ); |
304 | bool IsZExt = isa<ZExtInst>(Val: I); |
305 | |
306 | if (const auto *LI = dyn_cast<LoadInst>(Val: I->getOperand(i: 0))) |
307 | if (LI->hasOneUse()) |
308 | return true; |
309 | |
310 | if (const auto *Arg = dyn_cast<Argument>(Val: I->getOperand(i: 0))) |
311 | if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) |
312 | return true; |
313 | |
314 | return false; |
315 | } |
316 | |
317 | /// Determine the implicit scale factor that is applied by a memory |
318 | /// operation for a given value type. |
319 | static unsigned getImplicitScaleFactor(MVT VT) { |
320 | switch (VT.SimpleTy) { |
321 | default: |
322 | return 0; // invalid |
323 | case MVT::i1: // fall-through |
324 | case MVT::i8: |
325 | return 1; |
326 | case MVT::i16: |
327 | return 2; |
328 | case MVT::i32: // fall-through |
329 | case MVT::f32: |
330 | return 4; |
331 | case MVT::i64: // fall-through |
332 | case MVT::f64: |
333 | return 8; |
334 | } |
335 | } |
336 | |
337 | CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { |
338 | if (CC == CallingConv::GHC) |
339 | return CC_AArch64_GHC; |
340 | if (CC == CallingConv::CFGuard_Check) |
341 | return CC_AArch64_Win64_CFGuard_Check; |
342 | if (Subtarget->isTargetDarwin()) |
343 | return CC_AArch64_DarwinPCS; |
344 | if (Subtarget->isTargetWindows()) |
345 | return CC_AArch64_Win64PCS; |
346 | return CC_AArch64_AAPCS; |
347 | } |
348 | |
349 | unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { |
350 | assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && |
351 | "Alloca should always return a pointer." ); |
352 | |
353 | // Don't handle dynamic allocas. |
354 | if (!FuncInfo.StaticAllocaMap.count(Val: AI)) |
355 | return 0; |
356 | |
357 | DenseMap<const AllocaInst *, int>::iterator SI = |
358 | FuncInfo.StaticAllocaMap.find(Val: AI); |
359 | |
360 | if (SI != FuncInfo.StaticAllocaMap.end()) { |
361 | Register ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass); |
362 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Opcode: ADDXri), |
363 | ResultReg) |
364 | .addFrameIndex(SI->second) |
365 | .addImm(0) |
366 | .addImm(0); |
367 | return ResultReg; |
368 | } |
369 | |
370 | return 0; |
371 | } |
372 | |
373 | unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { |
374 | if (VT > MVT::i64) |
375 | return 0; |
376 | |
377 | if (!CI->isZero()) |
378 | return fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, Imm: CI->getZExtValue()); |
379 | |
380 | // Create a copy from the zero register to materialize a "0" value. |
381 | const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass |
382 | : &AArch64::GPR32RegClass; |
383 | unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
384 | Register ResultReg = createResultReg(RC); |
385 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TargetOpcode::COPY), |
386 | DestReg: ResultReg).addReg(RegNo: ZeroReg, flags: getKillRegState(B: true)); |
387 | return ResultReg; |
388 | } |
389 | |
/// Materialize a floating-point constant into a register, returning 0 on
/// failure. Tries, in order: FMOV-from-zero-register (for +0.0), an FMOV
/// immediate encoding, an integer-register bit pattern (large code model),
/// and finally a constant-pool load.
unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CF: CFP);

  // Only scalar f32/f64 are supported here.
  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(FPImm: Val) : AArch64_AM::getFP32Imm(FPImm: Val);
  if (Imm != -1) {
    // The value fits the 8-bit FMOV immediate encoding.
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT), Imm);
  }

  // For the large code model materialize the FP constant in code.
  // Build the raw bit pattern in a GPR with MOVi*imm, then copy it into an
  // FP register; this avoids the ADRP-based constant-pool addressing below,
  // which assumes the small code model's page-relative reach.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    Register TmpReg = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc1), DestReg: TmpReg)
        .addImm(Val: CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
        .addReg(RegNo: TmpReg, flags: getKillRegState(B: true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType());

  // ADRP forms the 4KB page address of the pool entry; the load below adds
  // the low-12-bit page offset.
  unsigned CPI = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment);
  Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass);
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP),
          DestReg: ADRPReg).addConstantPoolIndex(Idx: CPI, Offset: 0, TargetFlags: AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
      .addReg(RegNo: ADRPReg)
      .addConstantPoolIndex(Idx: CPI, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}
444 | |
/// Materialize the address of a global value into a register, returning 0
/// on failure. Uses either an ADRP+LDR GOT access or a direct ADRP+ADDXri
/// page/page-offset pair, with extra handling for ILP32 pointer widening
/// and MTE-tagged globals.
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  // Decides between a direct reference and a GOT-indirect one (MO_GOT).
  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, Ty: GV->getType(), AllowUnknown: true);
  if (!DestEVT.isSimple())
    return 0;

  Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX: load the global's address out of its GOT slot.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP),
            DestReg: ADRPReg)
        .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_PAGE | OpFlags);

    // ILP32 GOT slots are 32 bits wide, so use a 32-bit load there.
    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(RC: &AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(RC: &AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: LdrOpc),
            DestReg: ResultReg)
        .addReg(RegNo: ADRPReg)
        .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(RC: &AArch64::GPR64RegClass);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
        .addDef(RegNo: Result64)
        .addImm(Val: 0)
        .addReg(RegNo: ResultReg, flags: RegState::Kill)
        .addImm(AArch64::Val: sub_32);
    return Result64;
  } else {
    // ADRP + ADDX: compute the address directly from the page base.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP),
            DestReg: ADRPReg)
        .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_PAGE | OpFlags);

    if (OpFlags & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      //
      // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
      // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
      // are not exactly 1:1 with FastISel so we cannot easily abstract this
      // out. At some point, it would be nice to find a way to not have this
      // duplicate code.
      unsigned DstReg = createResultReg(RC: &AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Opcode: MOVKXi),
              DstReg)
          .addReg(ADRPReg)
          .addGlobalAddress(GV, /*Offset=*/0x100000000,
                            AArch64II::MO_PREL | AArch64II::MO_G3)
          .addImm(48);
      ADRPReg = DstReg;
    }

    ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Opcode: ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}
536 | |
537 | unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { |
538 | EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true); |
539 | |
540 | // Only handle simple types. |
541 | if (!CEVT.isSimple()) |
542 | return 0; |
543 | MVT VT = CEVT.getSimpleVT(); |
544 | // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, |
545 | // 'null' pointers need to have a somewhat special treatment. |
546 | if (isa<ConstantPointerNull>(Val: C)) { |
547 | assert(VT == MVT::i64 && "Expected 64-bit pointers" ); |
548 | return materializeInt(CI: ConstantInt::get(Ty: Type::getInt64Ty(C&: *Context), V: 0), VT); |
549 | } |
550 | |
551 | if (const auto *CI = dyn_cast<ConstantInt>(Val: C)) |
552 | return materializeInt(CI, VT); |
553 | else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C)) |
554 | return materializeFP(CFP, VT); |
555 | else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C)) |
556 | return materializeGV(GV); |
557 | |
558 | return 0; |
559 | } |
560 | |
561 | unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { |
562 | assert(CFP->isNullValue() && |
563 | "Floating-point constant is not a positive zero." ); |
564 | MVT VT; |
565 | if (!isTypeLegal(Ty: CFP->getType(), VT)) |
566 | return 0; |
567 | |
568 | if (VT != MVT::f32 && VT != MVT::f64) |
569 | return 0; |
570 | |
571 | bool Is64Bit = (VT == MVT::f64); |
572 | unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
573 | unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; |
574 | return fastEmitInst_r(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT), Op0: ZReg); |
575 | } |
576 | |
577 | /// Check if the multiply is by a power-of-2 constant. |
578 | static bool isMulPowOf2(const Value *I) { |
579 | if (const auto *MI = dyn_cast<MulOperator>(Val: I)) { |
580 | if (const auto *C = dyn_cast<ConstantInt>(Val: MI->getOperand(i_nocapture: 0))) |
581 | if (C->getValue().isPowerOf2()) |
582 | return true; |
583 | if (const auto *C = dyn_cast<ConstantInt>(Val: MI->getOperand(i_nocapture: 1))) |
584 | if (C->getValue().isPowerOf2()) |
585 | return true; |
586 | } |
587 | return false; |
588 | } |
589 | |
590 | // Computes the address to get to an object. |
591 | bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) |
592 | { |
593 | const User *U = nullptr; |
594 | unsigned Opcode = Instruction::UserOp1; |
595 | if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) { |
596 | // Don't walk into other basic blocks unless the object is an alloca from |
597 | // another block, otherwise it may not have a virtual register assigned. |
598 | if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) || |
599 | FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
600 | Opcode = I->getOpcode(); |
601 | U = I; |
602 | } |
603 | } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) { |
604 | Opcode = C->getOpcode(); |
605 | U = C; |
606 | } |
607 | |
608 | if (auto *Ty = dyn_cast<PointerType>(Val: Obj->getType())) |
609 | if (Ty->getAddressSpace() > 255) |
610 | // Fast instruction selection doesn't support the special |
611 | // address spaces. |
612 | return false; |
613 | |
614 | switch (Opcode) { |
615 | default: |
616 | break; |
617 | case Instruction::BitCast: |
618 | // Look through bitcasts. |
619 | return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty); |
620 | |
621 | case Instruction::IntToPtr: |
622 | // Look past no-op inttoptrs. |
623 | if (TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) == |
624 | TLI.getPointerTy(DL)) |
625 | return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty); |
626 | break; |
627 | |
628 | case Instruction::PtrToInt: |
629 | // Look past no-op ptrtoints. |
630 | if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL)) |
631 | return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty); |
632 | break; |
633 | |
634 | case Instruction::GetElementPtr: { |
635 | Address SavedAddr = Addr; |
636 | uint64_t TmpOffset = Addr.getOffset(); |
637 | |
638 | // Iterate through the GEP folding the constants into offsets where |
639 | // we can. |
640 | for (gep_type_iterator GTI = gep_type_begin(GEP: U), E = gep_type_end(GEP: U); |
641 | GTI != E; ++GTI) { |
642 | const Value *Op = GTI.getOperand(); |
643 | if (StructType *STy = GTI.getStructTypeOrNull()) { |
644 | const StructLayout *SL = DL.getStructLayout(Ty: STy); |
645 | unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue(); |
646 | TmpOffset += SL->getElementOffset(Idx); |
647 | } else { |
648 | uint64_t S = GTI.getSequentialElementStride(DL); |
649 | while (true) { |
650 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) { |
651 | // Constant-offset addressing. |
652 | TmpOffset += CI->getSExtValue() * S; |
653 | break; |
654 | } |
655 | if (canFoldAddIntoGEP(GEP: U, Add: Op)) { |
656 | // A compatible add with a constant operand. Fold the constant. |
657 | ConstantInt *CI = |
658 | cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 1)); |
659 | TmpOffset += CI->getSExtValue() * S; |
660 | // Iterate on the other operand. |
661 | Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 0); |
662 | continue; |
663 | } |
664 | // Unsupported |
665 | goto unsupported_gep; |
666 | } |
667 | } |
668 | } |
669 | |
670 | // Try to grab the base operand now. |
671 | Addr.setOffset(TmpOffset); |
672 | if (computeAddress(Obj: U->getOperand(i: 0), Addr, Ty)) |
673 | return true; |
674 | |
675 | // We failed, restore everything and try the other options. |
676 | Addr = SavedAddr; |
677 | |
678 | unsupported_gep: |
679 | break; |
680 | } |
681 | case Instruction::Alloca: { |
682 | const AllocaInst *AI = cast<AllocaInst>(Val: Obj); |
683 | DenseMap<const AllocaInst *, int>::iterator SI = |
684 | FuncInfo.StaticAllocaMap.find(Val: AI); |
685 | if (SI != FuncInfo.StaticAllocaMap.end()) { |
686 | Addr.setKind(Address::FrameIndexBase); |
687 | Addr.setFI(SI->second); |
688 | return true; |
689 | } |
690 | break; |
691 | } |
692 | case Instruction::Add: { |
693 | // Adds of constants are common and easy enough. |
694 | const Value *LHS = U->getOperand(i: 0); |
695 | const Value *RHS = U->getOperand(i: 1); |
696 | |
697 | if (isa<ConstantInt>(Val: LHS)) |
698 | std::swap(a&: LHS, b&: RHS); |
699 | |
700 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS)) { |
701 | Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); |
702 | return computeAddress(Obj: LHS, Addr, Ty); |
703 | } |
704 | |
705 | Address Backup = Addr; |
706 | if (computeAddress(Obj: LHS, Addr, Ty) && computeAddress(Obj: RHS, Addr, Ty)) |
707 | return true; |
708 | Addr = Backup; |
709 | |
710 | break; |
711 | } |
712 | case Instruction::Sub: { |
713 | // Subs of constants are common and easy enough. |
714 | const Value *LHS = U->getOperand(i: 0); |
715 | const Value *RHS = U->getOperand(i: 1); |
716 | |
717 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS)) { |
718 | Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); |
719 | return computeAddress(Obj: LHS, Addr, Ty); |
720 | } |
721 | break; |
722 | } |
723 | case Instruction::Shl: { |
724 | if (Addr.getOffsetReg()) |
725 | break; |
726 | |
727 | const auto *CI = dyn_cast<ConstantInt>(Val: U->getOperand(i: 1)); |
728 | if (!CI) |
729 | break; |
730 | |
731 | unsigned Val = CI->getZExtValue(); |
732 | if (Val < 1 || Val > 3) |
733 | break; |
734 | |
735 | uint64_t NumBytes = 0; |
736 | if (Ty && Ty->isSized()) { |
737 | uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
738 | NumBytes = NumBits / 8; |
739 | if (!isPowerOf2_64(Value: NumBits)) |
740 | NumBytes = 0; |
741 | } |
742 | |
743 | if (NumBytes != (1ULL << Val)) |
744 | break; |
745 | |
746 | Addr.setShift(Val); |
747 | Addr.setExtendType(AArch64_AM::LSL); |
748 | |
749 | const Value *Src = U->getOperand(i: 0); |
750 | if (const auto *I = dyn_cast<Instruction>(Val: Src)) { |
751 | if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
752 | // Fold the zext or sext when it won't become a noop. |
753 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: I)) { |
754 | if (!isIntExtFree(I: ZE) && |
755 | ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
756 | Addr.setExtendType(AArch64_AM::UXTW); |
757 | Src = ZE->getOperand(i_nocapture: 0); |
758 | } |
759 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: I)) { |
760 | if (!isIntExtFree(I: SE) && |
761 | SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
762 | Addr.setExtendType(AArch64_AM::SXTW); |
763 | Src = SE->getOperand(i_nocapture: 0); |
764 | } |
765 | } |
766 | } |
767 | } |
768 | |
769 | if (const auto *AI = dyn_cast<BinaryOperator>(Val: Src)) |
770 | if (AI->getOpcode() == Instruction::And) { |
771 | const Value *LHS = AI->getOperand(i_nocapture: 0); |
772 | const Value *RHS = AI->getOperand(i_nocapture: 1); |
773 | |
774 | if (const auto *C = dyn_cast<ConstantInt>(Val: LHS)) |
775 | if (C->getValue() == 0xffffffff) |
776 | std::swap(a&: LHS, b&: RHS); |
777 | |
778 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
779 | if (C->getValue() == 0xffffffff) { |
780 | Addr.setExtendType(AArch64_AM::UXTW); |
781 | Register Reg = getRegForValue(V: LHS); |
782 | if (!Reg) |
783 | return false; |
784 | Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32); |
785 | Addr.setOffsetReg(Reg); |
786 | return true; |
787 | } |
788 | } |
789 | |
790 | Register Reg = getRegForValue(V: Src); |
791 | if (!Reg) |
792 | return false; |
793 | Addr.setOffsetReg(Reg); |
794 | return true; |
795 | } |
796 | case Instruction::Mul: { |
797 | if (Addr.getOffsetReg()) |
798 | break; |
799 | |
800 | if (!isMulPowOf2(I: U)) |
801 | break; |
802 | |
803 | const Value *LHS = U->getOperand(i: 0); |
804 | const Value *RHS = U->getOperand(i: 1); |
805 | |
806 | // Canonicalize power-of-2 value to the RHS. |
807 | if (const auto *C = dyn_cast<ConstantInt>(Val: LHS)) |
808 | if (C->getValue().isPowerOf2()) |
809 | std::swap(a&: LHS, b&: RHS); |
810 | |
811 | assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt." ); |
812 | const auto *C = cast<ConstantInt>(Val: RHS); |
813 | unsigned Val = C->getValue().logBase2(); |
814 | if (Val < 1 || Val > 3) |
815 | break; |
816 | |
817 | uint64_t NumBytes = 0; |
818 | if (Ty && Ty->isSized()) { |
819 | uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
820 | NumBytes = NumBits / 8; |
821 | if (!isPowerOf2_64(Value: NumBits)) |
822 | NumBytes = 0; |
823 | } |
824 | |
825 | if (NumBytes != (1ULL << Val)) |
826 | break; |
827 | |
828 | Addr.setShift(Val); |
829 | Addr.setExtendType(AArch64_AM::LSL); |
830 | |
831 | const Value *Src = LHS; |
832 | if (const auto *I = dyn_cast<Instruction>(Val: Src)) { |
833 | if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
834 | // Fold the zext or sext when it won't become a noop. |
835 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: I)) { |
836 | if (!isIntExtFree(I: ZE) && |
837 | ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
838 | Addr.setExtendType(AArch64_AM::UXTW); |
839 | Src = ZE->getOperand(i_nocapture: 0); |
840 | } |
841 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: I)) { |
842 | if (!isIntExtFree(I: SE) && |
843 | SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
844 | Addr.setExtendType(AArch64_AM::SXTW); |
845 | Src = SE->getOperand(i_nocapture: 0); |
846 | } |
847 | } |
848 | } |
849 | } |
850 | |
851 | Register Reg = getRegForValue(V: Src); |
852 | if (!Reg) |
853 | return false; |
854 | Addr.setOffsetReg(Reg); |
855 | return true; |
856 | } |
857 | case Instruction::And: { |
858 | if (Addr.getOffsetReg()) |
859 | break; |
860 | |
861 | if (!Ty || DL.getTypeSizeInBits(Ty) != 8) |
862 | break; |
863 | |
864 | const Value *LHS = U->getOperand(i: 0); |
865 | const Value *RHS = U->getOperand(i: 1); |
866 | |
867 | if (const auto *C = dyn_cast<ConstantInt>(Val: LHS)) |
868 | if (C->getValue() == 0xffffffff) |
869 | std::swap(a&: LHS, b&: RHS); |
870 | |
871 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
872 | if (C->getValue() == 0xffffffff) { |
873 | Addr.setShift(0); |
874 | Addr.setExtendType(AArch64_AM::LSL); |
875 | Addr.setExtendType(AArch64_AM::UXTW); |
876 | |
877 | Register Reg = getRegForValue(V: LHS); |
878 | if (!Reg) |
879 | return false; |
880 | Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32); |
881 | Addr.setOffsetReg(Reg); |
882 | return true; |
883 | } |
884 | break; |
885 | } |
886 | case Instruction::SExt: |
887 | case Instruction::ZExt: { |
888 | if (!Addr.getReg() || Addr.getOffsetReg()) |
889 | break; |
890 | |
891 | const Value *Src = nullptr; |
892 | // Fold the zext or sext when it won't become a noop. |
893 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: U)) { |
894 | if (!isIntExtFree(I: ZE) && ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
895 | Addr.setExtendType(AArch64_AM::UXTW); |
896 | Src = ZE->getOperand(i_nocapture: 0); |
897 | } |
898 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: U)) { |
899 | if (!isIntExtFree(I: SE) && SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
900 | Addr.setExtendType(AArch64_AM::SXTW); |
901 | Src = SE->getOperand(i_nocapture: 0); |
902 | } |
903 | } |
904 | |
905 | if (!Src) |
906 | break; |
907 | |
908 | Addr.setShift(0); |
909 | Register Reg = getRegForValue(V: Src); |
910 | if (!Reg) |
911 | return false; |
912 | Addr.setOffsetReg(Reg); |
913 | return true; |
914 | } |
915 | } // end switch |
916 | |
917 | if (Addr.isRegBase() && !Addr.getReg()) { |
918 | Register Reg = getRegForValue(V: Obj); |
919 | if (!Reg) |
920 | return false; |
921 | Addr.setReg(Reg); |
922 | return true; |
923 | } |
924 | |
925 | if (!Addr.getOffsetReg()) { |
926 | Register Reg = getRegForValue(V: Obj); |
927 | if (!Reg) |
928 | return false; |
929 | Addr.setOffsetReg(Reg); |
930 | return true; |
931 | } |
932 | |
933 | return false; |
934 | } |
935 | |
936 | bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { |
937 | const User *U = nullptr; |
938 | unsigned Opcode = Instruction::UserOp1; |
939 | bool InMBB = true; |
940 | |
941 | if (const auto *I = dyn_cast<Instruction>(Val: V)) { |
942 | Opcode = I->getOpcode(); |
943 | U = I; |
944 | InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); |
945 | } else if (const auto *C = dyn_cast<ConstantExpr>(Val: V)) { |
946 | Opcode = C->getOpcode(); |
947 | U = C; |
948 | } |
949 | |
950 | switch (Opcode) { |
951 | default: break; |
952 | case Instruction::BitCast: |
953 | // Look past bitcasts if its operand is in the same BB. |
954 | if (InMBB) |
955 | return computeCallAddress(V: U->getOperand(i: 0), Addr); |
956 | break; |
957 | case Instruction::IntToPtr: |
958 | // Look past no-op inttoptrs if its operand is in the same BB. |
959 | if (InMBB && |
960 | TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) == |
961 | TLI.getPointerTy(DL)) |
962 | return computeCallAddress(V: U->getOperand(i: 0), Addr); |
963 | break; |
964 | case Instruction::PtrToInt: |
965 | // Look past no-op ptrtoints if its operand is in the same BB. |
966 | if (InMBB && TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL)) |
967 | return computeCallAddress(V: U->getOperand(i: 0), Addr); |
968 | break; |
969 | } |
970 | |
971 | if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: V)) { |
972 | Addr.setGlobalValue(GV); |
973 | return true; |
974 | } |
975 | |
976 | // If all else fails, try to materialize the value in a register. |
977 | if (!Addr.getGlobalValue()) { |
978 | Addr.setReg(getRegForValue(V)); |
979 | return Addr.getReg() != 0; |
980 | } |
981 | |
982 | return false; |
983 | } |
984 | |
985 | bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { |
986 | EVT evt = TLI.getValueType(DL, Ty, AllowUnknown: true); |
987 | |
988 | if (Subtarget->isTargetILP32() && Ty->isPointerTy()) |
989 | return false; |
990 | |
991 | // Only handle simple types. |
992 | if (evt == MVT::Other || !evt.isSimple()) |
993 | return false; |
994 | VT = evt.getSimpleVT(); |
995 | |
996 | // This is a legal type, but it's not something we handle in fast-isel. |
997 | if (VT == MVT::f128) |
998 | return false; |
999 | |
1000 | // Handle all other legal types, i.e. a register that will directly hold this |
1001 | // value. |
1002 | return TLI.isTypeLegal(VT); |
1003 | } |
1004 | |
1005 | /// Determine if the value type is supported by FastISel. |
1006 | /// |
1007 | /// FastISel for AArch64 can handle more value types than are legal. This adds |
1008 | /// simple value type such as i1, i8, and i16. |
1009 | bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { |
1010 | if (Ty->isVectorTy() && !IsVectorAllowed) |
1011 | return false; |
1012 | |
1013 | if (isTypeLegal(Ty, VT)) |
1014 | return true; |
1015 | |
1016 | // If this is a type than can be sign or zero-extended to a basic operation |
1017 | // go ahead and accept it now. |
1018 | if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) |
1019 | return true; |
1020 | |
1021 | return false; |
1022 | } |
1023 | |
1024 | bool AArch64FastISel::isValueAvailable(const Value *V) const { |
1025 | if (!isa<Instruction>(Val: V)) |
1026 | return true; |
1027 | |
1028 | const auto *I = cast<Instruction>(Val: V); |
1029 | return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; |
1030 | } |
1031 | |
// Rewrite \p Addr so a load/store of type \p VT can encode it directly,
// emitting extra ADD/LSL instructions where needed. Returns false if the
// address cannot be made encodable.
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  // ILP32 addressing is not supported by this fast-isel path.
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  // Negative or unaligned offsets must fit the signed 9-bit (unscaled) form;
  // positive aligned offsets must fit the unsigned 12-bit scaled form.
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(x: Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(x: Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
      .addFrameIndex(Addr.getFI())
      .addImm(0)
      .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      // Combine base and (extended/shifted) offset registers into a single
      // register that becomes the new base.
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW )
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      // No base register: materialize the extended/shifted offset register
      // on its own.
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}
1123 | |
// Append the address operands (frame-index+imm, reg+reg+extend, or reg+imm)
// and the memory operand to a load/store instruction being built in \p MIB.
// \p ScaleFactor is the implicit scale of the instruction's immediate offset
// field; the byte offset is divided by it before being encoded.
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI, Offset), F: Flags,
        Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(Idx: FI).addImm(Val: Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind." );
    const MCInstrDesc &II = MIB->getDesc();
    // For stores the value operand comes first, so the address operands start
    // one slot later than for loads.
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
      constrainOperandRegClass(II, Op: Addr.getReg(), OpNum: II.getNumDefs()+Idx));
    Addr.setOffsetReg(
      constrainOperandRegClass(II, Op: Addr.getOffsetReg(), OpNum: II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      // Register-offset form: base, offset register, sign-extension flag, and
      // whether the offset register is shifted by the access size.
      assert(Addr.getOffset() == 0 && "Unexpected offset" );
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(RegNo: Addr.getReg());
      MIB.addReg(RegNo: Addr.getOffsetReg());
      MIB.addImm(Val: IsSigned);
      MIB.addImm(Val: Addr.getShift() != 0);
    } else
      MIB.addReg(RegNo: Addr.getReg()).addImm(Val: Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}
1163 | |
// Shared worker behind emitAdd/emitSub. Emits an add or subtract of \p LHS
// and \p RHS, trying in order: immediate, extended-register, folded
// multiply-by-power-of-2 (as a shifted register), folded shift, and finally
// plain register-register encodings. Returns the result register (a zero
// register when WantResult is false) or 0 if nothing applied. i1/i8/i16
// operands are widened to at least i32 first.
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  // Sub-i32 operations are performed at i32 width on the extended values.
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(Val: LHS) && !isa<Constant>(Val: RHS))
    std::swap(a&: LHS, b&: RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(V: LHS))
    if (isMulPowOf2(I: LHS))
      std::swap(a&: LHS, b&: RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(V: LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(Val: LHS))
      if (isa<ConstantInt>(Val: SI->getOperand(i_nocapture: 1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr )
          std::swap(a&: LHS, b&: RHS);

  Register LHSReg = getRegForValue(V: LHS);
  if (!LHSReg)
    return 0;

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, SrcReg: LHSReg, DestVT: RetVT, isZExt: IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    // A negative immediate is emitted as the inverse operation with the
    // negated immediate (add x, -c  ==  sub x, c).
    if (C->isNegative())
      ResultReg = emitAddSub_ri(UseAdd: !UseAdd, RetVT, LHSReg, Imm: -Imm, SetFlags,
                                WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(Val: RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm: 0, SetFlags, WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(V: RHS)) {
    Register RHSReg = getRegForValue(V: RHS);
    if (!RHSReg)
      return 0;
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtType: ExtendType, ShiftImm: 0,
                         SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
    if (isMulPowOf2(I: RHS)) {
      const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0);
      const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1);

      // Canonicalize the power-of-2 constant to MulRHS.
      if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(a&: MulLHS, b&: MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt." );
      uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2();
      Register RHSReg = getRegForValue(V: MulLHS);
      if (!RHSReg)
        return 0;
      // mul x, 2^n folds to a left-shifted register operand.
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType: AArch64_AM::LSL,
                                ShiftImm: ShiftVal, SetFlags, WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(Val: RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0));
          if (!RHSReg)
            return 0;
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
                                    ShiftImm: ShiftVal, SetFlags, WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  // Fall back to the plain register-register form.
  Register RHSReg = getRegForValue(V: RHS);
  if (!RHSReg)
    return 0;

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, SrcReg: RHSReg, DestVT: RetVT, isZExt: IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}
1297 | |
1298 | unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1299 | unsigned RHSReg, bool SetFlags, |
1300 | bool WantResult) { |
1301 | assert(LHSReg && RHSReg && "Invalid register number." ); |
1302 | |
1303 | if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || |
1304 | RHSReg == AArch64::SP || RHSReg == AArch64::WSP) |
1305 | return 0; |
1306 | |
1307 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1308 | return 0; |
1309 | |
1310 | static const unsigned OpcTable[2][2][2] = { |
1311 | { { AArch64::SUBWrr, AArch64::SUBXrr }, |
1312 | { AArch64::ADDWrr, AArch64::ADDXrr } }, |
1313 | { { AArch64::SUBSWrr, AArch64::SUBSXrr }, |
1314 | { AArch64::ADDSWrr, AArch64::ADDSXrr } } |
1315 | }; |
1316 | bool Is64Bit = RetVT == MVT::i64; |
1317 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1318 | const TargetRegisterClass *RC = |
1319 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1320 | unsigned ResultReg; |
1321 | if (WantResult) |
1322 | ResultReg = createResultReg(RC); |
1323 | else |
1324 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1325 | |
1326 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1327 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1328 | RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1); |
1329 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1330 | .addReg(RegNo: LHSReg) |
1331 | .addReg(RegNo: RHSReg); |
1332 | return ResultReg; |
1333 | } |
1334 | |
1335 | unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1336 | uint64_t Imm, bool SetFlags, |
1337 | bool WantResult) { |
1338 | assert(LHSReg && "Invalid register number." ); |
1339 | |
1340 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1341 | return 0; |
1342 | |
1343 | unsigned ShiftImm; |
1344 | if (isUInt<12>(x: Imm)) |
1345 | ShiftImm = 0; |
1346 | else if ((Imm & 0xfff000) == Imm) { |
1347 | ShiftImm = 12; |
1348 | Imm >>= 12; |
1349 | } else |
1350 | return 0; |
1351 | |
1352 | static const unsigned OpcTable[2][2][2] = { |
1353 | { { AArch64::SUBWri, AArch64::SUBXri }, |
1354 | { AArch64::ADDWri, AArch64::ADDXri } }, |
1355 | { { AArch64::SUBSWri, AArch64::SUBSXri }, |
1356 | { AArch64::ADDSWri, AArch64::ADDSXri } } |
1357 | }; |
1358 | bool Is64Bit = RetVT == MVT::i64; |
1359 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1360 | const TargetRegisterClass *RC; |
1361 | if (SetFlags) |
1362 | RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1363 | else |
1364 | RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; |
1365 | unsigned ResultReg; |
1366 | if (WantResult) |
1367 | ResultReg = createResultReg(RC); |
1368 | else |
1369 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1370 | |
1371 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1372 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1373 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1374 | .addReg(RegNo: LHSReg) |
1375 | .addImm(Val: Imm) |
1376 | .addImm(Val: getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm)); |
1377 | return ResultReg; |
1378 | } |
1379 | |
1380 | unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1381 | unsigned RHSReg, |
1382 | AArch64_AM::ShiftExtendType ShiftType, |
1383 | uint64_t ShiftImm, bool SetFlags, |
1384 | bool WantResult) { |
1385 | assert(LHSReg && RHSReg && "Invalid register number." ); |
1386 | assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && |
1387 | RHSReg != AArch64::SP && RHSReg != AArch64::WSP); |
1388 | |
1389 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1390 | return 0; |
1391 | |
1392 | // Don't deal with undefined shifts. |
1393 | if (ShiftImm >= RetVT.getSizeInBits()) |
1394 | return 0; |
1395 | |
1396 | static const unsigned OpcTable[2][2][2] = { |
1397 | { { AArch64::SUBWrs, AArch64::SUBXrs }, |
1398 | { AArch64::ADDWrs, AArch64::ADDXrs } }, |
1399 | { { AArch64::SUBSWrs, AArch64::SUBSXrs }, |
1400 | { AArch64::ADDSWrs, AArch64::ADDSXrs } } |
1401 | }; |
1402 | bool Is64Bit = RetVT == MVT::i64; |
1403 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1404 | const TargetRegisterClass *RC = |
1405 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1406 | unsigned ResultReg; |
1407 | if (WantResult) |
1408 | ResultReg = createResultReg(RC); |
1409 | else |
1410 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1411 | |
1412 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1413 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1414 | RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1); |
1415 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1416 | .addReg(RegNo: LHSReg) |
1417 | .addReg(RegNo: RHSReg) |
1418 | .addImm(Val: getShifterImm(ST: ShiftType, Imm: ShiftImm)); |
1419 | return ResultReg; |
1420 | } |
1421 | |
1422 | unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1423 | unsigned RHSReg, |
1424 | AArch64_AM::ShiftExtendType ExtType, |
1425 | uint64_t ShiftImm, bool SetFlags, |
1426 | bool WantResult) { |
1427 | assert(LHSReg && RHSReg && "Invalid register number." ); |
1428 | assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && |
1429 | RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); |
1430 | |
1431 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1432 | return 0; |
1433 | |
1434 | if (ShiftImm >= 4) |
1435 | return 0; |
1436 | |
1437 | static const unsigned OpcTable[2][2][2] = { |
1438 | { { AArch64::SUBWrx, AArch64::SUBXrx }, |
1439 | { AArch64::ADDWrx, AArch64::ADDXrx } }, |
1440 | { { AArch64::SUBSWrx, AArch64::SUBSXrx }, |
1441 | { AArch64::ADDSWrx, AArch64::ADDSXrx } } |
1442 | }; |
1443 | bool Is64Bit = RetVT == MVT::i64; |
1444 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1445 | const TargetRegisterClass *RC = nullptr; |
1446 | if (SetFlags) |
1447 | RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1448 | else |
1449 | RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; |
1450 | unsigned ResultReg; |
1451 | if (WantResult) |
1452 | ResultReg = createResultReg(RC); |
1453 | else |
1454 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1455 | |
1456 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1457 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1458 | RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1); |
1459 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1460 | .addReg(RegNo: LHSReg) |
1461 | .addReg(RegNo: RHSReg) |
1462 | .addImm(Val: getArithExtendImm(ET: ExtType, Imm: ShiftImm)); |
1463 | return ResultReg; |
1464 | } |
1465 | |
1466 | bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { |
1467 | Type *Ty = LHS->getType(); |
1468 | EVT EVT = TLI.getValueType(DL, Ty, AllowUnknown: true); |
1469 | if (!EVT.isSimple()) |
1470 | return false; |
1471 | MVT VT = EVT.getSimpleVT(); |
1472 | |
1473 | switch (VT.SimpleTy) { |
1474 | default: |
1475 | return false; |
1476 | case MVT::i1: |
1477 | case MVT::i8: |
1478 | case MVT::i16: |
1479 | case MVT::i32: |
1480 | case MVT::i64: |
1481 | return emitICmp(RetVT: VT, LHS, RHS, IsZExt); |
1482 | case MVT::f32: |
1483 | case MVT::f64: |
1484 | return emitFCmp(RetVT: VT, LHS, RHS); |
1485 | } |
1486 | } |
1487 | |
1488 | bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, |
1489 | bool IsZExt) { |
1490 | return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, |
1491 | IsZExt) != 0; |
1492 | } |
1493 | |
1494 | bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) { |
1495 | return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm, |
1496 | /*SetFlags=*/true, /*WantResult=*/false) != 0; |
1497 | } |
1498 | |
1499 | bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { |
1500 | if (RetVT != MVT::f32 && RetVT != MVT::f64) |
1501 | return false; |
1502 | |
1503 | // Check to see if the 2nd operand is a constant that we can encode directly |
1504 | // in the compare. |
1505 | bool UseImm = false; |
1506 | if (const auto *CFP = dyn_cast<ConstantFP>(Val: RHS)) |
1507 | if (CFP->isZero() && !CFP->isNegative()) |
1508 | UseImm = true; |
1509 | |
1510 | Register LHSReg = getRegForValue(V: LHS); |
1511 | if (!LHSReg) |
1512 | return false; |
1513 | |
1514 | if (UseImm) { |
1515 | unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri; |
1516 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc)) |
1517 | .addReg(RegNo: LHSReg); |
1518 | return true; |
1519 | } |
1520 | |
1521 | Register RHSReg = getRegForValue(V: RHS); |
1522 | if (!RHSReg) |
1523 | return false; |
1524 | |
1525 | unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; |
1526 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc)) |
1527 | .addReg(RegNo: LHSReg) |
1528 | .addReg(RegNo: RHSReg); |
1529 | return true; |
1530 | } |
1531 | |
1532 | unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
1533 | bool SetFlags, bool WantResult, bool IsZExt) { |
1534 | return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, |
1535 | IsZExt); |
1536 | } |
1537 | |
1538 | /// This method is a wrapper to simplify add emission. |
1539 | /// |
1540 | /// First try to emit an add with an immediate operand using emitAddSub_ri. If |
1541 | /// that fails, then try to materialize the immediate into a register and use |
1542 | /// emitAddSub_rr instead. |
1543 | unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) { |
1544 | unsigned ResultReg; |
1545 | if (Imm < 0) |
1546 | ResultReg = emitAddSub_ri(UseAdd: false, RetVT: VT, LHSReg: Op0, Imm: -Imm); |
1547 | else |
1548 | ResultReg = emitAddSub_ri(UseAdd: true, RetVT: VT, LHSReg: Op0, Imm); |
1549 | |
1550 | if (ResultReg) |
1551 | return ResultReg; |
1552 | |
1553 | unsigned CReg = fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, Imm); |
1554 | if (!CReg) |
1555 | return 0; |
1556 | |
1557 | ResultReg = emitAddSub_rr(UseAdd: true, RetVT: VT, LHSReg: Op0, RHSReg: CReg); |
1558 | return ResultReg; |
1559 | } |
1560 | |
1561 | unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
1562 | bool SetFlags, bool WantResult, bool IsZExt) { |
1563 | return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, |
1564 | IsZExt); |
1565 | } |
1566 | |
1567 | unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, |
1568 | unsigned RHSReg, bool WantResult) { |
1569 | return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, |
1570 | /*SetFlags=*/true, WantResult); |
1571 | } |
1572 | |
1573 | unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, |
1574 | unsigned RHSReg, |
1575 | AArch64_AM::ShiftExtendType ShiftType, |
1576 | uint64_t ShiftImm, bool WantResult) { |
1577 | return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType, |
1578 | ShiftImm, /*SetFlags=*/true, WantResult); |
1579 | } |
1580 | |
// Emit a logical operation (AND/OR/XOR selected by ISDOpc) of LHS and RHS,
// trying progressively cheaper encodings: immediate form, folded
// mul-by-power-of-2 (as a shifted operand), folded shift-by-constant, and
// finally the plain register-register form. Returns the result register, or
// 0 if nothing could be emitted.
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS))
    std::swap(a&: LHS, b&: RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(V: LHS))
    if (isMulPowOf2(I: LHS))
      std::swap(a&: LHS, b&: RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(V: LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(Val: LHS))
      if (isa<ConstantInt>(Val: SI->getOperand(i_nocapture: 1)))
        std::swap(a&: LHS, b&: RHS);

  Register LHSReg = getRegForValue(V: LHS);
  if (!LHSReg)
    return 0;

  // Try the immediate form first.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
    if (isMulPowOf2(I: RHS)) {
      const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0);
      const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1);

      // Put the power-of-2 constant on the mul's RHS.
      if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(a&: MulLHS, b&: MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt." );
      // mul x, 2^n becomes a left-shift by n folded into the logical op.
      uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2();

      Register RHSReg = getRegForValue(V: MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(Val: RHS))
      if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) {
        uint64_t ShiftVal = C->getZExtValue();
        Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0));
        if (!RHSReg)
          return 0;
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  // Fall back to the plain register-register form.
  Register RHSReg = getRegForValue(V: RHS);
  if (!RHSReg)
    return 0;

  // Narrow types are computed in at least a 32-bit register.
  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISDOpc, Op0: LHSReg, Op1: RHSReg);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    // Re-clear the bits above the narrow result width.
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}
1658 | |
1659 | unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, |
1660 | unsigned LHSReg, uint64_t Imm) { |
1661 | static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
1662 | "ISD nodes are not consecutive!" ); |
1663 | static const unsigned OpcTable[3][2] = { |
1664 | { AArch64::ANDWri, AArch64::ANDXri }, |
1665 | { AArch64::ORRWri, AArch64::ORRXri }, |
1666 | { AArch64::EORWri, AArch64::EORXri } |
1667 | }; |
1668 | const TargetRegisterClass *RC; |
1669 | unsigned Opc; |
1670 | unsigned RegSize; |
1671 | switch (RetVT.SimpleTy) { |
1672 | default: |
1673 | return 0; |
1674 | case MVT::i1: |
1675 | case MVT::i8: |
1676 | case MVT::i16: |
1677 | case MVT::i32: { |
1678 | unsigned Idx = ISDOpc - ISD::AND; |
1679 | Opc = OpcTable[Idx][0]; |
1680 | RC = &AArch64::GPR32spRegClass; |
1681 | RegSize = 32; |
1682 | break; |
1683 | } |
1684 | case MVT::i64: |
1685 | Opc = OpcTable[ISDOpc - ISD::AND][1]; |
1686 | RC = &AArch64::GPR64spRegClass; |
1687 | RegSize = 64; |
1688 | break; |
1689 | } |
1690 | |
1691 | if (!AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) |
1692 | return 0; |
1693 | |
1694 | Register ResultReg = |
1695 | fastEmitInst_ri(MachineInstOpcode: Opc, RC, Op0: LHSReg, |
1696 | Imm: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize)); |
1697 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { |
1698 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
1699 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); |
1700 | } |
1701 | return ResultReg; |
1702 | } |
1703 | |
1704 | unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, |
1705 | unsigned LHSReg, unsigned RHSReg, |
1706 | uint64_t ShiftImm) { |
1707 | static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
1708 | "ISD nodes are not consecutive!" ); |
1709 | static const unsigned OpcTable[3][2] = { |
1710 | { AArch64::ANDWrs, AArch64::ANDXrs }, |
1711 | { AArch64::ORRWrs, AArch64::ORRXrs }, |
1712 | { AArch64::EORWrs, AArch64::EORXrs } |
1713 | }; |
1714 | |
1715 | // Don't deal with undefined shifts. |
1716 | if (ShiftImm >= RetVT.getSizeInBits()) |
1717 | return 0; |
1718 | |
1719 | const TargetRegisterClass *RC; |
1720 | unsigned Opc; |
1721 | switch (RetVT.SimpleTy) { |
1722 | default: |
1723 | return 0; |
1724 | case MVT::i1: |
1725 | case MVT::i8: |
1726 | case MVT::i16: |
1727 | case MVT::i32: |
1728 | Opc = OpcTable[ISDOpc - ISD::AND][0]; |
1729 | RC = &AArch64::GPR32RegClass; |
1730 | break; |
1731 | case MVT::i64: |
1732 | Opc = OpcTable[ISDOpc - ISD::AND][1]; |
1733 | RC = &AArch64::GPR64RegClass; |
1734 | break; |
1735 | } |
1736 | Register ResultReg = |
1737 | fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: LHSReg, Op1: RHSReg, |
1738 | Imm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm)); |
1739 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { |
1740 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
1741 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); |
1742 | } |
1743 | return ResultReg; |
1744 | } |
1745 | |
1746 | unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, |
1747 | uint64_t Imm) { |
1748 | return emitLogicalOp_ri(ISDOpc: ISD::AND, RetVT, LHSReg, Imm); |
1749 | } |
1750 | |
// Emit a load of type VT from Addr, widening the result to RetVT when a
// sign-/zero-extend has been folded in. Returns the result register, or 0 if
// the load cannot be handled here.
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                   bool WantZExt, MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return 0;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return 0;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type." );

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  // GP load opcodes indexed as [WantZExt][2 * AddrModeIdx + IsRet64Bit][size],
  // where size is 0=i8, 1=i16, 2=i32, 3=i64. Row pairs cover, in order:
  // unscaled immediate, scaled immediate, extended-register (roX), and
  // shifted/extended W-register (roW) addressing.
  static const unsigned GPOpcTable[2][8][4] = {
    // Sign-extend.
    { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
        AArch64::LDURXi },
      { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
        AArch64::LDURXi },
      { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
        AArch64::LDRXui },
      { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
        AArch64::LDRXui },
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
        AArch64::LDRXroW }
    },
    // Zero-extend.
    { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
        AArch64::LDURXi },
      { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
        AArch64::LDURXi },
      { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
        AArch64::LDRXui },
      { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
        AArch64::LDRXui },
      { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
        AArch64::LDRXroW }
    }
  };

  // FP load opcodes indexed as [AddrModeIdx][0=f32, 1=f64].
  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi, AArch64::LDURDi },
    { AArch64::LDRSui, AArch64::LDRDui },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
  };

  unsigned Opc;
  const TargetRegisterClass *RC;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  // Addressing-mode index: 0 = unscaled imm, 1 = scaled imm, 2 = reg offset;
  // +1 selects the W-register (extended) variant.
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  bool IsRet64Bit = RetVT == MVT::i64;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type." );
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }

  // Create the base instruction, then add the operands.
  Register ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                    MCID: TII.get(Opcode: Opc), DestReg: ResultReg);
  addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling: mask the result down to bit 0.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure." );
    ResultReg = ANDReg;
  }

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }
  return ResultReg;
}
1887 | |
1888 | bool AArch64FastISel::selectAddSub(const Instruction *I) { |
1889 | MVT VT; |
1890 | if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true)) |
1891 | return false; |
1892 | |
1893 | if (VT.isVector()) |
1894 | return selectOperator(I, Opcode: I->getOpcode()); |
1895 | |
1896 | unsigned ResultReg; |
1897 | switch (I->getOpcode()) { |
1898 | default: |
1899 | llvm_unreachable("Unexpected instruction." ); |
1900 | case Instruction::Add: |
1901 | ResultReg = emitAdd(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1902 | break; |
1903 | case Instruction::Sub: |
1904 | ResultReg = emitSub(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1905 | break; |
1906 | } |
1907 | if (!ResultReg) |
1908 | return false; |
1909 | |
1910 | updateValueMap(I, Reg: ResultReg); |
1911 | return true; |
1912 | } |
1913 | |
1914 | bool AArch64FastISel::selectLogicalOp(const Instruction *I) { |
1915 | MVT VT; |
1916 | if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true)) |
1917 | return false; |
1918 | |
1919 | if (VT.isVector()) |
1920 | return selectOperator(I, Opcode: I->getOpcode()); |
1921 | |
1922 | unsigned ResultReg; |
1923 | switch (I->getOpcode()) { |
1924 | default: |
1925 | llvm_unreachable("Unexpected instruction." ); |
1926 | case Instruction::And: |
1927 | ResultReg = emitLogicalOp(ISDOpc: ISD::AND, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1928 | break; |
1929 | case Instruction::Or: |
1930 | ResultReg = emitLogicalOp(ISDOpc: ISD::OR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1931 | break; |
1932 | case Instruction::Xor: |
1933 | ResultReg = emitLogicalOp(ISDOpc: ISD::XOR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1934 | break; |
1935 | } |
1936 | if (!ResultReg) |
1937 | return false; |
1938 | |
1939 | updateValueMap(I, Reg: ResultReg); |
1940 | return true; |
1941 | } |
1942 | |
// Select an IR load, folding a single-use sign-/zero-extend of the loaded
// value into the load instruction when possible.
bool AArch64FastISel::selectLoad(const Instruction *I) {
  MVT VT;
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true) ||
      cast<LoadInst>(Val: I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(i: 0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(Obj: I->getOperand(i: 0), Addr, Ty: I->getType()))
    return false;

  // Fold the following sign-/zero-extend into the load instruction.
  // WantZExt defaults to true; it is cleared when a sext user is folded.
  bool WantZExt = true;
  MVT RetVT = VT;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(Val: I->use_begin()->getUser())) {
      if (isTypeSupported(Ty: ZE->getType(), VT&: RetVT))
        IntExtVal = ZE;
      else
        RetVT = VT;
    } else if (const auto *SE = dyn_cast<SExtInst>(Val: I->use_begin()->getUser())) {
      if (isTypeSupported(Ty: SE->getType(), VT&: RetVT))
        IntExtVal = SE;
      else
        RetVT = VT;
      WantZExt = false;
    }
  }

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, MMO: createMachineMemOperandFor(I));
  if (!ResultReg)
    return false;

  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall-back to
  // SelectionDAG. There is also an ordering issue when both instructions are in
  // different basic blocks.
  // 1.) The load instruction is selected by FastISel, but the integer extend
  //     not. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
  //     by FastISel. This happens if there are instructions between the load
  //     and the integer extend that couldn't be selected by FastISel.
  if (IntExtVal) {
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    // could select it. Emit a copy to subreg if necessary. FastISel will remove
    // it when it selects the integer extend.
    Register Reg = lookUpRegForValue(V: IntExtVal);
    auto *MI = MRI.getUniqueVRegDef(Reg);
    if (!MI) {
      if (RetVT == MVT::i64 && VT <= MVT::i32) {
        if (WantZExt) {
          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
          MachineBasicBlock::iterator I(std::prev(x: FuncInfo.InsertPt));
          ResultReg = std::prev(x: I)->getOperand(i: 0).getReg();
          removeDeadCode(I, E: std::next(x: I));
        } else
          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
                                                 AArch64::sub_32);
      }
      updateValueMap(I, Reg: ResultReg);
      return true;
    }

    // The integer extend has already been emitted - delete all the instructions
    // that have been emitted by the integer extend lowering code and use the
    // result from the load instruction directly.
    while (MI) {
      Reg = 0;
      // Grab the first register the instruction reads; that feeds the next
      // iteration so the whole chain of extend instructions gets erased.
      for (auto &Opnd : MI->uses()) {
        if (Opnd.isReg()) {
          Reg = Opnd.getReg();
          break;
        }
      }
      MachineBasicBlock::iterator I(MI);
      removeDeadCode(I, E: std::next(x: I));
      MI = nullptr;
      if (Reg)
        MI = MRI.getUniqueVRegDef(Reg);
    }
    updateValueMap(I: IntExtVal, Reg: ResultReg);
    return true;
  }

  updateValueMap(I, Reg: ResultReg);
  return true;
}
2053 | |
2054 | bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, |
2055 | unsigned AddrReg, |
2056 | MachineMemOperand *MMO) { |
2057 | unsigned Opc; |
2058 | switch (VT.SimpleTy) { |
2059 | default: return false; |
2060 | case MVT::i8: Opc = AArch64::STLRB; break; |
2061 | case MVT::i16: Opc = AArch64::STLRH; break; |
2062 | case MVT::i32: Opc = AArch64::STLRW; break; |
2063 | case MVT::i64: Opc = AArch64::STLRX; break; |
2064 | } |
2065 | |
2066 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
2067 | SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: 0); |
2068 | AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: 1); |
2069 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II) |
2070 | .addReg(RegNo: SrcReg) |
2071 | .addReg(RegNo: AddrReg) |
2072 | .addMemOperand(MMO); |
2073 | return true; |
2074 | } |
2075 | |
// Emit a store of SrcReg (type VT) to Addr. Returns false if the store cannot
// be handled here.
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
                                MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return false;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type." );

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  // Rows follow the addressing-mode index computed below; columns are
  // i8, i16, i32, i64, f32, f64.
  static const unsigned OpcTable[4][6] = {
    { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
      AArch64::STURSi, AArch64::STURDi },
    { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
      AArch64::STRSui, AArch64::STRDui },
    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
      AArch64::STRSroX, AArch64::STRDroX },
    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
      AArch64::STRSroW, AArch64::STRDroW }
  };

  unsigned Opc;
  bool VTIsi1 = false;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  // Addressing-mode index: 0 = unscaled imm, 1 = scaled imm, 2 = reg offset;
  // +1 selects the W-register (extended) variant.
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  switch (VT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type." );
  case MVT::i1: VTIsi1 = true; [[fallthrough]];
  case MVT::i8: Opc = OpcTable[Idx][0]; break;
  case MVT::i16: Opc = OpcTable[Idx][1]; break;
  case MVT::i32: Opc = OpcTable[Idx][2]; break;
  case MVT::i64: Opc = OpcTable[Idx][3]; break;
  case MVT::f32: Opc = OpcTable[Idx][4]; break;
  case MVT::f64: Opc = OpcTable[Idx][5]; break;
  }

  // Storing an i1 requires special handling: mask the source down to bit 0.
  // WZR is already all-zero, so it needs no mask.
  if (VTIsi1 && SrcReg != AArch64::WZR) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure." );
    SrcReg = ANDReg;
  }
  // Create the base instruction, then add the operands.
  const MCInstrDesc &II = TII.get(Opcode: Opc);
  SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs());
  MachineInstrBuilder MIB =
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: SrcReg);
  addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOStore, ScaleFactor, MMO);

  return true;
}
2143 | |
// Select an IR store instruction. Uses WZR/XZR for zero values, emits STLR for
// release-or-stronger atomic stores, and a plain store otherwise.
bool AArch64FastISel::selectStore(const Instruction *I) {
  MVT VT;
  const Value *Op0 = I->getOperand(i: 0);
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(Ty: Op0->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  const Value *PtrV = I->getOperand(i: 1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Get the value to be stored into a register. Use the zero register directly
  // when possible to avoid an unnecessary copy and a wasted register.
  unsigned SrcReg = 0;
  if (const auto *CI = dyn_cast<ConstantInt>(Val: Op0)) {
    if (CI->isZero())
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  } else if (const auto *CF = dyn_cast<ConstantFP>(Val: Op0)) {
    // Only +0.0 has an all-zero bit pattern; -0.0 does not, so it is excluded.
    if (CF->isZero() && !CF->isNegative()) {
      // Re-type the store as an integer store of the same width so the zero
      // register can be used.
      VT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits());
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    }
  }

  if (!SrcReg)
    SrcReg = getRegForValue(V: Op0);

  if (!SrcReg)
    return false;

  auto *SI = cast<StoreInst>(Val: I);

  // Try to emit a STLR for seq_cst/release.
  if (SI->isAtomic()) {
    AtomicOrdering Ord = SI->getOrdering();
    // The non-atomic instructions are sufficient for relaxed stores.
    if (isReleaseOrStronger(AO: Ord)) {
      // The STLR addressing mode only supports a base reg; pass that directly.
      // NOTE(review): AddrReg is not checked for 0 before use here — confirm
      // emitStoreRelease/downstream tolerates an invalid register.
      Register AddrReg = getRegForValue(V: PtrV);
      return emitStoreRelease(VT, SrcReg, AddrReg,
                              MMO: createMachineMemOperandFor(I));
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(Obj: PtrV, Addr, Ty: Op0->getType()))
    return false;

  if (!emitStore(VT, SrcReg, Addr, MMO: createMachineMemOperandFor(I)))
    return false;
  return true;
}
2210 | |
2211 | static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { |
2212 | switch (Pred) { |
2213 | case CmpInst::FCMP_ONE: |
2214 | case CmpInst::FCMP_UEQ: |
2215 | default: |
2216 | // AL is our "false" for now. The other two need more compares. |
2217 | return AArch64CC::AL; |
2218 | case CmpInst::ICMP_EQ: |
2219 | case CmpInst::FCMP_OEQ: |
2220 | return AArch64CC::EQ; |
2221 | case CmpInst::ICMP_SGT: |
2222 | case CmpInst::FCMP_OGT: |
2223 | return AArch64CC::GT; |
2224 | case CmpInst::ICMP_SGE: |
2225 | case CmpInst::FCMP_OGE: |
2226 | return AArch64CC::GE; |
2227 | case CmpInst::ICMP_UGT: |
2228 | case CmpInst::FCMP_UGT: |
2229 | return AArch64CC::HI; |
2230 | case CmpInst::FCMP_OLT: |
2231 | return AArch64CC::MI; |
2232 | case CmpInst::ICMP_ULE: |
2233 | case CmpInst::FCMP_OLE: |
2234 | return AArch64CC::LS; |
2235 | case CmpInst::FCMP_ORD: |
2236 | return AArch64CC::VC; |
2237 | case CmpInst::FCMP_UNO: |
2238 | return AArch64CC::VS; |
2239 | case CmpInst::FCMP_UGE: |
2240 | return AArch64CC::PL; |
2241 | case CmpInst::ICMP_SLT: |
2242 | case CmpInst::FCMP_ULT: |
2243 | return AArch64CC::LT; |
2244 | case CmpInst::ICMP_SLE: |
2245 | case CmpInst::FCMP_ULE: |
2246 | return AArch64CC::LE; |
2247 | case CmpInst::FCMP_UNE: |
2248 | case CmpInst::ICMP_NE: |
2249 | return AArch64CC::NE; |
2250 | case CmpInst::ICMP_UGE: |
2251 | return AArch64CC::HS; |
2252 | case CmpInst::ICMP_ULT: |
2253 | return AArch64CC::LO; |
2254 | } |
2255 | } |
2256 | |
/// Try to emit a combined compare-and-branch instruction (CBZ/CBNZ for
/// compare-against-zero, TBZ/TBNZ for single-bit and sign-bit tests).
/// Returns false if the branch must be lowered with a separate compare.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
  // will not be produced, as they are conditional branch instructions that do
  // not set flags.
  if (FuncInfo.MF->getFunction().hasFnAttribute(
          Attribute::SpeculativeLoadHardening))
    return false;

  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction" );
  const CmpInst *CI = cast<CmpInst>(Val: BI->getCondition());
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  const Value *LHS = CI->getOperand(i_nocapture: 0);
  const Value *RHS = CI->getOperand(i_nocapture: 1);

  MVT VT;
  if (!isTypeSupported(Ty: LHS->getType(), VT))
    return false;

  unsigned BW = VT.getSizeInBits();
  if (BW > 64)
    return false;

  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(i: 0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(i: 1)];

  // Try to take advantage of fallthrough opportunities.
  if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
    std::swap(a&: TBB, b&: FBB);
    Predicate = CmpInst::getInversePredicate(pred: Predicate);
  }

  // TestBit >= 0 selects a TB(N)Z on that bit; -1 selects a CB(N)Z.
  int TestBit = -1;
  bool IsCmpNE;
  switch (Predicate) {
  default:
    return false;
  case CmpInst::ICMP_EQ:
  case CmpInst::ICMP_NE:
    // Canonicalize the zero constant to the RHS; only compares against zero
    // can use CB(N)Z/TB(N)Z.
    if (isa<Constant>(Val: LHS) && cast<Constant>(Val: LHS)->isNullValue())
      std::swap(a&: LHS, b&: RHS);

    if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue())
      return false;

    // Fold (and x, 2^n) == / != 0 into a single-bit test (TBZ/TBNZ).
    if (const auto *AI = dyn_cast<BinaryOperator>(Val: LHS))
      if (AI->getOpcode() == Instruction::And && isValueAvailable(V: AI)) {
        const Value *AndLHS = AI->getOperand(i_nocapture: 0);
        const Value *AndRHS = AI->getOperand(i_nocapture: 1);

        // Put the power-of-2 mask on the AND's RHS.
        if (const auto *C = dyn_cast<ConstantInt>(Val: AndLHS))
          if (C->getValue().isPowerOf2())
            std::swap(a&: AndLHS, b&: AndRHS);

        if (const auto *C = dyn_cast<ConstantInt>(Val: AndRHS))
          if (C->getValue().isPowerOf2()) {
            TestBit = C->getValue().logBase2();
            LHS = AndLHS;
          }
      }

    // An i1 compare against zero is just a test of bit 0.
    if (VT == MVT::i1)
      TestBit = 0;

    IsCmpNE = Predicate == CmpInst::ICMP_NE;
    break;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // x <s 0 / x >=s 0 reduce to a test of the sign bit.
    if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue())
      return false;

    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
    break;
  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SLE:
    // x >s -1 / x <=s -1 also reduce to a test of the sign bit.
    if (!isa<ConstantInt>(Val: RHS))
      return false;

    if (cast<ConstantInt>(Val: RHS)->getValue() != APInt(BW, -1, true))
      return false;

    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
    break;
  } // end switch

  // Indexed as [IsBitTest][IsCmpNE][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { {AArch64::CBZW, AArch64::CBZX },
      {AArch64::CBNZW, AArch64::CBNZX} },
    { {AArch64::TBZW, AArch64::TBZX },
      {AArch64::TBNZW, AArch64::TBNZX} }
  };

  bool IsBitTest = TestBit != -1;
  bool Is64Bit = BW == 64;
  // A bit test below bit 32 can always use the W-register form.
  if (TestBit < 32 && TestBit >= 0)
    Is64Bit = false;

  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
  const MCInstrDesc &II = TII.get(Opcode: Opc);

  Register SrcReg = getRegForValue(V: LHS);
  if (!SrcReg)
    return false;

  // Testing a low bit of a 64-bit value: switch to the 32-bit subregister.
  if (BW == 64 && !Is64Bit)
    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);

  // Narrow compare-against-zero needs the value zero-extended to i32 first.
  if ((BW < 32) && !IsBitTest)
    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);

  // Emit the combined compare and branch instruction.
  SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs());
  MachineInstrBuilder MIB =
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
          .addReg(RegNo: SrcReg);
  if (IsBitTest)
    MIB.addImm(Val: TestBit);
  MIB.addMBB(MBB: TBB);

  finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
  return true;
}
2382 | |
2383 | bool AArch64FastISel::selectBranch(const Instruction *I) { |
2384 | const BranchInst *BI = cast<BranchInst>(Val: I); |
2385 | if (BI->isUnconditional()) { |
2386 | MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(i: 0)]; |
2387 | fastEmitBranch(MSucc, DbgLoc: BI->getDebugLoc()); |
2388 | return true; |
2389 | } |
2390 | |
2391 | MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(i: 0)]; |
2392 | MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(i: 1)]; |
2393 | |
2394 | if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) { |
2395 | if (CI->hasOneUse() && isValueAvailable(V: CI)) { |
2396 | // Try to optimize or fold the cmp. |
2397 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2398 | switch (Predicate) { |
2399 | default: |
2400 | break; |
2401 | case CmpInst::FCMP_FALSE: |
2402 | fastEmitBranch(MSucc: FBB, DbgLoc: MIMD.getDL()); |
2403 | return true; |
2404 | case CmpInst::FCMP_TRUE: |
2405 | fastEmitBranch(MSucc: TBB, DbgLoc: MIMD.getDL()); |
2406 | return true; |
2407 | } |
2408 | |
2409 | // Try to emit a combined compare-and-branch first. |
2410 | if (emitCompareAndBranch(BI)) |
2411 | return true; |
2412 | |
2413 | // Try to take advantage of fallthrough opportunities. |
2414 | if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) { |
2415 | std::swap(a&: TBB, b&: FBB); |
2416 | Predicate = CmpInst::getInversePredicate(pred: Predicate); |
2417 | } |
2418 | |
2419 | // Emit the cmp. |
2420 | if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned())) |
2421 | return false; |
2422 | |
2423 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch |
2424 | // instruction. |
2425 | AArch64CC::CondCode CC = getCompareCC(Pred: Predicate); |
2426 | AArch64CC::CondCode = AArch64CC::AL; |
2427 | switch (Predicate) { |
2428 | default: |
2429 | break; |
2430 | case CmpInst::FCMP_UEQ: |
2431 | ExtraCC = AArch64CC::EQ; |
2432 | CC = AArch64CC::VS; |
2433 | break; |
2434 | case CmpInst::FCMP_ONE: |
2435 | ExtraCC = AArch64CC::MI; |
2436 | CC = AArch64CC::GT; |
2437 | break; |
2438 | } |
2439 | assert((CC != AArch64CC::AL) && "Unexpected condition code." ); |
2440 | |
2441 | // Emit the extra branch for FCMP_UEQ and FCMP_ONE. |
2442 | if (ExtraCC != AArch64CC::AL) { |
2443 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) |
2444 | .addImm(ExtraCC) |
2445 | .addMBB(TBB); |
2446 | } |
2447 | |
2448 | // Emit the branch. |
2449 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) |
2450 | .addImm(CC) |
2451 | .addMBB(TBB); |
2452 | |
2453 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2454 | return true; |
2455 | } |
2456 | } else if (const auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) { |
2457 | uint64_t Imm = CI->getZExtValue(); |
2458 | MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; |
2459 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B)) |
2460 | .addMBB(Target); |
2461 | |
2462 | // Obtain the branch probability and add the target to the successor list. |
2463 | if (FuncInfo.BPI) { |
2464 | auto BranchProbability = FuncInfo.BPI->getEdgeProbability( |
2465 | Src: BI->getParent(), Dst: Target->getBasicBlock()); |
2466 | FuncInfo.MBB->addSuccessor(Succ: Target, Prob: BranchProbability); |
2467 | } else |
2468 | FuncInfo.MBB->addSuccessorWithoutProb(Succ: Target); |
2469 | return true; |
2470 | } else { |
2471 | AArch64CC::CondCode CC = AArch64CC::NE; |
2472 | if (foldXALUIntrinsic(CC, I, Cond: BI->getCondition())) { |
2473 | // Fake request the condition, otherwise the intrinsic might be completely |
2474 | // optimized away. |
2475 | Register CondReg = getRegForValue(V: BI->getCondition()); |
2476 | if (!CondReg) |
2477 | return false; |
2478 | |
2479 | // Emit the branch. |
2480 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) |
2481 | .addImm(CC) |
2482 | .addMBB(TBB); |
2483 | |
2484 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2485 | return true; |
2486 | } |
2487 | } |
2488 | |
2489 | Register CondReg = getRegForValue(V: BI->getCondition()); |
2490 | if (CondReg == 0) |
2491 | return false; |
2492 | |
2493 | // i1 conditions come as i32 values, test the lowest bit with tb(n)z. |
2494 | unsigned Opcode = AArch64::TBNZW; |
2495 | if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) { |
2496 | std::swap(a&: TBB, b&: FBB); |
2497 | Opcode = AArch64::TBZW; |
2498 | } |
2499 | |
2500 | const MCInstrDesc &II = TII.get(Opcode); |
2501 | Register ConstrainedCondReg |
2502 | = constrainOperandRegClass(II, Op: CondReg, OpNum: II.getNumDefs()); |
2503 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II) |
2504 | .addReg(RegNo: ConstrainedCondReg) |
2505 | .addImm(Val: 0) |
2506 | .addMBB(MBB: TBB); |
2507 | |
2508 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2509 | return true; |
2510 | } |
2511 | |
2512 | bool AArch64FastISel::selectIndirectBr(const Instruction *I) { |
2513 | const IndirectBrInst *BI = cast<IndirectBrInst>(Val: I); |
2514 | Register AddrReg = getRegForValue(V: BI->getOperand(i_nocapture: 0)); |
2515 | if (AddrReg == 0) |
2516 | return false; |
2517 | |
2518 | // Emit the indirect branch. |
2519 | const MCInstrDesc &II = TII.get(AArch64::BR); |
2520 | AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: II.getNumDefs()); |
2521 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: AddrReg); |
2522 | |
2523 | // Make sure the CFG is up-to-date. |
2524 | for (const auto *Succ : BI->successors()) |
2525 | FuncInfo.MBB->addSuccessor(Succ: FuncInfo.MBBMap[Succ]); |
2526 | |
2527 | return true; |
2528 | } |
2529 | |
// Lower a scalar icmp/fcmp to a materialized i1 value in a GPR32, produced
// with CSINC off the flags set by emitCmp.
bool AArch64FastISel::selectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(Val: I);

  // Vectors of i1 are weird: bail out.
  if (CI->getType()->isVectorTy())
    return false;

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_FALSE:
    // Statically false: the result is just a copy of WZR (zero).
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(AArch64::WZR, getKillRegState(true));
    break;
  case CmpInst::FCMP_TRUE:
    // Statically true: materialize the constant 1.
    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
    break;
  }

  if (ResultReg) {
    updateValueMap(I, Reg: ResultReg);
    return true;
  }

  // Emit the cmp.
  if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned()))
    return false;

  ResultReg = createResultReg(&AArch64::GPR32RegClass);

  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
  // condition codes are inverted, because they are used by CSINC.
  static unsigned CondCodeTable[2][2] = {
    { AArch64CC::NE, AArch64CC::VC },
    { AArch64CC::PL, AArch64CC::LE }
  };
  unsigned *CondCodes = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_UEQ:
    CondCodes = &CondCodeTable[0][0];
    break;
  case CmpInst::FCMP_ONE:
    CondCodes = &CondCodeTable[1][0];
    break;
  }

  if (CondCodes) {
    // Combine the two conditions with a pair of CSINCs: the first produces
    // an intermediate value off CondCodes[0], the second folds it with
    // CondCodes[1] to yield the final 0/1 result.
    Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
            TmpReg1)
        .addReg(AArch64::WZR, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(CondCodes[0]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
            ResultReg)
        .addReg(TmpReg1, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(CondCodes[1]);

    updateValueMap(I, Reg: ResultReg);
    return true;
  }

  // Now set a register based on the comparison.
  AArch64CC::CondCode CC = getCompareCC(Pred: Predicate);
  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
  // CSINC wzr, wzr, <inverted CC> yields 1 when CC holds and 0 otherwise.
  AArch64CC::CondCode invertedCC = getInvertedCondCode(Code: CC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
          ResultReg)
      .addReg(AArch64::WZR, getKillRegState(true))
      .addReg(AArch64::WZR, getKillRegState(true))
      .addImm(invertedCC);

  updateValueMap(I, Reg: ResultReg);
  return true;
}
2613 | |
2614 | /// Optimize selects of i1 if one of the operands has a 'true' or 'false' |
2615 | /// value. |
2616 | bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { |
2617 | if (!SI->getType()->isIntegerTy(Bitwidth: 1)) |
2618 | return false; |
2619 | |
2620 | const Value *Src1Val, *Src2Val; |
2621 | unsigned Opc = 0; |
2622 | bool = false; |
2623 | if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getTrueValue())) { |
2624 | if (CI->isOne()) { |
2625 | Src1Val = SI->getCondition(); |
2626 | Src2Val = SI->getFalseValue(); |
2627 | Opc = AArch64::ORRWrr; |
2628 | } else { |
2629 | assert(CI->isZero()); |
2630 | Src1Val = SI->getFalseValue(); |
2631 | Src2Val = SI->getCondition(); |
2632 | Opc = AArch64::BICWrr; |
2633 | } |
2634 | } else if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getFalseValue())) { |
2635 | if (CI->isOne()) { |
2636 | Src1Val = SI->getCondition(); |
2637 | Src2Val = SI->getTrueValue(); |
2638 | Opc = AArch64::ORRWrr; |
2639 | NeedExtraOp = true; |
2640 | } else { |
2641 | assert(CI->isZero()); |
2642 | Src1Val = SI->getCondition(); |
2643 | Src2Val = SI->getTrueValue(); |
2644 | Opc = AArch64::ANDWrr; |
2645 | } |
2646 | } |
2647 | |
2648 | if (!Opc) |
2649 | return false; |
2650 | |
2651 | Register Src1Reg = getRegForValue(V: Src1Val); |
2652 | if (!Src1Reg) |
2653 | return false; |
2654 | |
2655 | Register Src2Reg = getRegForValue(V: Src2Val); |
2656 | if (!Src2Reg) |
2657 | return false; |
2658 | |
2659 | if (NeedExtraOp) |
2660 | Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1); |
2661 | |
2662 | Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, |
2663 | Src2Reg); |
2664 | updateValueMap(I: SI, Reg: ResultReg); |
2665 | return true; |
2666 | } |
2667 | |
2668 | bool AArch64FastISel::selectSelect(const Instruction *I) { |
2669 | assert(isa<SelectInst>(I) && "Expected a select instruction." ); |
2670 | MVT VT; |
2671 | if (!isTypeSupported(Ty: I->getType(), VT)) |
2672 | return false; |
2673 | |
2674 | unsigned Opc; |
2675 | const TargetRegisterClass *RC; |
2676 | switch (VT.SimpleTy) { |
2677 | default: |
2678 | return false; |
2679 | case MVT::i1: |
2680 | case MVT::i8: |
2681 | case MVT::i16: |
2682 | case MVT::i32: |
2683 | Opc = AArch64::CSELWr; |
2684 | RC = &AArch64::GPR32RegClass; |
2685 | break; |
2686 | case MVT::i64: |
2687 | Opc = AArch64::CSELXr; |
2688 | RC = &AArch64::GPR64RegClass; |
2689 | break; |
2690 | case MVT::f32: |
2691 | Opc = AArch64::FCSELSrrr; |
2692 | RC = &AArch64::FPR32RegClass; |
2693 | break; |
2694 | case MVT::f64: |
2695 | Opc = AArch64::FCSELDrrr; |
2696 | RC = &AArch64::FPR64RegClass; |
2697 | break; |
2698 | } |
2699 | |
2700 | const SelectInst *SI = cast<SelectInst>(Val: I); |
2701 | const Value *Cond = SI->getCondition(); |
2702 | AArch64CC::CondCode CC = AArch64CC::NE; |
2703 | AArch64CC::CondCode = AArch64CC::AL; |
2704 | |
2705 | if (optimizeSelect(SI)) |
2706 | return true; |
2707 | |
2708 | // Try to pickup the flags, so we don't have to emit another compare. |
2709 | if (foldXALUIntrinsic(CC, I, Cond)) { |
2710 | // Fake request the condition to force emission of the XALU intrinsic. |
2711 | Register CondReg = getRegForValue(V: Cond); |
2712 | if (!CondReg) |
2713 | return false; |
2714 | } else if (isa<CmpInst>(Val: Cond) && cast<CmpInst>(Val: Cond)->hasOneUse() && |
2715 | isValueAvailable(V: Cond)) { |
2716 | const auto *Cmp = cast<CmpInst>(Val: Cond); |
2717 | // Try to optimize or fold the cmp. |
2718 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI: Cmp); |
2719 | const Value *FoldSelect = nullptr; |
2720 | switch (Predicate) { |
2721 | default: |
2722 | break; |
2723 | case CmpInst::FCMP_FALSE: |
2724 | FoldSelect = SI->getFalseValue(); |
2725 | break; |
2726 | case CmpInst::FCMP_TRUE: |
2727 | FoldSelect = SI->getTrueValue(); |
2728 | break; |
2729 | } |
2730 | |
2731 | if (FoldSelect) { |
2732 | Register SrcReg = getRegForValue(V: FoldSelect); |
2733 | if (!SrcReg) |
2734 | return false; |
2735 | |
2736 | updateValueMap(I, Reg: SrcReg); |
2737 | return true; |
2738 | } |
2739 | |
2740 | // Emit the cmp. |
2741 | if (!emitCmp(LHS: Cmp->getOperand(i_nocapture: 0), RHS: Cmp->getOperand(i_nocapture: 1), IsZExt: Cmp->isUnsigned())) |
2742 | return false; |
2743 | |
2744 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. |
2745 | CC = getCompareCC(Pred: Predicate); |
2746 | switch (Predicate) { |
2747 | default: |
2748 | break; |
2749 | case CmpInst::FCMP_UEQ: |
2750 | ExtraCC = AArch64CC::EQ; |
2751 | CC = AArch64CC::VS; |
2752 | break; |
2753 | case CmpInst::FCMP_ONE: |
2754 | ExtraCC = AArch64CC::MI; |
2755 | CC = AArch64CC::GT; |
2756 | break; |
2757 | } |
2758 | assert((CC != AArch64CC::AL) && "Unexpected condition code." ); |
2759 | } else { |
2760 | Register CondReg = getRegForValue(V: Cond); |
2761 | if (!CondReg) |
2762 | return false; |
2763 | |
2764 | const MCInstrDesc &II = TII.get(AArch64::ANDSWri); |
2765 | CondReg = constrainOperandRegClass(II, Op: CondReg, OpNum: 1); |
2766 | |
2767 | // Emit a TST instruction (ANDS wzr, reg, #imm). |
2768 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, |
2769 | AArch64::WZR) |
2770 | .addReg(CondReg) |
2771 | .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); |
2772 | } |
2773 | |
2774 | Register Src1Reg = getRegForValue(V: SI->getTrueValue()); |
2775 | Register Src2Reg = getRegForValue(V: SI->getFalseValue()); |
2776 | |
2777 | if (!Src1Reg || !Src2Reg) |
2778 | return false; |
2779 | |
2780 | if (ExtraCC != AArch64CC::AL) |
2781 | Src2Reg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: ExtraCC); |
2782 | |
2783 | Register ResultReg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: CC); |
2784 | updateValueMap(I, Reg: ResultReg); |
2785 | return true; |
2786 | } |
2787 | |
2788 | bool AArch64FastISel::selectFPExt(const Instruction *I) { |
2789 | Value *V = I->getOperand(i: 0); |
2790 | if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) |
2791 | return false; |
2792 | |
2793 | Register Op = getRegForValue(V); |
2794 | if (Op == 0) |
2795 | return false; |
2796 | |
2797 | Register ResultReg = createResultReg(&AArch64::FPR64RegClass); |
2798 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr), |
2799 | ResultReg).addReg(Op); |
2800 | updateValueMap(I, Reg: ResultReg); |
2801 | return true; |
2802 | } |
2803 | |
2804 | bool AArch64FastISel::selectFPTrunc(const Instruction *I) { |
2805 | Value *V = I->getOperand(i: 0); |
2806 | if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) |
2807 | return false; |
2808 | |
2809 | Register Op = getRegForValue(V); |
2810 | if (Op == 0) |
2811 | return false; |
2812 | |
2813 | Register ResultReg = createResultReg(&AArch64::FPR32RegClass); |
2814 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr), |
2815 | ResultReg).addReg(Op); |
2816 | updateValueMap(I, Reg: ResultReg); |
2817 | return true; |
2818 | } |
2819 | |
2820 | // FPToUI and FPToSI |
2821 | bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { |
2822 | MVT DestVT; |
2823 | if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector()) |
2824 | return false; |
2825 | |
2826 | Register SrcReg = getRegForValue(V: I->getOperand(i: 0)); |
2827 | if (SrcReg == 0) |
2828 | return false; |
2829 | |
2830 | EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true); |
2831 | if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16) |
2832 | return false; |
2833 | |
2834 | unsigned Opc; |
2835 | if (SrcVT == MVT::f64) { |
2836 | if (Signed) |
2837 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; |
2838 | else |
2839 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; |
2840 | } else { |
2841 | if (Signed) |
2842 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; |
2843 | else |
2844 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; |
2845 | } |
2846 | Register ResultReg = createResultReg( |
2847 | DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); |
2848 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg) |
2849 | .addReg(RegNo: SrcReg); |
2850 | updateValueMap(I, Reg: ResultReg); |
2851 | return true; |
2852 | } |
2853 | |
// Lower sitofp/uitofp (Signed selects which) to SCVTF/UCVTF. Sub-i32 integer
// sources are first widened to i32 with matching signedness; f16/bf16
// destinations are left to regular ISel.
bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector())
    return false;
  // Let regular ISEL handle FP16
  if (DestVT == MVT::f16 || DestVT == MVT::bf16)
    return false;

  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
         "Unexpected value type.");

  Register SrcReg = getRegForValue(V: I->getOperand(i: 0));
  if (!SrcReg)
    return false;

  EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true);

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    SrcReg =
        emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
    if (!SrcReg)
      return false;
  }

  // Pick the opcode by source width (W vs X) and destination precision
  // (S vs D).
  unsigned Opc;
  if (SrcVT == MVT::i64) {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
  } else {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
  }

  Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT: DestVT), Op0: SrcReg);
  updateValueMap(I, Reg: ResultReg);
  return true;
}
2896 | |
// Fast-path lowering of incoming arguments. Handles only simple C/Swift
// signatures where every argument fits in one of the first 8 GPRs or the
// first 8 FPR/SIMD registers; returns false to fall back to SelectionDAG
// for anything more complicated.
bool AArch64FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C && CC != CallingConv::Swift)
    return false;

  if (Subtarget->hasCustomCallingConv())
    return false;

  // Only handle simple cases of up to 8 GPR and FPR each.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    // Attributes that change the passing convention are not handled here.
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftAsync) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, Ty: ArgTy);
    if (!ArgVT.isSimple())
      return false;

    MVT VT = ArgVT.getSimpleVT().SimpleTy;
    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
      return false;

    // Only little-endian NEON vector arguments are handled.
    if (VT.isVector() &&
        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
      return false;

    if (VT >= MVT::i1 && VT <= MVT::i64)
      ++GPRCnt;
    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
             VT.is128BitVector())
      ++FPRCnt;
    else
      return false;

    if (GPRCnt > 8 || FPRCnt > 8)
      return false;
  }

  // Argument registers by class, one row per register width: W, X, H, S, D, Q.
  static const MCPhysReg Registers[6][8] = {
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 },
    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
  };

  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Ty: Arg.getType());
    unsigned SrcReg;
    const TargetRegisterClass *RC;
    if (VT >= MVT::i1 && VT <= MVT::i32) {
      SrcReg = Registers[0][GPRIdx++];
      RC = &AArch64::GPR32RegClass;
      // Sub-i32 integers arrive widened in a full W register.
      VT = MVT::i32;
    } else if (VT == MVT::i64) {
      SrcReg = Registers[1][GPRIdx++];
      RC = &AArch64::GPR64RegClass;
    } else if (VT == MVT::f16 || VT == MVT::bf16) {
      SrcReg = Registers[2][FPRIdx++];
      RC = &AArch64::FPR16RegClass;
    } else if (VT == MVT::f32) {
      SrcReg = Registers[3][FPRIdx++];
      RC = &AArch64::FPR32RegClass;
    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
      SrcReg = Registers[4][FPRIdx++];
      RC = &AArch64::FPR64RegClass;
    } else if (VT.is128BitVector()) {
      SrcReg = Registers[5][FPRIdx++];
      RC = &AArch64::FPR128RegClass;
    } else
      llvm_unreachable("Unexpected value type.");

    Register DstReg = FuncInfo.MF->addLiveIn(PReg: SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    Register ResultReg = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
        .addReg(RegNo: DstReg, flags: getKillRegState(B: true));
    updateValueMap(I: &Arg, Reg: ResultReg);
  }
  return true;
}
3008 | |
// Analyze and emit the outgoing arguments of a call: assigns locations via
// the calling convention, emits CALLSEQ_START, then copies register args into
// their physregs and stores stack args relative to SP. NumBytes returns the
// argument stack size for the matching CALLSEQ_END in finishCall.
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;
  SmallVector<CCValAssign, 16> ArgLocs;
  // Vararg calls are rejected earlier in fastLowerCall, hence IsVarArg=false.
  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs&: OutVTs, Flags&: CLI.OutFlags, Fn: CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getStackSize();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackDown))
    .addImm(Val: NumBytes).addImm(Val: 0);

  // Process the args.
  for (CCValAssign &VA : ArgLocs) {
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    Register ArgReg = getRegForValue(V: ArgVal);
    if (!ArgReg)
      return false;

    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/false);
      if (!ArgReg)
        return false;
      break;
    }
    case CCValAssign::AExt:
      // Intentional fall-through.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/true);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VA.getLocReg()).addReg(RegNo: ArgReg);
      CLI.OutRegs.push_back(Elt: VA.getLocReg());
    } else if (VA.needsCustom()) {
      // FIXME: Handle custom args.
      return false;
    } else {
      assert(VA.isMemLoc() && "Assuming store on stack.");

      // Don't emit stores for undef values.
      if (isa<UndefValue>(Val: ArgVal))
        continue;

      // Need to store on the stack.
      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;

      // On big-endian targets a small value lives in the high part of its
      // 8-byte stack slot, so bias the offset accordingly.
      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
        BEAlign = 8 - ArgSize;

      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(AArch64::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

      Align Alignment = DL.getABITypeAlign(Ty: ArgVal->getType());
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
          PtrInfo: MachinePointerInfo::getStack(MF&: *FuncInfo.MF, Offset: Addr.getOffset()),
          F: MachineMemOperand::MOStore, Size: ArgVT.getStoreSize(), BaseAlignment: Alignment);

      if (!emitStore(VT: ArgVT, SrcReg: ArgReg, Addr, MMO))
        return false;
    }
  }
  return true;
}
3098 | |
// Emit the CALLSEQ_END for a call and copy its results out of the
// convention-assigned physical registers into the virtual registers created
// for CLI.RetTy. NumBytes must match the CALLSEQ_START from processCallArgs.
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
  CallingConv::ID CC = CLI.CallConv;

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackUp))
    .addImm(Val: NumBytes).addImm(Val: 0);

  // Now the return values.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
  CCInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: CCAssignFnForCall(CC));

  // CreateRegs returns the first of a run of consecutive virtual registers,
  // one per legal value of the return type; CopyReg below indexes into it.
  Register ResultReg = FuncInfo.CreateRegs(Ty: CLI.RetTy);
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    MVT CopyVT = VA.getValVT();
    unsigned CopyReg = ResultReg + i;

    // TODO: Handle big-endian results
    if (CopyVT.isVector() && !Subtarget->isLittleEndian())
      return false;

    // Copy result out of their specified physreg.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TargetOpcode::COPY),
            DestReg: CopyReg)
        .addReg(RegNo: VA.getLocReg());
    CLI.InRegs.push_back(Elt: VA.getLocReg());
  }

  CLI.ResultReg = ResultReg;
  CLI.NumResultRegs = RVLocs.size();

  return true;
}
3134 | |
// Fast-path lowering of a call. Bails out (returning false so SelectionDAG
// takes over) for tail calls, varargs, ILP32, Windows Arm64EC, KCFI indirect
// calls, returns-twice callees under BTI enforcement, and arguments with
// special ABI flags; otherwise emits the argument setup, the BL/BLR, and the
// result copies via processCallArgs/finishCall.
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  MCSymbol *Symbol = CLI.Symbol;

  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle calls to functions like setjmp that need
  // a bti instruction following the call.
  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
      !Subtarget->noBTIAtReturnTwice() &&
      MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    return false;

  // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
  if (CLI.CB && CLI.CB->isIndirectCall() &&
      CLI.CB->getOperandBundle(ID: LLVMContext::OB_kcfi))
    return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // FIXME: we could and should support this, but for now correctness at -O0 is
  // more important.
  if (Subtarget->isTargetILP32())
    return false;

  CodeModel::Model CM = TM.getCodeModel();
  // Only support the small-addressing and large code models.
  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
    return false;

  // FIXME: Add large code model support for ELF.
  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    return false;

  // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
  // attribute. Check "RtLibUseGOT" instead.
  if (MF->getFunction().getParent()->getRtLibUseGOT())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  if (Subtarget->isWindowsArm64EC())
    return false;

  // Arguments with special passing conventions are not supported here.
  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
        Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
      return false;

  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(N: CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;

    OutVTs.push_back(Elt: VT);
  }

  Address Addr;
  if (Callee && !computeCallAddress(V: Callee, Addr))
    return false;

  // The weak function target may be zero; in that case we must use indirect
  // addressing via a stub on windows as it may be out of range for a
  // PC-relative jump.
  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
      Addr.getGlobalValue()->hasExternalWeakLinkage())
    return false;

  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!processCallArgs(CLI, OutVTs, NumBytes))
    return false;

  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  if (RegInfo->isAnyArgRegReserved(MF: *MF))
    RegInfo->emitReservedArgRegCallError(MF: *MF);

  // Issue the call.
  MachineInstrBuilder MIB;
  if (Subtarget->useSmallAddressing()) {
    // Small code model: a direct BL (or a BLR for a register callee).
    const MCInstrDesc &II =
        TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
    MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II);
    if (Symbol)
      MIB.addSym(Sym: Symbol, TargetFlags: 0);
    else if (Addr.getGlobalValue())
      MIB.addGlobalAddress(GV: Addr.getGlobalValue(), Offset: 0, TargetFlags: 0);
    else if (Addr.getReg()) {
      Register Reg = constrainOperandRegClass(II, Op: Addr.getReg(), OpNum: 0);
      MIB.addReg(RegNo: Reg);
    } else
      return false;
  } else {
    // Large code model: materialize the callee address into a register
    // (ADRP + GOT load for symbols) and call through it with BLR.
    unsigned CallReg = 0;
    if (Symbol) {
      Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
              ADRPReg)
          .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);

      CallReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(AArch64::LDRXui), CallReg)
          .addReg(ADRPReg)
          .addSym(Symbol,
                  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else if (Addr.getGlobalValue())
      CallReg = materializeGV(GV: Addr.getGlobalValue());
    else if (Addr.getReg())
      CallReg = Addr.getReg();

    if (!CallReg)
      return false;

    const MCInstrDesc &II = TII.get(Opcode: getBLRCallOpcode(MF: *MF));
    CallReg = constrainOperandRegClass(II, Op: CallReg, OpNum: 0);
    MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: CallReg);
  }

  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(RegNo: Reg, flags: RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(CLI, NumBytes);
}
3284 | |
3285 | bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) { |
3286 | if (Alignment) |
3287 | return Len / Alignment->value() <= 4; |
3288 | else |
3289 | return Len < 32; |
3290 | } |
3291 | |
3292 | bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, |
3293 | uint64_t Len, MaybeAlign Alignment) { |
3294 | // Make sure we don't bloat code by inlining very large memcpy's. |
3295 | if (!isMemCpySmall(Len, Alignment)) |
3296 | return false; |
3297 | |
3298 | int64_t UnscaledOffset = 0; |
3299 | Address OrigDest = Dest; |
3300 | Address OrigSrc = Src; |
3301 | |
3302 | while (Len) { |
3303 | MVT VT; |
3304 | if (!Alignment || *Alignment >= 8) { |
3305 | if (Len >= 8) |
3306 | VT = MVT::i64; |
3307 | else if (Len >= 4) |
3308 | VT = MVT::i32; |
3309 | else if (Len >= 2) |
3310 | VT = MVT::i16; |
3311 | else { |
3312 | VT = MVT::i8; |
3313 | } |
3314 | } else { |
3315 | assert(Alignment && "Alignment is set in this branch" ); |
3316 | // Bound based on alignment. |
3317 | if (Len >= 4 && *Alignment == 4) |
3318 | VT = MVT::i32; |
3319 | else if (Len >= 2 && *Alignment == 2) |
3320 | VT = MVT::i16; |
3321 | else { |
3322 | VT = MVT::i8; |
3323 | } |
3324 | } |
3325 | |
3326 | unsigned ResultReg = emitLoad(VT, RetVT: VT, Addr: Src); |
3327 | if (!ResultReg) |
3328 | return false; |
3329 | |
3330 | if (!emitStore(VT, SrcReg: ResultReg, Addr: Dest)) |
3331 | return false; |
3332 | |
3333 | int64_t Size = VT.getSizeInBits() / 8; |
3334 | Len -= Size; |
3335 | UnscaledOffset += Size; |
3336 | |
3337 | // We need to recompute the unscaled offset for each iteration. |
3338 | Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); |
3339 | Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); |
3340 | } |
3341 | |
3342 | return true; |
3343 | } |
3344 | |
3345 | /// Check if it is possible to fold the condition from the XALU intrinsic |
3346 | /// into the user. The condition code will only be updated on success. |
3347 | bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, |
3348 | const Instruction *I, |
3349 | const Value *Cond) { |
3350 | if (!isa<ExtractValueInst>(Val: Cond)) |
3351 | return false; |
3352 | |
3353 | const auto *EV = cast<ExtractValueInst>(Val: Cond); |
3354 | if (!isa<IntrinsicInst>(Val: EV->getAggregateOperand())) |
3355 | return false; |
3356 | |
3357 | const auto *II = cast<IntrinsicInst>(Val: EV->getAggregateOperand()); |
3358 | MVT RetVT; |
3359 | const Function *Callee = II->getCalledFunction(); |
3360 | Type *RetTy = |
3361 | cast<StructType>(Val: Callee->getReturnType())->getTypeAtIndex(N: 0U); |
3362 | if (!isTypeLegal(Ty: RetTy, VT&: RetVT)) |
3363 | return false; |
3364 | |
3365 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
3366 | return false; |
3367 | |
3368 | const Value *LHS = II->getArgOperand(i: 0); |
3369 | const Value *RHS = II->getArgOperand(i: 1); |
3370 | |
3371 | // Canonicalize immediate to the RHS. |
3372 | if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative()) |
3373 | std::swap(a&: LHS, b&: RHS); |
3374 | |
3375 | // Simplify multiplies. |
3376 | Intrinsic::ID IID = II->getIntrinsicID(); |
3377 | switch (IID) { |
3378 | default: |
3379 | break; |
3380 | case Intrinsic::smul_with_overflow: |
3381 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
3382 | if (C->getValue() == 2) |
3383 | IID = Intrinsic::sadd_with_overflow; |
3384 | break; |
3385 | case Intrinsic::umul_with_overflow: |
3386 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
3387 | if (C->getValue() == 2) |
3388 | IID = Intrinsic::uadd_with_overflow; |
3389 | break; |
3390 | } |
3391 | |
3392 | AArch64CC::CondCode TmpCC; |
3393 | switch (IID) { |
3394 | default: |
3395 | return false; |
3396 | case Intrinsic::sadd_with_overflow: |
3397 | case Intrinsic::ssub_with_overflow: |
3398 | TmpCC = AArch64CC::VS; |
3399 | break; |
3400 | case Intrinsic::uadd_with_overflow: |
3401 | TmpCC = AArch64CC::HS; |
3402 | break; |
3403 | case Intrinsic::usub_with_overflow: |
3404 | TmpCC = AArch64CC::LO; |
3405 | break; |
3406 | case Intrinsic::smul_with_overflow: |
3407 | case Intrinsic::umul_with_overflow: |
3408 | TmpCC = AArch64CC::NE; |
3409 | break; |
3410 | } |
3411 | |
3412 | // Check if both instructions are in the same basic block. |
3413 | if (!isValueAvailable(V: II)) |
3414 | return false; |
3415 | |
3416 | // Make sure nothing is in the way |
3417 | BasicBlock::const_iterator Start(I); |
3418 | BasicBlock::const_iterator End(II); |
3419 | for (auto Itr = std::prev(x: Start); Itr != End; --Itr) { |
3420 | // We only expect extractvalue instructions between the intrinsic and the |
3421 | // instruction to be selected. |
3422 | if (!isa<ExtractValueInst>(Val: Itr)) |
3423 | return false; |
3424 | |
3425 | // Check that the extractvalue operand comes from the intrinsic. |
3426 | const auto *EVI = cast<ExtractValueInst>(Val&: Itr); |
3427 | if (EVI->getAggregateOperand() != II) |
3428 | return false; |
3429 | } |
3430 | |
3431 | CC = TmpCC; |
3432 | return true; |
3433 | } |
3434 | |
// Lower a selected set of intrinsic calls directly to machine instructions
// or libcalls. Returns false for anything unhandled so the common FastISel /
// SelectionDAG path can take over.
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  // FIXME: Handle more intrinsics.
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    // Start from this function's frame pointer and chase saved frame
    // pointers up the stack, one load per requested depth level.
    const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    Register FramePtr = RegInfo->getFrameRegister(MF: *(FuncInfo.MF));
    Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: SrcReg).addReg(RegNo: FramePtr);
    // Recursively load frame address
    // ldr x0, [fp]
    // ldr x0, [x0]
    // ldr x0, [x0]
    // ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(Val: II->getOperand(i_nocapture: 0))->getZExtValue();
    while (Depth--) {
      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
                                SrcReg, 0);
      assert(DestReg && "Unexpected LDR instruction emission failure." );
      SrcReg = DestReg;
    }

    updateValueMap(I: II, Reg: SrcReg);
    return true;
  }
  case Intrinsic::sponentry: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();

    // SP = FP + Fixed Object + 16
    int FI = MFI.CreateFixedObject(Size: 4, SPOffset: 0, IsImmutable: false);
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    // ADDXri of the fixed frame index with a zero immediate materializes the
    // stack pointer value at function entry.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::ADDXri), ResultReg)
        .addFrameIndex(FI)
        .addImm(0)
        .addImm(0);

    updateValueMap(I: II, Reg: ResultReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const auto *MTI = cast<MemTransferInst>(Val: II);
    // Don't handle volatile.
    if (MTI->isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(Val: MTI->getLength()) && IsMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(Val: MTI->getLength())->getZExtValue();
      // The inline expansion may only assume the weaker of the two
      // alignments (source vs. destination).
      MaybeAlign Alignment;
      if (MTI->getDestAlign() || MTI->getSourceAlign())
        Alignment = std::min(a: MTI->getDestAlign().valueOrOne(),
                             b: MTI->getSourceAlign().valueOrOne());
      if (isMemCpySmall(Len, Alignment)) {
        Address Dest, Src;
        if (!computeAddress(Obj: MTI->getRawDest(), Addr&: Dest) ||
            !computeAddress(Obj: MTI->getRawSource(), Addr&: Src))
          return false;
        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    // Fall back to a libcall; the runtime routines take a 64-bit length.
    if (!MTI->getLength()->getType()->isIntegerTy(Bitwidth: 64))
      return false;

    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    const char *IntrMemName = isa<MemCpyInst>(Val: II) ? "memcpy" : "memmove" ;
    return lowerCallTo(CI: II, SymName: IntrMemName, NumArgs: II->arg_size() - 1);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(Val: II);
    // Don't handle volatile.
    if (MSI->isVolatile())
      return false;

    // The libc memset takes a 64-bit length.
    if (!MSI->getLength()->getType()->isIntegerTy(Bitwidth: 64))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    return lowerCallTo(CI: II, SymName: "memset" , NumArgs: II->arg_size() - 1);
  }
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow: {
    // These are lowered to libm calls; only f32/f64 have runtime routines.
    MVT RetVT;
    if (!isTypeLegal(Ty: II->getType(), VT&: RetVT))
      return false;

    if (RetVT != MVT::f32 && RetVT != MVT::f64)
      return false;

    // Indexed by [intrinsic][is-f64].
    static const RTLIB::Libcall LibCallTable[3][2] = {
      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
      { RTLIB::COS_F32, RTLIB::COS_F64 },
      { RTLIB::POW_F32, RTLIB::POW_F64 }
    };
    RTLIB::Libcall LC;
    bool Is64Bit = RetVT == MVT::f64;
    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic." );
    case Intrinsic::sin:
      LC = LibCallTable[0][Is64Bit];
      break;
    case Intrinsic::cos:
      LC = LibCallTable[1][Is64Bit];
      break;
    case Intrinsic::pow:
      LC = LibCallTable[2][Is64Bit];
      break;
    }

    ArgListTy Args;
    Args.reserve(n: II->arg_size());

    // Populate the argument list.
    for (auto &Arg : II->args()) {
      ArgListEntry Entry;
      Entry.Val = Arg;
      Entry.Ty = Arg->getType();
      Args.push_back(x: Entry);
    }

    CallLoweringInfo CLI;
    MCContext &Ctx = MF->getContext();
    CLI.setCallee(DL, Ctx, CC: TLI.getLibcallCallingConv(Call: LC), ResultTy: II->getType(),
                  Target: TLI.getLibcallName(Call: LC), ArgsList: std::move(Args));
    if (!lowerCallTo(CLI))
      return false;
    updateValueMap(I: II, Reg: CLI.ResultReg);
    return true;
  }
  case Intrinsic::fabs: {
    // fabs maps directly onto the FABS instruction for f32/f64.
    MVT VT;
    if (!isTypeLegal(Ty: II->getType(), VT))
      return false;

    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      return false;
    case MVT::f32:
      Opc = AArch64::FABSSr;
      break;
    case MVT::f64:
      Opc = AArch64::FABSDr;
      break;
    }
    Register SrcReg = getRegForValue(V: II->getOperand(i_nocapture: 0));
    if (!SrcReg)
      return false;
    Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
      .addReg(RegNo: SrcReg);
    updateValueMap(I: II, Reg: ResultReg);
    return true;
  }
  case Intrinsic::trap:
    // BRK #1 is the canonical trap encoding.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
        .addImm(1);
    return true;
  case Intrinsic::debugtrap:
    // BRK #0xF000 is reserved for debugger traps.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
        .addImm(0xF000);
    return true;

  case Intrinsic::sqrt: {
    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(Ty: RetTy, VT))
      return false;

    Register Op0Reg = getRegForValue(V: II->getOperand(i_nocapture: 0));
    if (!Op0Reg)
      return false;

    unsigned ResultReg = fastEmit_r(VT, RetVT: VT, Opcode: ISD::FSQRT, Op0: Op0Reg);
    if (!ResultReg)
      return false;

    updateValueMap(I: II, Reg: ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Val: Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(N: 0U);

    MVT VT;
    if (!isTypeLegal(Ty: RetTy, VT))
      return false;

    if (VT != MVT::i32 && VT != MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(i: 0);
    const Value *RHS = II->getArgOperand(i: 1);
    // Canonicalize immediate to the RHS.
    if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative())
      std::swap(a&: LHS, b&: RHS);

    // Simplify multiplies.
    // x * 2 overflows exactly when x + x does; unlike foldXALUIntrinsic we
    // also rewrite RHS so the add below has the right operands.
    Intrinsic::ID IID = II->getIntrinsicID();
    switch (IID) {
    default:
      break;
    case Intrinsic::smul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::sadd_with_overflow;
          RHS = LHS;
        }
      break;
    case Intrinsic::umul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::uadd_with_overflow;
          RHS = LHS;
        }
      break;
    }

    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    AArch64CC::CondCode CC = AArch64CC::Invalid;
    switch (IID) {
    default: llvm_unreachable("Unexpected intrinsic!" );
    case Intrinsic::sadd_with_overflow:
      ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::uadd_with_overflow:
      ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::HS;
      break;
    case Intrinsic::ssub_with_overflow:
      ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::usub_with_overflow:
      ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::LO;
      break;
    case Intrinsic::smul_with_overflow: {
      CC = AArch64CC::NE;
      Register LHSReg = getRegForValue(V: LHS);
      if (!LHSReg)
        return false;

      Register RHSReg = getRegForValue(V: RHS);
      if (!RHSReg)
        return false;

      if (VT == MVT::i32) {
        // Widen to 64 bits, then check that the sign-extended low half
        // round-trips to the full product.
        MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
        Register MulSubReg =
            fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
        // cmp xreg, wreg, sxtw
        emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
                      AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
                      /*WantResult=*/false);
        MulReg = MulSubReg;
      } else {
        assert(VT == MVT::i64 && "Unexpected value type." );
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
        // reused in the next instruction.
        // Overflow occurred iff the high 64 bits differ from the sign
        // extension of the low 64 bits (SMULH vs. low product >> 63).
        MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg);
        unsigned SMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHS, Op0: LHSReg, Op1: RHSReg);
        emitSubs_rs(RetVT: VT, LHSReg: SMULHReg, RHSReg: MulReg, ShiftType: AArch64_AM::ASR, ShiftImm: 63,
                    /*WantResult=*/false);
      }
      break;
    }
    case Intrinsic::umul_with_overflow: {
      CC = AArch64CC::NE;
      Register LHSReg = getRegForValue(V: LHS);
      if (!LHSReg)
        return false;

      Register RHSReg = getRegForValue(V: RHS);
      if (!RHSReg)
        return false;

      if (VT == MVT::i32) {
        // Widen to 64 bits; overflow iff any of the high 32 bits are set.
        MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
        // tst xreg, #0xffffffff00000000
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(AArch64::ANDSXri), AArch64::XZR)
            .addReg(MulReg)
            .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type." );
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
        // reused in the next instruction.
        // Overflow occurred iff the high 64 bits (UMULH) are non-zero.
        MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg);
        unsigned UMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHU, Op0: LHSReg, Op1: RHSReg);
        emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
      }
      break;
    }
    }

    if (MulReg) {
      ResultReg1 = createResultReg(RC: TLI.getRegClassFor(VT));
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg1).addReg(RegNo: MulReg);
    }

    if (!ResultReg1)
      return false;

    // Materialize the overflow bit with CSINC on the inverted condition;
    // updateValueMap expects the two result registers to be consecutive.
    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
                                  AArch64::WZR, AArch64::WZR,
                                  getInvertedCondCode(CC));
    (void)ResultReg2;
    assert((ResultReg1 + 1) == ResultReg2 &&
           "Nonconsecutive result registers." );
    updateValueMap(I: II, Reg: ResultReg1, NumRegs: 2);
    return true;
  }
  case Intrinsic::aarch64_crc32b:
  case Intrinsic::aarch64_crc32h:
  case Intrinsic::aarch64_crc32w:
  case Intrinsic::aarch64_crc32x:
  case Intrinsic::aarch64_crc32cb:
  case Intrinsic::aarch64_crc32ch:
  case Intrinsic::aarch64_crc32cw:
  case Intrinsic::aarch64_crc32cx: {
    // CRC32 instructions require the optional CRC extension.
    if (!Subtarget->hasCRC())
      return false;

    unsigned Opc;
    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic!" );
    case Intrinsic::aarch64_crc32b:
      Opc = AArch64::CRC32Brr;
      break;
    case Intrinsic::aarch64_crc32h:
      Opc = AArch64::CRC32Hrr;
      break;
    case Intrinsic::aarch64_crc32w:
      Opc = AArch64::CRC32Wrr;
      break;
    case Intrinsic::aarch64_crc32x:
      Opc = AArch64::CRC32Xrr;
      break;
    case Intrinsic::aarch64_crc32cb:
      Opc = AArch64::CRC32CBrr;
      break;
    case Intrinsic::aarch64_crc32ch:
      Opc = AArch64::CRC32CHrr;
      break;
    case Intrinsic::aarch64_crc32cw:
      Opc = AArch64::CRC32CWrr;
      break;
    case Intrinsic::aarch64_crc32cx:
      Opc = AArch64::CRC32CXrr;
      break;
    }

    Register LHSReg = getRegForValue(V: II->getArgOperand(i: 0));
    Register RHSReg = getRegForValue(V: II->getArgOperand(i: 1));
    if (!LHSReg || !RHSReg)
      return false;

    Register ResultReg =
        fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
    updateValueMap(I: II, Reg: ResultReg);
    return true;
  }
  }
  return false;
}
3835 | |
// Lower a return instruction. Handles void returns and single register
// returns of legal integer/FP types; bails out (returns false) for varargs,
// swifterror, split-CSR functions and anything needing multiple locations.
bool AArch64FastISel::selectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(Val: I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(MF: FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    CallingConv::ID CC = F.getCallingConv();
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, Fn: RetCC_AArch64_AAPCS);

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];
    const Value *RV = Ret->getOperand(i_nocapture: 0);

    // Don't bother handling odd stuff for now.
    if ((VA.getLocInfo() != CCValAssign::Full) &&
        (VA.getLocInfo() != CCValAssign::BCvt))
      return false;

    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    Register Reg = getRegForValue(V: RV);
    if (Reg == 0)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    Register DestReg = VA.getLocReg();
    // Avoid a cross-class copy. This is very unlikely.
    if (!MRI.getRegClass(Reg: SrcReg)->contains(Reg: DestReg))
      return false;

    EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
    if (!RVEVT.isSimple())
      return false;

    // Vectors (of > 1 lane) in big endian need tricky handling.
    if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
        !Subtarget->isLittleEndian())
      return false;

    MVT RVVT = RVEVT.getSimpleVT();
    if (RVVT == MVT::f128)
      return false;

    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    // i1/i8/i16 returns must be widened with the extension kind the callee's
    // return attributes request; other mismatches are rejected.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool IsZExt = Outs[0].Flags.isZExt();
      SrcReg = emitIntExt(SrcVT: RVVT, SrcReg, DestVT, isZExt: IsZExt);
      if (SrcReg == 0)
        return false;
    }

    // "Callee" (i.e. value producer) zero extends pointers at function
    // boundary.
    if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
      SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);

    // Make the copy.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg).addReg(RegNo: SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(Elt: VA.getLocReg());
  }

  // Emit the return and mark the value registers as implicit uses so they
  // stay live up to the return.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned RetReg : RetRegs)
    MIB.addReg(RegNo: RetReg, flags: RegState::Implicit);
  return true;
}
3939 | |
3940 | bool AArch64FastISel::selectTrunc(const Instruction *I) { |
3941 | Type *DestTy = I->getType(); |
3942 | Value *Op = I->getOperand(i: 0); |
3943 | Type *SrcTy = Op->getType(); |
3944 | |
3945 | EVT SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true); |
3946 | EVT DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true); |
3947 | if (!SrcEVT.isSimple()) |
3948 | return false; |
3949 | if (!DestEVT.isSimple()) |
3950 | return false; |
3951 | |
3952 | MVT SrcVT = SrcEVT.getSimpleVT(); |
3953 | MVT DestVT = DestEVT.getSimpleVT(); |
3954 | |
3955 | if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && |
3956 | SrcVT != MVT::i8) |
3957 | return false; |
3958 | if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && |
3959 | DestVT != MVT::i1) |
3960 | return false; |
3961 | |
3962 | Register SrcReg = getRegForValue(V: Op); |
3963 | if (!SrcReg) |
3964 | return false; |
3965 | |
3966 | // If we're truncating from i64 to a smaller non-legal type then generate an |
3967 | // AND. Otherwise, we know the high bits are undefined and a truncate only |
3968 | // generate a COPY. We cannot mark the source register also as result |
3969 | // register, because this can incorrectly transfer the kill flag onto the |
3970 | // source register. |
3971 | unsigned ResultReg; |
3972 | if (SrcVT == MVT::i64) { |
3973 | uint64_t Mask = 0; |
3974 | switch (DestVT.SimpleTy) { |
3975 | default: |
3976 | // Trunc i64 to i32 is handled by the target-independent fast-isel. |
3977 | return false; |
3978 | case MVT::i1: |
3979 | Mask = 0x1; |
3980 | break; |
3981 | case MVT::i8: |
3982 | Mask = 0xff; |
3983 | break; |
3984 | case MVT::i16: |
3985 | Mask = 0xffff; |
3986 | break; |
3987 | } |
3988 | // Issue an extract_subreg to get the lower 32-bits. |
3989 | Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, |
3990 | AArch64::sub_32); |
3991 | // Create the AND instruction which performs the actual truncation. |
3992 | ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask); |
3993 | assert(ResultReg && "Unexpected AND instruction emission failure." ); |
3994 | } else { |
3995 | ResultReg = createResultReg(&AArch64::GPR32RegClass); |
3996 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3997 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
3998 | .addReg(RegNo: SrcReg); |
3999 | } |
4000 | |
4001 | updateValueMap(I, Reg: ResultReg); |
4002 | return true; |
4003 | } |
4004 | |
4005 | unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { |
4006 | assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || |
4007 | DestVT == MVT::i64) && |
4008 | "Unexpected value type." ); |
4009 | // Handle i8 and i16 as i32. |
4010 | if (DestVT == MVT::i8 || DestVT == MVT::i16) |
4011 | DestVT = MVT::i32; |
4012 | |
4013 | if (IsZExt) { |
4014 | unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1); |
4015 | assert(ResultReg && "Unexpected AND instruction emission failure." ); |
4016 | if (DestVT == MVT::i64) { |
4017 | // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the |
4018 | // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. |
4019 | Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
4020 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, |
4021 | TII.get(AArch64::SUBREG_TO_REG), Reg64) |
4022 | .addImm(0) |
4023 | .addReg(ResultReg) |
4024 | .addImm(AArch64::sub_32); |
4025 | ResultReg = Reg64; |
4026 | } |
4027 | return ResultReg; |
4028 | } else { |
4029 | if (DestVT == MVT::i64) { |
4030 | // FIXME: We're SExt i1 to i64. |
4031 | return 0; |
4032 | } |
4033 | return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, |
4034 | 0, 0); |
4035 | } |
4036 | } |
4037 | |
4038 | unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) { |
4039 | unsigned Opc, ZReg; |
4040 | switch (RetVT.SimpleTy) { |
4041 | default: return 0; |
4042 | case MVT::i8: |
4043 | case MVT::i16: |
4044 | case MVT::i32: |
4045 | RetVT = MVT::i32; |
4046 | Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; |
4047 | case MVT::i64: |
4048 | Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; |
4049 | } |
4050 | |
4051 | const TargetRegisterClass *RC = |
4052 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4053 | return fastEmitInst_rrr(MachineInstOpcode: Opc, RC, Op0, Op1, Op2: ZReg); |
4054 | } |
4055 | |
4056 | unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { |
4057 | if (RetVT != MVT::i64) |
4058 | return 0; |
4059 | |
4060 | return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, |
4061 | Op0, Op1, AArch64::XZR); |
4062 | } |
4063 | |
4064 | unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { |
4065 | if (RetVT != MVT::i64) |
4066 | return 0; |
4067 | |
4068 | return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, |
4069 | Op0, Op1, AArch64::XZR); |
4070 | } |
4071 | |
4072 | unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, |
4073 | unsigned Op1Reg) { |
4074 | unsigned Opc = 0; |
4075 | bool NeedTrunc = false; |
4076 | uint64_t Mask = 0; |
4077 | switch (RetVT.SimpleTy) { |
4078 | default: return 0; |
4079 | case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; |
4080 | case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; |
4081 | case MVT::i32: Opc = AArch64::LSLVWr; break; |
4082 | case MVT::i64: Opc = AArch64::LSLVXr; break; |
4083 | } |
4084 | |
4085 | const TargetRegisterClass *RC = |
4086 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4087 | if (NeedTrunc) |
4088 | Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); |
4089 | |
4090 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg); |
4091 | if (NeedTrunc) |
4092 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); |
4093 | return ResultReg; |
4094 | } |
4095 | |
/// Emit a left shift by the immediate \p Shift, folding the zero-/sign-
/// extension of a narrower \p SrcVT value into a single {U|S}BFM bitfield
/// move when possible. Returns the result register, or 0 if the shift is
/// out of range for the destination type.
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair." );
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type." );
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type." );

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
          .addReg(RegNo: Op0);
      return ResultReg;
    } else
      // Zero shift but differing types: only the extension remains.
      return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 4
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 8
  // Wd<32+7-24,32-24> = Wn<7:0>
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 12
  // Wd<32+3-20,32-20> = Wn<3:0>
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext

  // In the BFM encoding a left shift by 'Shift' is a rotate right by
  // RegSize - Shift (see the scheme above).
  unsigned ImmR = RegSize - Shift;
  // Limit the width to the length of the source type.
  unsigned ImmS = std::min<unsigned>(a: SrcBits - 1, b: DstBits - 1 - Shift);
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  // Rows: sign-/zero-extending form; columns: 32-/64-bit register form.
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // The 64-bit {U|S}BFM needs a 64-bit operand; implant the 32-bit value
    // into a 64-bit register with SUBREG_TO_REG first.
    Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
}
4173 | |
4174 | unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, |
4175 | unsigned Op1Reg) { |
4176 | unsigned Opc = 0; |
4177 | bool NeedTrunc = false; |
4178 | uint64_t Mask = 0; |
4179 | switch (RetVT.SimpleTy) { |
4180 | default: return 0; |
4181 | case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; |
4182 | case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; |
4183 | case MVT::i32: Opc = AArch64::LSRVWr; break; |
4184 | case MVT::i64: Opc = AArch64::LSRVXr; break; |
4185 | } |
4186 | |
4187 | const TargetRegisterClass *RC = |
4188 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4189 | if (NeedTrunc) { |
4190 | Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask); |
4191 | Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); |
4192 | } |
4193 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg); |
4194 | if (NeedTrunc) |
4195 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); |
4196 | return ResultReg; |
4197 | } |
4198 | |
/// Emit a logical shift right by the immediate \p Shift, folding the
/// zero-extension of a narrower \p SrcVT value into a single UBFM bitfield
/// move when possible. A sign-extension cannot be folded into a logical
/// shift and is emitted separately first. Returns the result register, or
/// 0 if the shift is out of range for the destination type.
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair." );
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type." );
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type." );

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
          .addReg(RegNo: Op0);
      return ResultReg;
    } else
      // Zero shift but differing types: only the extension remains.
      return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting out all bits of a zero-extended value always yields zero.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
    if (!Op0)
      return 0;
    // The value is now fully extended; continue as a zext of RetVT.
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  // UBFM extracts bits <ImmS:ImmR>; clamp r to the source width as in the
  // examples above.
  unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  // Rows: sign-/zero-extending form; columns: 32-/64-bit register form.
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // The 64-bit {U|S}BFM needs a 64-bit operand; implant the 32-bit value
    // into a 64-bit register with SUBREG_TO_REG first.
    Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
}
4289 | |
4290 | unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, |
4291 | unsigned Op1Reg) { |
4292 | unsigned Opc = 0; |
4293 | bool NeedTrunc = false; |
4294 | uint64_t Mask = 0; |
4295 | switch (RetVT.SimpleTy) { |
4296 | default: return 0; |
4297 | case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; |
4298 | case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; |
4299 | case MVT::i32: Opc = AArch64::ASRVWr; break; |
4300 | case MVT::i64: Opc = AArch64::ASRVXr; break; |
4301 | } |
4302 | |
4303 | const TargetRegisterClass *RC = |
4304 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4305 | if (NeedTrunc) { |
4306 | Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); |
4307 | Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); |
4308 | } |
4309 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg); |
4310 | if (NeedTrunc) |
4311 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); |
4312 | return ResultReg; |
4313 | } |
4314 | |
/// Emit an arithmetic shift right by the immediate \p Shift, folding the
/// zero-/sign-extension of a narrower \p SrcVT value into a single
/// {U|S}BFM bitfield move when possible. Returns the result register, or 0
/// if the shift is out of range for the destination type.
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair." );
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type." );
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type." );

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
          .addReg(RegNo: Op0);
      return ResultReg;
    } else
      // Zero shift but differing types: only the extension remains.
      return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting out all bits of a zero-extended value always yields zero.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT);

  // {U|S}BFM extracts bits <ImmS:ImmR>; clamp r to the source width as in
  // the examples above.
  unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  // Rows: sign-/zero-extending form; columns: 32-/64-bit register form.
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // The 64-bit {U|S}BFM needs a 64-bit operand; implant the 32-bit value
    // into a 64-bit register with SUBREG_TO_REG first.
    Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
}
4394 | |
4395 | unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, |
4396 | bool IsZExt) { |
4397 | assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?" ); |
4398 | |
4399 | // FastISel does not have plumbing to deal with extensions where the SrcVT or |
4400 | // DestVT are odd things, so test to make sure that they are both types we can |
4401 | // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise |
4402 | // bail out to SelectionDAG. |
4403 | if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && |
4404 | (DestVT != MVT::i32) && (DestVT != MVT::i64)) || |
4405 | ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && |
4406 | (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) |
4407 | return 0; |
4408 | |
4409 | unsigned Opc; |
4410 | unsigned Imm = 0; |
4411 | |
4412 | switch (SrcVT.SimpleTy) { |
4413 | default: |
4414 | return 0; |
4415 | case MVT::i1: |
4416 | return emiti1Ext(SrcReg, DestVT, IsZExt); |
4417 | case MVT::i8: |
4418 | if (DestVT == MVT::i64) |
4419 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
4420 | else |
4421 | Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; |
4422 | Imm = 7; |
4423 | break; |
4424 | case MVT::i16: |
4425 | if (DestVT == MVT::i64) |
4426 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
4427 | else |
4428 | Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; |
4429 | Imm = 15; |
4430 | break; |
4431 | case MVT::i32: |
4432 | assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?" ); |
4433 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
4434 | Imm = 31; |
4435 | break; |
4436 | } |
4437 | |
4438 | // Handle i8 and i16 as i32. |
4439 | if (DestVT == MVT::i8 || DestVT == MVT::i16) |
4440 | DestVT = MVT::i32; |
4441 | else if (DestVT == MVT::i64) { |
4442 | Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
4443 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, |
4444 | TII.get(AArch64::SUBREG_TO_REG), Src64) |
4445 | .addImm(0) |
4446 | .addReg(SrcReg) |
4447 | .addImm(AArch64::sub_32); |
4448 | SrcReg = Src64; |
4449 | } |
4450 | |
4451 | const TargetRegisterClass *RC = |
4452 | (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4453 | return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0: SrcReg, Imm1: 0, Imm2: Imm); |
4454 | } |
4455 | |
4456 | static bool isZExtLoad(const MachineInstr *LI) { |
4457 | switch (LI->getOpcode()) { |
4458 | default: |
4459 | return false; |
4460 | case AArch64::LDURBBi: |
4461 | case AArch64::LDURHHi: |
4462 | case AArch64::LDURWi: |
4463 | case AArch64::LDRBBui: |
4464 | case AArch64::LDRHHui: |
4465 | case AArch64::LDRWui: |
4466 | case AArch64::LDRBBroX: |
4467 | case AArch64::LDRHHroX: |
4468 | case AArch64::LDRWroX: |
4469 | case AArch64::LDRBBroW: |
4470 | case AArch64::LDRHHroW: |
4471 | case AArch64::LDRWroW: |
4472 | return true; |
4473 | } |
4474 | } |
4475 | |
4476 | static bool isSExtLoad(const MachineInstr *LI) { |
4477 | switch (LI->getOpcode()) { |
4478 | default: |
4479 | return false; |
4480 | case AArch64::LDURSBWi: |
4481 | case AArch64::LDURSHWi: |
4482 | case AArch64::LDURSBXi: |
4483 | case AArch64::LDURSHXi: |
4484 | case AArch64::LDURSWi: |
4485 | case AArch64::LDRSBWui: |
4486 | case AArch64::LDRSHWui: |
4487 | case AArch64::LDRSBXui: |
4488 | case AArch64::LDRSHXui: |
4489 | case AArch64::LDRSWui: |
4490 | case AArch64::LDRSBWroX: |
4491 | case AArch64::LDRSHWroX: |
4492 | case AArch64::LDRSBXroX: |
4493 | case AArch64::LDRSHXroX: |
4494 | case AArch64::LDRSWroX: |
4495 | case AArch64::LDRSBWroW: |
4496 | case AArch64::LDRSHWroW: |
4497 | case AArch64::LDRSBXroW: |
4498 | case AArch64::LDRSHXroW: |
4499 | case AArch64::LDRSWroW: |
4500 | return true; |
4501 | } |
4502 | } |
4503 | |
/// Try to fold the sign-/zero-extend \p I into the load feeding it. FastISel
/// emits extending loads up front, so if the already-selected load matches
/// the extension kind required we can reuse (or re-widen) its result
/// register instead of emitting a separate extend instruction.
bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
                                         MVT SrcVT) {
  // Only fold if the extend is the load's sole user.
  const auto *LI = dyn_cast<LoadInst>(Val: I->getOperand(i: 0));
  if (!LI || !LI->hasOneUse())
    return false;

  // Check if the load instruction has already been selected.
  Register Reg = lookUpRegForValue(V: LI);
  if (!Reg)
    return false;

  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  if (!MI)
    return false;

  // Check if the correct load instruction has been emitted - SelectionDAG might
  // have emitted a zero-extending load, but we need a sign-extending load.
  bool IsZExt = isa<ZExtInst>(Val: I);
  const auto *LoadMI = MI;
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    // The mapped register may be a truncating sub-register COPY of the
    // actual load; look through it to inspect the load opcode itself.
    Register LoadReg = MI->getOperand(i: 1).getReg();
    LoadMI = MRI.getUniqueVRegDef(Reg: LoadReg);
    assert(LoadMI && "Expected valid instruction" );
  }
  if (!(IsZExt && isZExtLoad(LI: LoadMI)) && !(!IsZExt && isSExtLoad(LI: LoadMI)))
    return false;

  // Nothing to be done.
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

  if (IsZExt) {
    // Widen the zero-extended 32-bit result to i64 with SUBREG_TO_REG.
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    // The sign-extending load already defines the full 64-bit value; use
    // its register directly and delete the now-dead truncating COPY.
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction" );
    Reg = MI->getOperand(i: 1).getReg();
    MachineBasicBlock::iterator I(MI);
    removeDeadCode(I, E: std::next(x: I));
  }
  updateValueMap(I, Reg);
  return true;
}
4557 | |
4558 | bool AArch64FastISel::selectIntExt(const Instruction *I) { |
4559 | assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
4560 | "Unexpected integer extend instruction." ); |
4561 | MVT RetVT; |
4562 | MVT SrcVT; |
4563 | if (!isTypeSupported(Ty: I->getType(), VT&: RetVT)) |
4564 | return false; |
4565 | |
4566 | if (!isTypeSupported(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT)) |
4567 | return false; |
4568 | |
4569 | // Try to optimize already sign-/zero-extended values from load instructions. |
4570 | if (optimizeIntExtLoad(I, RetVT, SrcVT)) |
4571 | return true; |
4572 | |
4573 | Register SrcReg = getRegForValue(V: I->getOperand(i: 0)); |
4574 | if (!SrcReg) |
4575 | return false; |
4576 | |
4577 | // Try to optimize already sign-/zero-extended values from function arguments. |
4578 | bool IsZExt = isa<ZExtInst>(Val: I); |
4579 | if (const auto *Arg = dyn_cast<Argument>(Val: I->getOperand(i: 0))) { |
4580 | if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { |
4581 | if (RetVT == MVT::i64 && SrcVT != MVT::i64) { |
4582 | Register ResultReg = createResultReg(&AArch64::GPR64RegClass); |
4583 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, |
4584 | TII.get(AArch64::SUBREG_TO_REG), ResultReg) |
4585 | .addImm(0) |
4586 | .addReg(SrcReg) |
4587 | .addImm(AArch64::sub_32); |
4588 | SrcReg = ResultReg; |
4589 | } |
4590 | |
4591 | updateValueMap(I, Reg: SrcReg); |
4592 | return true; |
4593 | } |
4594 | } |
4595 | |
4596 | unsigned ResultReg = emitIntExt(SrcVT, SrcReg, DestVT: RetVT, IsZExt); |
4597 | if (!ResultReg) |
4598 | return false; |
4599 | |
4600 | updateValueMap(I, Reg: ResultReg); |
4601 | return true; |
4602 | } |
4603 | |
4604 | bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { |
4605 | EVT DestEVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true); |
4606 | if (!DestEVT.isSimple()) |
4607 | return false; |
4608 | |
4609 | MVT DestVT = DestEVT.getSimpleVT(); |
4610 | if (DestVT != MVT::i64 && DestVT != MVT::i32) |
4611 | return false; |
4612 | |
4613 | unsigned DivOpc; |
4614 | bool Is64bit = (DestVT == MVT::i64); |
4615 | switch (ISDOpcode) { |
4616 | default: |
4617 | return false; |
4618 | case ISD::SREM: |
4619 | DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; |
4620 | break; |
4621 | case ISD::UREM: |
4622 | DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; |
4623 | break; |
4624 | } |
4625 | unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; |
4626 | Register Src0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4627 | if (!Src0Reg) |
4628 | return false; |
4629 | |
4630 | Register Src1Reg = getRegForValue(V: I->getOperand(i: 1)); |
4631 | if (!Src1Reg) |
4632 | return false; |
4633 | |
4634 | const TargetRegisterClass *RC = |
4635 | (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4636 | Register QuotReg = fastEmitInst_rr(MachineInstOpcode: DivOpc, RC, Op0: Src0Reg, Op1: Src1Reg); |
4637 | assert(QuotReg && "Unexpected DIV instruction emission failure." ); |
4638 | // The remainder is computed as numerator - (quotient * denominator) using the |
4639 | // MSUB instruction. |
4640 | Register ResultReg = fastEmitInst_rrr(MachineInstOpcode: MSubOpc, RC, Op0: QuotReg, Op1: Src1Reg, Op2: Src0Reg); |
4641 | updateValueMap(I, Reg: ResultReg); |
4642 | return true; |
4643 | } |
4644 | |
/// Select a mul instruction. Vector multiplies go through the generic
/// binary-op path; a scalar multiply by a power-of-2 constant is turned
/// into a left shift (folding a non-free extend of the other operand into
/// the shift), and everything else becomes a MUL.
bool AArch64FastISel::selectMul(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectBinaryOp(I, ISDOpcode: ISD::MUL);

  // Canonicalize a power-of-2 constant operand to the RHS.
  const Value *Src0 = I->getOperand(i: 0);
  const Value *Src1 = I->getOperand(i: 1);
  if (const auto *C = dyn_cast<ConstantInt>(Val: Src0))
    if (C->getValue().isPowerOf2())
      std::swap(a&: Src0, b&: Src1);

  // Try to simplify to a shift instruction.
  if (const auto *C = dyn_cast<ConstantInt>(Val: Src1))
    if (C->getValue().isPowerOf2()) {
      uint64_t ShiftVal = C->getValue().logBase2();
      MVT SrcVT = VT;
      bool IsZExt = true;
      // If the multiplied value is itself a non-free extend, start from the
      // narrower source and let emitLSL_ri fold the extension into the
      // shift. (The inner VT intentionally shadows the multiply's VT; it
      // only holds the extend's source type.)
      if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Src0)) {
        if (!isIntExtFree(I: ZExt)) {
          MVT VT;
          if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = true;
            Src0 = ZExt->getOperand(i_nocapture: 0);
          }
        }
      } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Src0)) {
        if (!isIntExtFree(I: SExt)) {
          MVT VT;
          if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = false;
            Src0 = SExt->getOperand(i_nocapture: 0);
          }
        }
      }

      Register Src0Reg = getRegForValue(V: Src0);
      if (!Src0Reg)
        return false;

      unsigned ResultReg =
          emitLSL_ri(RetVT: VT, SrcVT, Op0: Src0Reg, Shift: ShiftVal, IsZExt);

      // If the shift could not be emitted, fall through to a plain MUL.
      if (ResultReg) {
        updateValueMap(I, Reg: ResultReg);
        return true;
      }
    }

  Register Src0Reg = getRegForValue(V: I->getOperand(i: 0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(V: I->getOperand(i: 1));
  if (!Src1Reg)
    return false;

  unsigned ResultReg = emitMul_rr(RetVT: VT, Op0: Src0Reg, Op1: Src1Reg);

  if (!ResultReg)
    return false;

  updateValueMap(I, Reg: ResultReg);
  return true;
}
4714 | |
/// Select shl/lshr/ashr. Vector shifts use the generic operator path. A
/// constant shift amount uses the immediate-form emitters (which can fold a
/// non-free zero-/sign-extend of the shifted value); otherwise the
/// register-form shift variants are used.
bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(Ty: I->getType(), VT&: RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, Opcode: I->getOpcode());

  if (const auto *C = dyn_cast<ConstantInt>(Val: I->getOperand(i: 1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    // Only ashr depends on the sign bit; shl/lshr can treat the input as
    // zero-extended.
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(i: 0);
    // If the shifted value is a non-free extend, shift the narrower source
    // instead and let the immediate emitter fold the extension.
    if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Op0)) {
      if (!isIntExtFree(I: ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT&: TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(i_nocapture: 0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Op0)) {
      if (!isIntExtFree(I: SExt)) {
        MVT TmpVT;
        if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT&: TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(i_nocapture: 0);
        }
      }
    }

    Register Op0Reg = getRegForValue(V: Op0);
    if (!Op0Reg)
      return false;

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction." );
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, Reg: ResultReg);
    return true;
  }

  // Variable shift amount: use the register-form shift variants.
  Register Op0Reg = getRegForValue(V: I->getOperand(i: 0));
  if (!Op0Reg)
    return false;

  Register Op1Reg = getRegForValue(V: I->getOperand(i: 1));
  if (!Op1Reg)
    return false;

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction." );
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, Reg: ResultReg);
  return true;
}
4800 | |
4801 | bool AArch64FastISel::selectBitCast(const Instruction *I) { |
4802 | MVT RetVT, SrcVT; |
4803 | |
4804 | if (!isTypeLegal(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT)) |
4805 | return false; |
4806 | if (!isTypeLegal(Ty: I->getType(), VT&: RetVT)) |
4807 | return false; |
4808 | |
4809 | unsigned Opc; |
4810 | if (RetVT == MVT::f32 && SrcVT == MVT::i32) |
4811 | Opc = AArch64::FMOVWSr; |
4812 | else if (RetVT == MVT::f64 && SrcVT == MVT::i64) |
4813 | Opc = AArch64::FMOVXDr; |
4814 | else if (RetVT == MVT::i32 && SrcVT == MVT::f32) |
4815 | Opc = AArch64::FMOVSWr; |
4816 | else if (RetVT == MVT::i64 && SrcVT == MVT::f64) |
4817 | Opc = AArch64::FMOVDXr; |
4818 | else |
4819 | return false; |
4820 | |
4821 | const TargetRegisterClass *RC = nullptr; |
4822 | switch (RetVT.SimpleTy) { |
4823 | default: llvm_unreachable("Unexpected value type." ); |
4824 | case MVT::i32: RC = &AArch64::GPR32RegClass; break; |
4825 | case MVT::i64: RC = &AArch64::GPR64RegClass; break; |
4826 | case MVT::f32: RC = &AArch64::FPR32RegClass; break; |
4827 | case MVT::f64: RC = &AArch64::FPR64RegClass; break; |
4828 | } |
4829 | Register Op0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4830 | if (!Op0Reg) |
4831 | return false; |
4832 | |
4833 | Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC, Op0: Op0Reg); |
4834 | if (!ResultReg) |
4835 | return false; |
4836 | |
4837 | updateValueMap(I, Reg: ResultReg); |
4838 | return true; |
4839 | } |
4840 | |
4841 | bool AArch64FastISel::selectFRem(const Instruction *I) { |
4842 | MVT RetVT; |
4843 | if (!isTypeLegal(Ty: I->getType(), VT&: RetVT)) |
4844 | return false; |
4845 | |
4846 | RTLIB::Libcall LC; |
4847 | switch (RetVT.SimpleTy) { |
4848 | default: |
4849 | return false; |
4850 | case MVT::f32: |
4851 | LC = RTLIB::REM_F32; |
4852 | break; |
4853 | case MVT::f64: |
4854 | LC = RTLIB::REM_F64; |
4855 | break; |
4856 | } |
4857 | |
4858 | ArgListTy Args; |
4859 | Args.reserve(n: I->getNumOperands()); |
4860 | |
4861 | // Populate the argument list. |
4862 | for (auto &Arg : I->operands()) { |
4863 | ArgListEntry Entry; |
4864 | Entry.Val = Arg; |
4865 | Entry.Ty = Arg->getType(); |
4866 | Args.push_back(x: Entry); |
4867 | } |
4868 | |
4869 | CallLoweringInfo CLI; |
4870 | MCContext &Ctx = MF->getContext(); |
4871 | CLI.setCallee(DL, Ctx, CC: TLI.getLibcallCallingConv(Call: LC), ResultTy: I->getType(), |
4872 | Target: TLI.getLibcallName(Call: LC), ArgsList: std::move(Args)); |
4873 | if (!lowerCallTo(CLI)) |
4874 | return false; |
4875 | updateValueMap(I, Reg: CLI.ResultReg); |
4876 | return true; |
4877 | } |
4878 | |
/// Select an sdiv instruction. Division by a power of two (or its negation)
/// is lowered without a divide: an 'exact' divide becomes a plain
/// arithmetic shift; otherwise a round-towards-zero sequence of
/// add/cmp/csel/asr is emitted, with an extra negation for negative
/// divisors. All other divides fall back to the generic binary-op path.
bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(Ty: I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(Val: I->getOperand(i: 1)))
    return selectBinaryOp(I, ISDOpcode: ISD::SDIV);

  const APInt &C = cast<ConstantInt>(Val: I->getOperand(i: 1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || C.isNegatedPowerOf2()))
    return selectBinaryOp(I, ISDOpcode: ISD::SDIV);

  // Lg2 is k where |C| == 2^k (countr_zero works for both signs).
  unsigned Lg2 = C.countr_zero();
  Register Src0Reg = getRegForValue(V: I->getOperand(i: 0));
  if (!Src0Reg)
    return false;

  // An 'exact' sdiv has no remainder, so a plain arithmetic shift suffices.
  if (cast<BinaryOperator>(Val: I)->isExact()) {
    unsigned ResultReg = emitASR_ri(RetVT: VT, SrcVT: VT, Op0: Src0Reg, Shift: Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, Reg: ResultReg);
    return true;
  }

  // Bias a negative dividend by 2^k - 1 so the shift rounds towards zero.
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  unsigned AddReg = emitAdd_ri_(VT, Op0: Src0Reg, Imm: Pow2MinusOne);
  if (!AddReg)
    return false;

  // (Src0 < 0) ? Pow2 - 1 : 0;
  if (!emitICmp_ri(RetVT: VT, LHSReg: Src0Reg, Imm: 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  // Pick the biased value when Src0 < 0, the original value otherwise.
  Register SelectReg = fastEmitInst_rri(MachineInstOpcode: SelectOpc, RC, Op0: AddReg, Op1: Src0Reg,
                                        Imm: AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
  // negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    // 0 - (SelectReg >> Lg2), folded into one subtract-with-shifted-operand.
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, RetVT: VT, LHSReg: ZeroReg, RHSReg: SelectReg,
                              ShiftType: AArch64_AM::ASR, ShiftImm: Lg2);
  else
    ResultReg = emitASR_ri(RetVT: VT, SrcVT: VT, Op0: SelectReg, Shift: Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, Reg: ResultReg);
  return true;
}
4944 | |
4945 | /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We |
4946 | /// have to duplicate it for AArch64, because otherwise we would fail during the |
4947 | /// sign-extend emission. |
4948 | unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) { |
4949 | Register IdxN = getRegForValue(V: Idx); |
4950 | if (IdxN == 0) |
4951 | // Unhandled operand. Halt "fast" selection and bail. |
4952 | return 0; |
4953 | |
4954 | // If the index is smaller or larger than intptr_t, truncate or extend it. |
4955 | MVT PtrVT = TLI.getPointerTy(DL); |
4956 | EVT IdxVT = EVT::getEVT(Ty: Idx->getType(), /*HandleUnknown=*/false); |
4957 | if (IdxVT.bitsLT(VT: PtrVT)) { |
4958 | IdxN = emitIntExt(SrcVT: IdxVT.getSimpleVT(), SrcReg: IdxN, DestVT: PtrVT, /*isZExt=*/IsZExt: false); |
4959 | } else if (IdxVT.bitsGT(VT: PtrVT)) |
4960 | llvm_unreachable("AArch64 FastISel doesn't support types larger than i64" ); |
4961 | return IdxN; |
4962 | } |
4963 | |
4964 | /// This is mostly a copy of the existing FastISel GEP code, but we have to |
4965 | /// duplicate it for AArch64, because otherwise we would bail out even for |
4966 | /// simple cases. This is because the standard fastEmit functions don't cover |
4967 | /// MUL at all and ADD is lowered very inefficientily. |
4968 | bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { |
4969 | if (Subtarget->isTargetILP32()) |
4970 | return false; |
4971 | |
4972 | Register N = getRegForValue(V: I->getOperand(i: 0)); |
4973 | if (!N) |
4974 | return false; |
4975 | |
4976 | // Keep a running tab of the total offset to coalesce multiple N = N + Offset |
4977 | // into a single N = N + TotalOffset. |
4978 | uint64_t TotalOffs = 0; |
4979 | MVT VT = TLI.getPointerTy(DL); |
4980 | for (gep_type_iterator GTI = gep_type_begin(GEP: I), E = gep_type_end(GEP: I); |
4981 | GTI != E; ++GTI) { |
4982 | const Value *Idx = GTI.getOperand(); |
4983 | if (auto *StTy = GTI.getStructTypeOrNull()) { |
4984 | unsigned Field = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
4985 | // N = N + Offset |
4986 | if (Field) |
4987 | TotalOffs += DL.getStructLayout(Ty: StTy)->getElementOffset(Idx: Field); |
4988 | } else { |
4989 | // If this is a constant subscript, handle it quickly. |
4990 | if (const auto *CI = dyn_cast<ConstantInt>(Val: Idx)) { |
4991 | if (CI->isZero()) |
4992 | continue; |
4993 | // N = N + Offset |
4994 | TotalOffs += GTI.getSequentialElementStride(DL) * |
4995 | cast<ConstantInt>(Val: CI)->getSExtValue(); |
4996 | continue; |
4997 | } |
4998 | if (TotalOffs) { |
4999 | N = emitAdd_ri_(VT, Op0: N, Imm: TotalOffs); |
5000 | if (!N) |
5001 | return false; |
5002 | TotalOffs = 0; |
5003 | } |
5004 | |
5005 | // N = N + Idx * ElementSize; |
5006 | uint64_t ElementSize = GTI.getSequentialElementStride(DL); |
5007 | unsigned IdxN = getRegForGEPIndex(Idx); |
5008 | if (!IdxN) |
5009 | return false; |
5010 | |
5011 | if (ElementSize != 1) { |
5012 | unsigned C = fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, Imm: ElementSize); |
5013 | if (!C) |
5014 | return false; |
5015 | IdxN = emitMul_rr(RetVT: VT, Op0: IdxN, Op1: C); |
5016 | if (!IdxN) |
5017 | return false; |
5018 | } |
5019 | N = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::ADD, Op0: N, Op1: IdxN); |
5020 | if (!N) |
5021 | return false; |
5022 | } |
5023 | } |
5024 | if (TotalOffs) { |
5025 | N = emitAdd_ri_(VT, Op0: N, Imm: TotalOffs); |
5026 | if (!N) |
5027 | return false; |
5028 | } |
5029 | updateValueMap(I, Reg: N); |
5030 | return true; |
5031 | } |
5032 | |
// Select a cmpxchg instruction at -O0 by emitting the CMP_SWAP_{32,64}
// pseudo (expanded later into an LL/SC or CAS loop), then recomputing the
// i1 "success" status with an explicit compare + conditional increment,
// since the pseudo itself only yields the loaded value.
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOptLevel::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0" );

  // cmpxchg returns { loaded value, i1 success }; pull out the value type.
  auto *RetPairTy = cast<StructType>(Val: I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(N: 0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result" );

  MVT VT;
  if (!isTypeLegal(Ty: RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

  const MCInstrDesc &II = TII.get(Opcode: Opc);

  // Constrain each input vreg to the register class the pseudo's operand
  // (located after its defs) requires.
  const Register AddrReg = constrainOperandRegClass(
      II, Op: getRegForValue(V: I->getPointerOperand()), OpNum: II.getNumDefs());
  const Register DesiredReg = constrainOperandRegClass(
      II, Op: getRegForValue(V: I->getCompareOperand()), OpNum: II.getNumDefs() + 1);
  const Register NewReg = constrainOperandRegClass(
      II, Op: getRegForValue(V: I->getNewValOperand()), OpNum: II.getNumDefs() + 2);

  // ResultReg1 = loaded value, ResultReg2 = i1 success flag; they must be
  // allocated consecutively (see the assert below) so updateValueMap can
  // hand out both as a register pair. The status flag is always 32-bit.
  const Register ResultReg1 = createResultReg(RC: ResRC);
  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);

  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
      .addDef(RegNo: ResultReg1)
      .addDef(RegNo: ScratchReg)
      .addUse(RegNo: AddrReg)
      .addUse(RegNo: DesiredReg)
      .addUse(RegNo: NewReg);

  // Set NZCV from (loaded value - desired): SUBS into the zero register is a
  // compare. The shift immediate of 0 makes SUBS?rs a plain register compare.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

  // Materialize the success bit: CSINC on WZR/WZR yields 0 on NE (failure)
  // and 1 otherwise (success).
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers." );
  updateValueMap(I, Reg: ResultReg1, NumRegs: 2);
  return true;
}
5099 | |
// Top-level FastISel dispatch for AArch64. For several opcodes the generic
// target-independent selector (selectBinaryOp/selectCast/FastISel::…) is
// tried first and the AArch64-specific routine is used only as a fallback;
// for the rest, the target-specific routine is called directly. Returning
// false bails out of FastISel for this instruction.
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  // Some instructions are explicitly marked as unsupported by FastISel.
  if (TLI.fallBackToDAGISel(Inst: *I))
    return false;
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    // Generic selection first; fall back to the libcall-based lowering.
    if (!selectBinaryOp(I, ISDOpcode: ISD::SREM))
      return selectRem(I, ISDOpcode: ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISDOpcode: ISD::UREM))
      return selectRem(I, ISDOpcode: ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    // Try the generic bitcast selection before the AArch64-specific one.
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, Opcode: ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, Opcode: ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, Opcode: ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(I: cast<AtomicCmpXchgInst>(Val: I));
  }

  // fall-back to target-independent instruction selection.
  return selectOperator(I, Opcode: I->getOpcode());
}
5182 | |
5183 | FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, |
5184 | const TargetLibraryInfo *LibInfo) { |
5185 | |
5186 | SMEAttrs CallerAttrs(*FuncInfo.Fn); |
5187 | if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() || |
5188 | CallerAttrs.hasStreamingInterfaceOrBody() || |
5189 | CallerAttrs.hasStreamingCompatibleInterface()) |
5190 | return nullptr; |
5191 | return new AArch64FastISel(FuncInfo, LibInfo); |
5192 | } |
5193 | |