//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
13 | |
14 | #include "AArch64GlobalISelUtils.h" |
15 | #include "AArch64InstrInfo.h" |
16 | #include "AArch64MachineFunctionInfo.h" |
17 | #include "AArch64RegisterBankInfo.h" |
18 | #include "AArch64RegisterInfo.h" |
19 | #include "AArch64Subtarget.h" |
20 | #include "AArch64TargetMachine.h" |
21 | #include "MCTargetDesc/AArch64AddressingModes.h" |
22 | #include "MCTargetDesc/AArch64MCTargetDesc.h" |
23 | #include "llvm/BinaryFormat/Dwarf.h" |
24 | #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" |
25 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
26 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
27 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
28 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
29 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
30 | #include "llvm/CodeGen/MachineBasicBlock.h" |
31 | #include "llvm/CodeGen/MachineConstantPool.h" |
32 | #include "llvm/CodeGen/MachineFrameInfo.h" |
33 | #include "llvm/CodeGen/MachineFunction.h" |
34 | #include "llvm/CodeGen/MachineInstr.h" |
35 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
36 | #include "llvm/CodeGen/MachineMemOperand.h" |
37 | #include "llvm/CodeGen/MachineOperand.h" |
38 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
39 | #include "llvm/CodeGen/TargetOpcodes.h" |
40 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
41 | #include "llvm/IR/Constants.h" |
42 | #include "llvm/IR/DerivedTypes.h" |
43 | #include "llvm/IR/Instructions.h" |
44 | #include "llvm/IR/IntrinsicsAArch64.h" |
45 | #include "llvm/IR/PatternMatch.h" |
46 | #include "llvm/IR/Type.h" |
47 | #include "llvm/Pass.h" |
48 | #include "llvm/Support/Debug.h" |
49 | #include "llvm/Support/raw_ostream.h" |
50 | #include <optional> |
51 | |
52 | #define DEBUG_TYPE "aarch64-isel" |
53 | |
54 | using namespace llvm; |
55 | using namespace MIPatternMatch; |
56 | using namespace AArch64GISelUtils; |
57 | |
// Forward declarations for profile-info types referenced by setupMF() below;
// only pointers are passed through, so the full definitions are not needed.
namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
} // end namespace llvm
62 | |
63 | namespace { |
64 | |
65 | #define GET_GLOBALISEL_PREDICATE_BITSET |
66 | #include "AArch64GenGlobalISel.inc" |
67 | #undef GET_GLOBALISEL_PREDICATE_BITSET |
68 | |
69 | |
70 | class AArch64InstructionSelector : public InstructionSelector { |
71 | public: |
72 | AArch64InstructionSelector(const AArch64TargetMachine &TM, |
73 | const AArch64Subtarget &STI, |
74 | const AArch64RegisterBankInfo &RBI); |
75 | |
76 | bool select(MachineInstr &I) override; |
77 | static const char *getName() { return DEBUG_TYPE; } |
78 | |
79 | void setupMF(MachineFunction &MF, GISelKnownBits *KB, |
80 | CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, |
81 | BlockFrequencyInfo *BFI) override { |
82 | InstructionSelector::setupMF(mf&: MF, kb: KB, covinfo: CoverageInfo, psi: PSI, bfi: BFI); |
83 | MIB.setMF(MF); |
84 | |
85 | // hasFnAttribute() is expensive to call on every BRCOND selection, so |
86 | // cache it here for each run of the selector. |
87 | ProduceNonFlagSettingCondBr = |
88 | !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); |
89 | MFReturnAddr = Register(); |
90 | |
91 | processPHIs(MF); |
92 | } |
93 | |
94 | private: |
95 | /// tblgen-erated 'select' implementation, used as the initial selector for |
96 | /// the patterns that don't require complex C++. |
97 | bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; |
98 | |
99 | // A lowering phase that runs before any selection attempts. |
100 | // Returns true if the instruction was modified. |
101 | bool preISelLower(MachineInstr &I); |
102 | |
103 | // An early selection function that runs before the selectImpl() call. |
104 | bool earlySelect(MachineInstr &I); |
105 | |
106 | /// Save state that is shared between select calls, call select on \p I and |
107 | /// then restore the saved state. This can be used to recursively call select |
108 | /// within a select call. |
109 | bool selectAndRestoreState(MachineInstr &I); |
110 | |
111 | // Do some preprocessing of G_PHIs before we begin selection. |
112 | void processPHIs(MachineFunction &MF); |
113 | |
114 | bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI); |
115 | |
116 | /// Eliminate same-sized cross-bank copies into stores before selectImpl(). |
117 | bool contractCrossBankCopyIntoStore(MachineInstr &I, |
118 | MachineRegisterInfo &MRI); |
119 | |
120 | bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); |
121 | |
122 | bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, |
123 | MachineRegisterInfo &MRI) const; |
124 | bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, |
125 | MachineRegisterInfo &MRI) const; |
126 | |
127 | ///@{ |
128 | /// Helper functions for selectCompareBranch. |
129 | bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, |
130 | MachineIRBuilder &MIB) const; |
131 | bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, |
132 | MachineIRBuilder &MIB) const; |
133 | bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, |
134 | MachineIRBuilder &MIB) const; |
135 | bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, |
136 | MachineBasicBlock *DstMBB, |
137 | MachineIRBuilder &MIB) const; |
138 | ///@} |
139 | |
140 | bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, |
141 | MachineRegisterInfo &MRI); |
142 | |
143 | bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI); |
144 | bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI); |
145 | |
146 | // Helper to generate an equivalent of scalar_to_vector into a new register, |
147 | // returned via 'Dst'. |
148 | MachineInstr *emitScalarToVector(unsigned EltSize, |
149 | const TargetRegisterClass *DstRC, |
150 | Register Scalar, |
151 | MachineIRBuilder &MIRBuilder) const; |
152 | /// Helper to narrow vector that was widened by emitScalarToVector. |
153 | /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit |
154 | /// vector, correspondingly. |
155 | MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg, |
156 | MachineIRBuilder &MIRBuilder, |
157 | MachineRegisterInfo &MRI) const; |
158 | |
159 | /// Emit a lane insert into \p DstReg, or a new vector register if |
160 | /// std::nullopt is provided. |
161 | /// |
162 | /// The lane inserted into is defined by \p LaneIdx. The vector source |
163 | /// register is given by \p SrcReg. The register containing the element is |
164 | /// given by \p EltReg. |
165 | MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg, |
166 | Register EltReg, unsigned LaneIdx, |
167 | const RegisterBank &RB, |
168 | MachineIRBuilder &MIRBuilder) const; |
169 | |
170 | /// Emit a sequence of instructions representing a constant \p CV for a |
171 | /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.) |
172 | /// |
173 | /// \returns the last instruction in the sequence on success, and nullptr |
174 | /// otherwise. |
175 | MachineInstr *emitConstantVector(Register Dst, Constant *CV, |
176 | MachineIRBuilder &MIRBuilder, |
177 | MachineRegisterInfo &MRI); |
178 | |
179 | MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits, |
180 | MachineIRBuilder &MIRBuilder); |
181 | |
182 | MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits, |
183 | MachineIRBuilder &MIRBuilder, bool Inv); |
184 | |
185 | MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits, |
186 | MachineIRBuilder &MIRBuilder, bool Inv); |
187 | MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits, |
188 | MachineIRBuilder &MIRBuilder); |
189 | MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits, |
190 | MachineIRBuilder &MIRBuilder, bool Inv); |
191 | MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits, |
192 | MachineIRBuilder &MIRBuilder); |
193 | |
194 | bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI); |
195 | bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, |
196 | MachineRegisterInfo &MRI); |
197 | /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a |
198 | /// SUBREG_TO_REG. |
199 | bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI); |
200 | bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI); |
201 | bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI); |
202 | bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI); |
203 | |
204 | bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI); |
205 | bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI); |
206 | bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI); |
207 | bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI); |
208 | |
209 | /// Helper function to select vector load intrinsics like |
210 | /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc. |
211 | /// \p Opc is the opcode that the selected instruction should use. |
212 | /// \p NumVecs is the number of vector destinations for the instruction. |
213 | /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction. |
214 | bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs, |
215 | MachineInstr &I); |
216 | bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs, |
217 | MachineInstr &I); |
218 | void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs, |
219 | unsigned Opc); |
220 | bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs, |
221 | unsigned Opc); |
222 | bool selectIntrinsicWithSideEffects(MachineInstr &I, |
223 | MachineRegisterInfo &MRI); |
224 | bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); |
225 | bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI); |
226 | bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI); |
227 | bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); |
228 | bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); |
229 | bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); |
230 | bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); |
231 | bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); |
232 | |
233 | bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI); |
234 | bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI); |
235 | bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI); |
236 | |
237 | unsigned emitConstantPoolEntry(const Constant *CPVal, |
238 | MachineFunction &MF) const; |
239 | MachineInstr *emitLoadFromConstantPool(const Constant *CPVal, |
240 | MachineIRBuilder &MIRBuilder) const; |
241 | |
242 | // Emit a vector concat operation. |
243 | MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1, |
244 | Register Op2, |
245 | MachineIRBuilder &MIRBuilder) const; |
246 | |
247 | // Emit an integer compare between LHS and RHS, which checks for Predicate. |
248 | MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, |
249 | MachineOperand &Predicate, |
250 | MachineIRBuilder &MIRBuilder) const; |
251 | |
252 | /// Emit a floating point comparison between \p LHS and \p RHS. |
253 | /// \p Pred if given is the intended predicate to use. |
254 | MachineInstr * |
255 | emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder, |
256 | std::optional<CmpInst::Predicate> = std::nullopt) const; |
257 | |
258 | MachineInstr * |
259 | emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, |
260 | std::initializer_list<llvm::SrcOp> SrcOps, |
261 | MachineIRBuilder &MIRBuilder, |
262 | const ComplexRendererFns &RenderFns = std::nullopt) const; |
263 | /// Helper function to emit an add or sub instruction. |
264 | /// |
265 | /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above |
266 | /// in a specific order. |
267 | /// |
268 | /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. |
269 | /// |
270 | /// \code |
271 | /// const std::array<std::array<unsigned, 2>, 4> Table { |
272 | /// {{AArch64::ADDXri, AArch64::ADDWri}, |
273 | /// {AArch64::ADDXrs, AArch64::ADDWrs}, |
274 | /// {AArch64::ADDXrr, AArch64::ADDWrr}, |
275 | /// {AArch64::SUBXri, AArch64::SUBWri}, |
276 | /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; |
277 | /// \endcode |
278 | /// |
279 | /// Each row in the table corresponds to a different addressing mode. Each |
280 | /// column corresponds to a different register size. |
281 | /// |
282 | /// \attention Rows must be structured as follows: |
283 | /// - Row 0: The ri opcode variants |
284 | /// - Row 1: The rs opcode variants |
285 | /// - Row 2: The rr opcode variants |
286 | /// - Row 3: The ri opcode variants for negative immediates |
287 | /// - Row 4: The rx opcode variants |
288 | /// |
289 | /// \attention Columns must be structured as follows: |
290 | /// - Column 0: The 64-bit opcode variants |
291 | /// - Column 1: The 32-bit opcode variants |
292 | /// |
293 | /// \p Dst is the destination register of the binop to emit. |
294 | /// \p LHS is the left-hand operand of the binop to emit. |
295 | /// \p RHS is the right-hand operand of the binop to emit. |
296 | MachineInstr *emitAddSub( |
297 | const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, |
298 | Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
299 | MachineIRBuilder &MIRBuilder) const; |
300 | MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, |
301 | MachineOperand &RHS, |
302 | MachineIRBuilder &MIRBuilder) const; |
303 | MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
304 | MachineIRBuilder &MIRBuilder) const; |
305 | MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
306 | MachineIRBuilder &MIRBuilder) const; |
307 | MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
308 | MachineIRBuilder &MIRBuilder) const; |
309 | MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
310 | MachineIRBuilder &MIRBuilder) const; |
311 | MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, |
312 | MachineIRBuilder &MIRBuilder) const; |
313 | MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, |
314 | MachineIRBuilder &MIRBuilder) const; |
315 | MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, |
316 | AArch64CC::CondCode CC, |
317 | MachineIRBuilder &MIRBuilder) const; |
318 | MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg, |
319 | const RegisterBank &DstRB, LLT ScalarTy, |
320 | Register VecReg, unsigned LaneIdx, |
321 | MachineIRBuilder &MIRBuilder) const; |
322 | MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2, |
323 | AArch64CC::CondCode Pred, |
324 | MachineIRBuilder &MIRBuilder) const; |
325 | /// Emit a CSet for a FP compare. |
326 | /// |
327 | /// \p Dst is expected to be a 32-bit scalar register. |
328 | MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, |
329 | MachineIRBuilder &MIRBuilder) const; |
330 | |
331 | /// Emit an instruction that sets NZCV to the carry-in expected by \p I. |
332 | /// Might elide the instruction if the previous instruction already sets NZCV |
333 | /// correctly. |
334 | MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg); |
335 | |
336 | /// Emit the overflow op for \p Opcode. |
337 | /// |
338 | /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, |
339 | /// G_USUBO, etc. |
340 | std::pair<MachineInstr *, AArch64CC::CondCode> |
341 | emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, |
342 | MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; |
343 | |
344 | bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI); |
345 | |
346 | /// Emit expression as a conjunction (a series of CCMP/CFCMP ops). |
347 | /// In some cases this is even possible with OR operations in the expression. |
348 | MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC, |
349 | MachineIRBuilder &MIB) const; |
350 | MachineInstr *emitConditionalComparison(Register LHS, Register RHS, |
351 | CmpInst::Predicate CC, |
352 | AArch64CC::CondCode Predicate, |
353 | AArch64CC::CondCode OutCC, |
354 | MachineIRBuilder &MIB) const; |
355 | MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC, |
356 | bool Negate, Register CCOp, |
357 | AArch64CC::CondCode Predicate, |
358 | MachineIRBuilder &MIB) const; |
359 | |
360 | /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. |
361 | /// \p IsNegative is true if the test should be "not zero". |
362 | /// This will also optimize the test bit instruction when possible. |
363 | MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative, |
364 | MachineBasicBlock *DstMBB, |
365 | MachineIRBuilder &MIB) const; |
366 | |
367 | /// Emit a CB(N)Z instruction which branches to \p DestMBB. |
368 | MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, |
369 | MachineBasicBlock *DestMBB, |
370 | MachineIRBuilder &MIB) const; |
371 | |
372 | // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. |
373 | // We use these manually instead of using the importer since it doesn't |
374 | // support SDNodeXForm. |
375 | ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const; |
376 | ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const; |
377 | ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; |
378 | ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; |
379 | |
380 | ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; |
381 | ComplexRendererFns selectArithImmed(MachineOperand &Root) const; |
382 | ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; |
383 | |
384 | ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, |
385 | unsigned Size) const; |
386 | |
387 | ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { |
388 | return selectAddrModeUnscaled(Root, Size: 1); |
389 | } |
390 | ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { |
391 | return selectAddrModeUnscaled(Root, Size: 2); |
392 | } |
393 | ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { |
394 | return selectAddrModeUnscaled(Root, Size: 4); |
395 | } |
396 | ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { |
397 | return selectAddrModeUnscaled(Root, Size: 8); |
398 | } |
399 | ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { |
400 | return selectAddrModeUnscaled(Root, Size: 16); |
401 | } |
402 | |
403 | /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used |
404 | /// from complex pattern matchers like selectAddrModeIndexed(). |
405 | ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size, |
406 | MachineRegisterInfo &MRI) const; |
407 | |
408 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, |
409 | unsigned Size) const; |
410 | template <int Width> |
411 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { |
412 | return selectAddrModeIndexed(Root, Size: Width / 8); |
413 | } |
414 | |
415 | bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, |
416 | const MachineRegisterInfo &MRI) const; |
417 | ComplexRendererFns |
418 | selectAddrModeShiftedExtendXReg(MachineOperand &Root, |
419 | unsigned SizeInBytes) const; |
420 | |
421 | /// Returns a \p ComplexRendererFns which contains a base, offset, and whether |
422 | /// or not a shift + extend should be folded into an addressing mode. Returns |
423 | /// None when this is not profitable or possible. |
424 | ComplexRendererFns |
425 | selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, |
426 | MachineOperand &Offset, unsigned SizeInBytes, |
427 | bool WantsExt) const; |
428 | ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; |
429 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, |
430 | unsigned SizeInBytes) const; |
431 | template <int Width> |
432 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { |
433 | return selectAddrModeXRO(Root, SizeInBytes: Width / 8); |
434 | } |
435 | |
436 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root, |
437 | unsigned SizeInBytes) const; |
438 | template <int Width> |
439 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { |
440 | return selectAddrModeWRO(Root, SizeInBytes: Width / 8); |
441 | } |
442 | |
443 | ComplexRendererFns selectShiftedRegister(MachineOperand &Root, |
444 | bool AllowROR = false) const; |
445 | |
446 | ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { |
447 | return selectShiftedRegister(Root); |
448 | } |
449 | |
450 | ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { |
451 | return selectShiftedRegister(Root, AllowROR: true); |
452 | } |
453 | |
454 | /// Given an extend instruction, determine the correct shift-extend type for |
455 | /// that instruction. |
456 | /// |
457 | /// If the instruction is going to be used in a load or store, pass |
458 | /// \p IsLoadStore = true. |
459 | AArch64_AM::ShiftExtendType |
460 | getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI, |
461 | bool IsLoadStore = false) const; |
462 | |
463 | /// Move \p Reg to \p RC if \p Reg is not already on \p RC. |
464 | /// |
465 | /// \returns Either \p Reg if no change was necessary, or the new register |
466 | /// created by moving \p Reg. |
467 | /// |
468 | /// Note: This uses emitCopy right now. |
469 | Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC, |
470 | MachineIRBuilder &MIB) const; |
471 | |
472 | ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; |
473 | |
474 | ComplexRendererFns selectExtractHigh(MachineOperand &Root) const; |
475 | |
476 | void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, |
477 | int OpIdx = -1) const; |
478 | void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, |
479 | int OpIdx = -1) const; |
480 | void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, |
481 | int OpIdx = -1) const; |
482 | void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI, |
483 | int OpIdx = -1) const; |
484 | void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, |
485 | int OpIdx = -1) const; |
486 | void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI, |
487 | int OpIdx = -1) const; |
488 | void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB, |
489 | const MachineInstr &MI, |
490 | int OpIdx = -1) const; |
491 | |
492 | // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. |
493 | void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags); |
494 | |
495 | // Optimization methods. |
496 | bool tryOptSelect(GSelect &Sel); |
497 | bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI); |
498 | MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, |
499 | MachineOperand &Predicate, |
500 | MachineIRBuilder &MIRBuilder) const; |
501 | |
502 | /// Return true if \p MI is a load or store of \p NumBytes bytes. |
503 | bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; |
504 | |
505 | /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit |
506 | /// register zeroed out. In other words, the result of MI has been explicitly |
507 | /// zero extended. |
508 | bool isDef32(const MachineInstr &MI) const; |
509 | |
510 | const AArch64TargetMachine &TM; |
511 | const AArch64Subtarget &STI; |
512 | const AArch64InstrInfo &TII; |
513 | const AArch64RegisterInfo &TRI; |
514 | const AArch64RegisterBankInfo &RBI; |
515 | |
516 | bool ProduceNonFlagSettingCondBr = false; |
517 | |
518 | // Some cached values used during selection. |
519 | // We use LR as a live-in register, and we keep track of it here as it can be |
520 | // clobbered by calls. |
521 | Register MFReturnAddr; |
522 | |
523 | MachineIRBuilder MIB; |
524 | |
525 | #define GET_GLOBALISEL_PREDICATES_DECL |
526 | #include "AArch64GenGlobalISel.inc" |
527 | #undef GET_GLOBALISEL_PREDICATES_DECL |
528 | |
529 | // We declare the temporaries used by selectImpl() in the class to minimize the |
530 | // cost of constructing placeholder values. |
531 | #define GET_GLOBALISEL_TEMPORARIES_DECL |
532 | #include "AArch64GenGlobalISel.inc" |
533 | #undef GET_GLOBALISEL_TEMPORARIES_DECL |
534 | }; |
535 | |
536 | } // end anonymous namespace |
537 | |
538 | #define GET_GLOBALISEL_IMPL |
539 | #include "AArch64GenGlobalISel.inc" |
540 | #undef GET_GLOBALISEL_IMPL |
541 | |
// Constructor: caches pointers to the subtarget's instruction/register info
// and runs the TableGen-generated initializers for the predicate bitset and
// the selectImpl() temporaries (spliced in via the .inc includes below).
AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
555 | |
556 | // FIXME: This should be target-independent, inferred from the types declared |
557 | // for each class in the bank. |
558 | // |
559 | /// Given a register bank, and a type, return the smallest register class that |
560 | /// can represent that combination. |
561 | static const TargetRegisterClass * |
562 | getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, |
563 | bool GetAllRegSet = false) { |
564 | if (RB.getID() == AArch64::GPRRegBankID) { |
565 | if (Ty.getSizeInBits() <= 32) |
566 | return GetAllRegSet ? &AArch64::GPR32allRegClass |
567 | : &AArch64::GPR32RegClass; |
568 | if (Ty.getSizeInBits() == 64) |
569 | return GetAllRegSet ? &AArch64::GPR64allRegClass |
570 | : &AArch64::GPR64RegClass; |
571 | if (Ty.getSizeInBits() == 128) |
572 | return &AArch64::XSeqPairsClassRegClass; |
573 | return nullptr; |
574 | } |
575 | |
576 | if (RB.getID() == AArch64::FPRRegBankID) { |
577 | switch (Ty.getSizeInBits()) { |
578 | case 8: |
579 | return &AArch64::FPR8RegClass; |
580 | case 16: |
581 | return &AArch64::FPR16RegClass; |
582 | case 32: |
583 | return &AArch64::FPR32RegClass; |
584 | case 64: |
585 | return &AArch64::FPR64RegClass; |
586 | case 128: |
587 | return &AArch64::FPR128RegClass; |
588 | } |
589 | return nullptr; |
590 | } |
591 | |
592 | return nullptr; |
593 | } |
594 | |
595 | /// Given a register bank, and size in bits, return the smallest register class |
596 | /// that can represent that combination. |
597 | static const TargetRegisterClass * |
598 | getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, |
599 | bool GetAllRegSet = false) { |
600 | unsigned RegBankID = RB.getID(); |
601 | |
602 | if (RegBankID == AArch64::GPRRegBankID) { |
603 | if (SizeInBits <= 32) |
604 | return GetAllRegSet ? &AArch64::GPR32allRegClass |
605 | : &AArch64::GPR32RegClass; |
606 | if (SizeInBits == 64) |
607 | return GetAllRegSet ? &AArch64::GPR64allRegClass |
608 | : &AArch64::GPR64RegClass; |
609 | if (SizeInBits == 128) |
610 | return &AArch64::XSeqPairsClassRegClass; |
611 | } |
612 | |
613 | if (RegBankID == AArch64::FPRRegBankID) { |
614 | switch (SizeInBits) { |
615 | default: |
616 | return nullptr; |
617 | case 8: |
618 | return &AArch64::FPR8RegClass; |
619 | case 16: |
620 | return &AArch64::FPR16RegClass; |
621 | case 32: |
622 | return &AArch64::FPR32RegClass; |
623 | case 64: |
624 | return &AArch64::FPR64RegClass; |
625 | case 128: |
626 | return &AArch64::FPR128RegClass; |
627 | } |
628 | } |
629 | |
630 | return nullptr; |
631 | } |
632 | |
633 | /// Returns the correct subregister to use for a given register class. |
634 | static bool getSubRegForClass(const TargetRegisterClass *RC, |
635 | const TargetRegisterInfo &TRI, unsigned &SubReg) { |
636 | switch (TRI.getRegSizeInBits(RC: *RC)) { |
637 | case 8: |
638 | SubReg = AArch64::bsub; |
639 | break; |
640 | case 16: |
641 | SubReg = AArch64::hsub; |
642 | break; |
643 | case 32: |
644 | if (RC != &AArch64::FPR32RegClass) |
645 | SubReg = AArch64::sub_32; |
646 | else |
647 | SubReg = AArch64::ssub; |
648 | break; |
649 | case 64: |
650 | SubReg = AArch64::dsub; |
651 | break; |
652 | default: |
653 | LLVM_DEBUG( |
654 | dbgs() << "Couldn't find appropriate subregister for register class." ); |
655 | return false; |
656 | } |
657 | |
658 | return true; |
659 | } |
660 | |
661 | /// Returns the minimum size the given register bank can hold. |
662 | static unsigned getMinSizeForRegBank(const RegisterBank &RB) { |
663 | switch (RB.getID()) { |
664 | case AArch64::GPRRegBankID: |
665 | return 32; |
666 | case AArch64::FPRRegBankID: |
667 | return 8; |
668 | default: |
669 | llvm_unreachable("Tried to get minimum size for unknown register bank." ); |
670 | } |
671 | } |
672 | |
673 | /// Create a REG_SEQUENCE instruction using the registers in \p Regs. |
674 | /// Helper function for functions like createDTuple and createQTuple. |
675 | /// |
676 | /// \p RegClassIDs - The list of register class IDs available for some tuple of |
677 | /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is |
678 | /// expected to contain between 2 and 4 tuple classes. |
679 | /// |
680 | /// \p SubRegs - The list of subregister classes associated with each register |
681 | /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0 |
682 | /// subregister class. The index of each subregister class is expected to |
683 | /// correspond with the index of each register class. |
684 | /// |
685 | /// \returns Either the destination register of REG_SEQUENCE instruction that |
686 | /// was created, or the 0th element of \p Regs if \p Regs contains a single |
687 | /// element. |
688 | static Register createTuple(ArrayRef<Register> Regs, |
689 | const unsigned RegClassIDs[], |
690 | const unsigned SubRegs[], MachineIRBuilder &MIB) { |
691 | unsigned NumRegs = Regs.size(); |
692 | if (NumRegs == 1) |
693 | return Regs[0]; |
694 | assert(NumRegs >= 2 && NumRegs <= 4 && |
695 | "Only support between two and 4 registers in a tuple!" ); |
696 | const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo(); |
697 | auto *DesiredClass = TRI->getRegClass(i: RegClassIDs[NumRegs - 2]); |
698 | auto RegSequence = |
699 | MIB.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {DesiredClass}, SrcOps: {}); |
700 | for (unsigned I = 0, E = Regs.size(); I < E; ++I) { |
701 | RegSequence.addUse(RegNo: Regs[I]); |
702 | RegSequence.addImm(Val: SubRegs[I]); |
703 | } |
704 | return RegSequence.getReg(Idx: 0); |
705 | } |
706 | |
707 | /// Create a tuple of D-registers using the registers in \p Regs. |
708 | static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { |
709 | static const unsigned RegClassIDs[] = { |
710 | AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; |
711 | static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, |
712 | AArch64::dsub2, AArch64::dsub3}; |
713 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); |
714 | } |
715 | |
716 | /// Create a tuple of Q-registers using the registers in \p Regs. |
717 | static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { |
718 | static const unsigned RegClassIDs[] = { |
719 | AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; |
720 | static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, |
721 | AArch64::qsub2, AArch64::qsub3}; |
722 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); |
723 | } |
724 | |
725 | static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { |
726 | auto &MI = *Root.getParent(); |
727 | auto &MBB = *MI.getParent(); |
728 | auto &MF = *MBB.getParent(); |
729 | auto &MRI = MF.getRegInfo(); |
730 | uint64_t Immed; |
731 | if (Root.isImm()) |
732 | Immed = Root.getImm(); |
733 | else if (Root.isCImm()) |
734 | Immed = Root.getCImm()->getZExtValue(); |
735 | else if (Root.isReg()) { |
736 | auto ValAndVReg = |
737 | getIConstantVRegValWithLookThrough(VReg: Root.getReg(), MRI, LookThroughInstrs: true); |
738 | if (!ValAndVReg) |
739 | return std::nullopt; |
740 | Immed = ValAndVReg->Value.getSExtValue(); |
741 | } else |
742 | return std::nullopt; |
743 | return Immed; |
744 | } |
745 | |
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - all operands are not in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
///
/// \returns true if \p I cannot be selected by this selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n" );
    return true;
  }

  // Walk all operands (including the def) and require every one to be a
  // virtual register on the same register bank.
  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n" );
      return true;
    }

    // FIXME: Can generic operations have physical registers operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!MO.getReg().isVirtual()) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n" );
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n" );
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n" );
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}
793 | |
/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    // On the GPR bank, only the variable shifts (and 64-bit G_PTR_ADD) are
    // handled here; other binops are selected elsewhere.
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    // Scalar floating-point arithmetic; S-register forms for 32 bits and
    // D-register forms for 64 bits.
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        // A 64-bit FPR G_OR is done with the 8x8 vector ORR.
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}
863 | |
864 | /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, |
865 | /// appropriate for the (value) register bank \p RegBankID and of memory access |
866 | /// size \p OpSize. This returns the variant with the base+unsigned-immediate |
867 | /// addressing mode (e.g., LDRXui). |
868 | /// \returns \p GenericOpc if the combination is unsupported. |
869 | static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, |
870 | unsigned OpSize) { |
871 | const bool isStore = GenericOpc == TargetOpcode::G_STORE; |
872 | switch (RegBankID) { |
873 | case AArch64::GPRRegBankID: |
874 | switch (OpSize) { |
875 | case 8: |
876 | return isStore ? AArch64::STRBBui : AArch64::LDRBBui; |
877 | case 16: |
878 | return isStore ? AArch64::STRHHui : AArch64::LDRHHui; |
879 | case 32: |
880 | return isStore ? AArch64::STRWui : AArch64::LDRWui; |
881 | case 64: |
882 | return isStore ? AArch64::STRXui : AArch64::LDRXui; |
883 | } |
884 | break; |
885 | case AArch64::FPRRegBankID: |
886 | switch (OpSize) { |
887 | case 8: |
888 | return isStore ? AArch64::STRBui : AArch64::LDRBui; |
889 | case 16: |
890 | return isStore ? AArch64::STRHui : AArch64::LDRHui; |
891 | case 32: |
892 | return isStore ? AArch64::STRSui : AArch64::LDRSui; |
893 | case 64: |
894 | return isStore ? AArch64::STRDui : AArch64::LDRDui; |
895 | case 128: |
896 | return isStore ? AArch64::STRQui : AArch64::LDRQui; |
897 | } |
898 | break; |
899 | } |
900 | return GenericOpc; |
901 | } |
902 | |
/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g "To = COPY SrcReg:SubReg"
///
/// Rewrites the source operand of \p I in place to read the new subregister
/// copy, and constrains the (virtual) destination register to \p To.
/// \returns true (always succeeds).
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?" );
  assert(To && "Destination register class cannot be null" );
  assert(SubReg && "Expected a valid subregister" );

  // Build the subregister COPY right before I, then redirect I's source
  // operand to read it.
  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {To}, SrcOps: {}).addReg(RegNo: SrcReg, flags: 0, SubReg);
  MachineOperand &RegOp = I.getOperand(i: 1);
  RegOp.setReg(SubRegCopy.getReg(Idx: 0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!I.getOperand(i: 0).getReg().isPhysical())
    RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), RC: *To, MRI);

  return true;
}
927 | |
/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(i: 0).getReg();
  Register SrcReg = I.getOperand(i: 1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(Reg: DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(Reg: SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  // Ask for the minimal classes (from the full register sets) that can hold
  // each value on its bank.
  return {getMinClassForRegBank(RB: SrcRegBank, SizeInBits: SrcSize, GetAllRegSet: true),
          getMinClassForRegBank(RB: DstRegBank, SizeInBits: DstSize, GetAllRegSet: true)};
}
957 | |
// FIXME: We need some sort of API in RBI/TRI to allow generic code to
// constrain operands of simple instructions given a TargetRegisterClass
// and LLT
//
// Constrain every virtual register operand of a debug instruction (e.g.
// DBG_VALUE) to a concrete register class derived from its assigned bank.
// Operands that cannot be constrained are left alone; always returns true
// since debug instructions must not block selection.
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
                             const RegisterBankInfo &RBI) {
  for (MachineOperand &MO : I.operands()) {
    // Skip non-register operands, null registers, and physregs.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isPhysical())
      continue;
    LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
        RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
    if (!RC) {
      // No class assigned yet; derive one from the register bank and type.
      const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
      RC = getRegClassForTypeOnBank(Ty, RB);
      if (!RC) {
        LLVM_DEBUG(
            dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n" );
        break;
      }
    }
    RBI.constrainGenericRegister(Reg, RC: *RC, MRI);
  }

  return true;
}
989 | |
/// Select a COPY (or a G_ZEXT being lowered as a copy), fixing up size
/// mismatches between the source and destination register classes with
/// subregister copies or SUBREG_TO_REG promotions as needed, and constraining
/// the destination register. \returns false if the copy cannot be selected.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(i: 0).getReg();
  Register SrcReg = I.getOperand(i: 1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(args&: SrcRC, args&: DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n" );
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(RC: *SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(RC: *DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(RB: SrcRegBank) > DstSize) {
      // Copy cross-bank at the source's size, then extract the destination-
      // sized subregister from that temporary.
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(RB: DstRegBank, SizeInBits: SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(RC: DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy(Res: {DstTempRC}, Op: {SrcReg});
      copySubReg(I, MRI, RBI, SrcReg: Copy.getReg(Idx: 0), To: DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(RC: SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, To: DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(RC: SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(RegClass: PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      // Redirect the copy to read the promoted register.
      MachineOperand &RegOp = I.getOperand(i: 1);
      RegOp.setReg(PromoteReg);
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (DstReg.isPhysical())
      return true;
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n" );
    return false;
  }

  // If this a GPR ZEXT that we want to just reduce down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    // Re-run selection now that the instruction is a plain COPY.
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
  return true;
}
1080 | |
/// Select the AArch64 opcode for an int<->FP conversion \p GenericOpc
/// (G_SITOFP, G_UITOFP, G_FPTOSI, or G_FPTOUI) for the given scalar
/// destination and source types. The table covers the four 32/64-bit
/// size combinations.
/// \returns \p GenericOpc if the combination is unsupported (e.g. vectors).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      // 32-bit destination, 32-bit source.
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      // 32-bit destination, 64-bit source (narrowing conversions).
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      // 64-bit destination, 32-bit source (widening conversions).
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      // 64-bit destination, 64-bit source.
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}
1156 | |
/// Emit a conditional-select computing \p Dst = \p CC ? \p True : \p False.
///
/// FPR operands use FCSEL; GPR operands use CSEL, upgraded to
/// CSNEG/CSINV/CSINC when one side folds a negate/not/increment, or when the
/// true/false values are the constants 0/1/-1.
/// \returns the emitted instruction, or nullptr for vector types.
MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                       Register False, AArch64CC::CondCode CC,
                                       MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?" );
  LLT Ty = MRI.getType(Reg: True);
  if (Ty.isVector())
    return nullptr;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit select only?" );
  const bool Is32Bit = Size == 32;
  // FPR-bank selects become FCSEL; no further folding applies.
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC);
    constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
    return &*FCSel;
  }

  // By default, we'll try and emit a CSEL.
  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  bool Optimized = false;
  // Try to fold a unary op on \p Reg into the select, switching the opcode to
  // CSNEG/CSINV/CSINC. With \p Invert set, the condition is inverted and the
  // operands swapped so the folded value ends up in the second slot.
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
                                 &Optimized](Register &Reg, Register &OtherReg,
                                             bool Invert) {
    if (Optimized)
      return false;

    // Attempt to fold:
    //
    // %sub = G_SUB 0, %x
    // %select = G_SELECT cc, %reg, %sub
    //
    // Into:
    // %select = CSNEG %reg, %x, cc
    Register MatchReg;
    if (mi_match(R: Reg, MRI, P: m_Neg(Src: m_Reg(R&: MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        std::swap(a&: Reg, b&: OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %xor = G_XOR %x, -1
    // %select = G_SELECT cc, %reg, %xor
    //
    // Into:
    // %select = CSINV %reg, %x, cc
    if (mi_match(R: Reg, MRI, P: m_Not(Src: m_Reg(R&: MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        std::swap(a&: Reg, b&: OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %add = G_ADD %x, 1
    // %select = G_SELECT cc, %reg, %add
    //
    // Into:
    // %select = CSINC %reg, %x, cc
    if (mi_match(R: Reg, MRI,
                 P: m_any_of(preds: m_GAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: 1)),
                           preds: m_GPtrAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: 1))))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        std::swap(a&: Reg, b&: OtherReg);
      }
      return true;
    }

    return false;
  };

  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
  // true/false values are constants.
  // FIXME: All of these patterns already exist in tablegen. We should be
  // able to import these.
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
                          &Optimized]() {
    if (Optimized)
      return false;
    auto TrueCst = getIConstantVRegValWithLookThrough(VReg: True, MRI);
    auto FalseCst = getIConstantVRegValWithLookThrough(VReg: False, MRI);
    if (!TrueCst && !FalseCst)
      return false;

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();

      if (T == 0 && F == 1) {
        // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        return true;
      }
    }

    if (FalseCst) {
      int64_t F = FalseCst->Value.getSExtValue();
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
  return &*SelectInst;
}
1327 | |
1328 | static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { |
1329 | switch (P) { |
1330 | default: |
1331 | llvm_unreachable("Unknown condition code!" ); |
1332 | case CmpInst::ICMP_NE: |
1333 | return AArch64CC::NE; |
1334 | case CmpInst::ICMP_EQ: |
1335 | return AArch64CC::EQ; |
1336 | case CmpInst::ICMP_SGT: |
1337 | return AArch64CC::GT; |
1338 | case CmpInst::ICMP_SGE: |
1339 | return AArch64CC::GE; |
1340 | case CmpInst::ICMP_SLT: |
1341 | return AArch64CC::LT; |
1342 | case CmpInst::ICMP_SLE: |
1343 | return AArch64CC::LE; |
1344 | case CmpInst::ICMP_UGT: |
1345 | return AArch64CC::HI; |
1346 | case CmpInst::ICMP_UGE: |
1347 | return AArch64CC::HS; |
1348 | case CmpInst::ICMP_ULT: |
1349 | return AArch64CC::LO; |
1350 | case CmpInst::ICMP_ULE: |
1351 | return AArch64CC::LS; |
1352 | } |
1353 | } |
1354 | |
/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
///
/// Some FP predicates need two AArch64 conditions; when \p CondCode2 is set
/// to something other than AL, the predicate holds if EITHER condition holds
/// (the two are OR'ed together by the caller).
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
                                    AArch64CC::CondCode &CondCode,
                                    AArch64CC::CondCode &CondCode2) {
  // AL acts as the "unused" marker for the second condition.
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    llvm_unreachable("Unknown FP condition!" );
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    // ONE = OLT || OGT: needs two conditions.
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    // UEQ = OEQ || UNO: needs two conditions.
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}
1409 | |
/// Convert an IR fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
///
/// Only FCMP_ONE and FCMP_UEQ require two OR'ed conditions; every other
/// predicate maps to a single condition and is delegated to the OR variant.
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
                                     AArch64CC::CondCode &CondCode,
                                     AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    // Single-condition predicates are identical in AND and OR form.
    changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
    assert(CondCode2 == AArch64CC::AL);
    break;
  case CmpInst::FCMP_ONE:
    // (a one b)
    // == ((a olt b) || (a ogt b))
    // == ((a ord b) && (a une b))
    CondCode = AArch64CC::VC;
    CondCode2 = AArch64CC::NE;
    break;
  case CmpInst::FCMP_UEQ:
    // (a ueq b)
    // == ((a uno b) || (a oeq b))
    // == ((a ule b) && (a uge b))
    CondCode = AArch64CC::PL;
    CondCode2 = AArch64CC::LE;
    break;
  }
}
1438 | |
/// Return a register which can be used as a bit to test in a TB(N)Z.
///
/// Walks the def chain of \p Reg, folding through extensions/truncations and
/// through AND/XOR/shift instructions with constant operands, updating \p Bit
/// (the bit index to test) and \p Invert (whether TBZ/TBNZ should be flipped)
/// as it goes. Every folded instruction must have a single non-debug use so
/// the fold does not pessimize other users.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                              MachineRegisterInfo &MRI) {
  assert(Reg.isValid() && "Expected valid register!" );
  bool HasZext = false;
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    // Only fold defs whose result has a single non-debug use.
    if (!MI->getOperand(i: 0).isReg() ||
        !MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      // Remember a zext so constants below are read zero-extended.
      if (Opc == TargetOpcode::G_ZEXT)
        HasZext = true;

      Register NextReg = MI->getOperand(i: 1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(RegNo: NextReg))
        break;

      // NextReg is worth folding. Keep looking.
      Reg = NextReg;
      continue;
    }

    // Attempt to find a suitable operation with a constant on one side.
    std::optional<uint64_t> C;
    Register TestReg;
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(i: 1).getReg();
      Register ConstantReg = MI->getOperand(i: 2).getReg();
      auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(a&: ConstantReg, b&: TestReg);
        VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
      }
      if (VRegAndVal) {
        // Under a zext, the constant must be read unsigned to match the
        // zero-extended test value.
        if (HasZext)
          C = VRegAndVal->Value.getZExtValue();
        else
          C = VRegAndVal->Value.getSExtValue();
      }
      break;
    }
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      // For shifts, only a constant shift amount is foldable.
      TestReg = MI->getOperand(i: 1).getReg();
      auto VRegAndVal =
          getIConstantVRegValWithLookThrough(VReg: MI->getOperand(i: 2).getReg(), MRI);
      if (VRegAndVal)
        C = VRegAndVal->Value.getSExtValue();
      break;
    }
    }

    // Didn't find a constant or viable register. Bail out of the loop.
    if (!C || !TestReg.isValid())
      break;

    // We found a suitable instruction with a constant. Check to see if we can
    // walk through the instruction.
    Register NextReg;
    unsigned TestRegSize = MRI.getType(Reg: TestReg).getSizeInBits();
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
      if ((*C >> Bit) & 1)
        NextReg = TestReg;
      break;
    case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
      // the type of the register.
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit - *C;
      }
      break;
    case TargetOpcode::G_ASHR:
      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
      // in x
      NextReg = TestReg;
      Bit = Bit + *C;
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      break;
    case TargetOpcode::G_LSHR:
      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
      if ((Bit + *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit + *C;
      }
      break;
    case TargetOpcode::G_XOR:
      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
      // appropriate.
      //
      // e.g. If x' = xor x, c, and the b-th bit is set in c then
      //
      // tbz x', b -> tbnz x, b
      //
      // Because x' only has the b-th bit set if x does not.
      if ((*C >> Bit) & 1)
        Invert = !Invert;
      NextReg = TestReg;
      break;
    }

    // Check if we found anything worth folding.
    if (!NextReg.isValid())
      return Reg;
    Reg = NextReg;
  }

  return Reg;
}
1570 | |
/// Emit a TBZ/TBNZ testing bit \p Bit of \p TestReg and branching to
/// \p DstMBB when the bit is clear (\p IsNegative false) or set
/// (\p IsNegative true). Folds through the def chain via getTestBitReg
/// first, then narrows/widens the register to match the W/X instruction
/// variants.
MachineInstr *AArch64InstructionSelector::emitTestBit(
    Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) const {
  assert(TestReg.isValid());
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!" );
  MachineRegisterInfo &MRI = *MIB.getMRI();

  // Attempt to optimize the test bit by walking over instructions.
  TestReg = getTestBitReg(Reg: TestReg, Bit, Invert&: IsNegative, MRI);
  LLT Ty = MRI.getType(Reg: TestReg);
  unsigned Size = Ty.getSizeInBits();
  assert(!Ty.isVector() && "Expected a scalar!" );
  assert(Bit < 64 && "Bit is too large!" );

  // When the test register is a 64-bit register, we have to narrow to make
  // TBNZW work.
  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
        MIB);

  // Indexed by [UseWReg][IsNegative]: X/W variants of TBZ/TBNZ.
  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
  auto TestBitMI =
      MIB.buildInstr(Opcode: Opc).addReg(RegNo: TestReg).addImm(Val: Bit).addMBB(MBB: DstMBB);
  constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
  return &*TestBitMI;
}
1603 | |
1604 | bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( |
1605 | MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, |
1606 | MachineIRBuilder &MIB) const { |
1607 | assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?" ); |
1608 | // Given something like this: |
1609 | // |
1610 | // %x = ...Something... |
1611 | // %one = G_CONSTANT i64 1 |
1612 | // %zero = G_CONSTANT i64 0 |
1613 | // %and = G_AND %x, %one |
1614 | // %cmp = G_ICMP intpred(ne), %and, %zero |
1615 | // %cmp_trunc = G_TRUNC %cmp |
1616 | // G_BRCOND %cmp_trunc, %bb.3 |
1617 | // |
1618 | // We want to try and fold the AND into the G_BRCOND and produce either a |
1619 | // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). |
1620 | // |
1621 | // In this case, we'd get |
1622 | // |
1623 | // TBNZ %x %bb.3 |
1624 | // |
1625 | |
1626 | // Check if the AND has a constant on its RHS which we can use as a mask. |
1627 | // If it's a power of 2, then it's the same as checking a specific bit. |
1628 | // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) |
1629 | auto MaybeBit = getIConstantVRegValWithLookThrough( |
1630 | VReg: AndInst.getOperand(i: 2).getReg(), MRI: *MIB.getMRI()); |
1631 | if (!MaybeBit) |
1632 | return false; |
1633 | |
1634 | int32_t Bit = MaybeBit->Value.exactLogBase2(); |
1635 | if (Bit < 0) |
1636 | return false; |
1637 | |
1638 | Register TestReg = AndInst.getOperand(i: 1).getReg(); |
1639 | |
1640 | // Emit a TB(N)Z. |
1641 | emitTestBit(TestReg, Bit, IsNegative: Invert, DstMBB, MIB); |
1642 | return true; |
1643 | } |
1644 | |
1645 | MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, |
1646 | bool IsNegative, |
1647 | MachineBasicBlock *DestMBB, |
1648 | MachineIRBuilder &MIB) const { |
1649 | assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!" ); |
1650 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1651 | assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == |
1652 | AArch64::GPRRegBankID && |
1653 | "Expected GPRs only?" ); |
1654 | auto Ty = MRI.getType(Reg: CompareReg); |
1655 | unsigned Width = Ty.getSizeInBits(); |
1656 | assert(!Ty.isVector() && "Expected scalar only?" ); |
1657 | assert(Width <= 64 && "Expected width to be at most 64?" ); |
1658 | static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, |
1659 | {AArch64::CBNZW, AArch64::CBNZX}}; |
1660 | unsigned Opc = OpcTable[IsNegative][Width == 64]; |
1661 | auto BranchMI = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {CompareReg}).addMBB(MBB: DestMBB); |
1662 | constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); |
1663 | return &*BranchMI; |
1664 | } |
1665 | |
1666 | bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( |
1667 | MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { |
1668 | assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); |
1669 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); |
1670 | // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't |
1671 | // totally clean. Some of them require two branches to implement. |
1672 | auto Pred = (CmpInst::Predicate)FCmp.getOperand(i: 1).getPredicate(); |
1673 | emitFPCompare(LHS: FCmp.getOperand(i: 2).getReg(), RHS: FCmp.getOperand(i: 3).getReg(), MIRBuilder&: MIB, |
1674 | Pred); |
1675 | AArch64CC::CondCode CC1, CC2; |
1676 | changeFCMPPredToAArch64CC(P: static_cast<CmpInst::Predicate>(Pred), CondCode&: CC1, CondCode2&: CC2); |
1677 | MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB(); |
1678 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); |
1679 | if (CC2 != AArch64CC::AL) |
1680 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); |
1681 | I.eraseFromParent(); |
1682 | return true; |
1683 | } |
1684 | |
/// Attempt to fold the G_BRCOND \p I and its feeding G_ICMP \p ICmp into a
/// single TB(N)Z or CB(N)Z conditional branch.
/// \returns true (and erases \p I) on success; false tells the caller to
/// fall back to an explicit compare + Bcc.
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
  //
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (!ProduceNonFlagSettingCondBr)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
  auto Pred =
      static_cast<CmpInst::Predicate>(ICmp.getOperand(i: 1).getPredicate());
  Register LHS = ICmp.getOperand(i: 2).getReg();
  Register RHS = ICmp.getOperand(i: 3).getReg();

  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
  auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
  MachineInstr *AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);

  // When we can emit a TB(N)Z, prefer that.
  //
  // Handle non-commutative condition codes first.
  // Note that we don't want to do this when we have a G_AND because it can
  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();

    // When we have a greater-than comparison, we can just test if the msb is
    // zero. (x > -1 holds exactly when the sign bit of x is clear.)
    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
      emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // When we have a less than comparison, we can just test if the msb is not
    // zero. (x < 0 holds exactly when the sign bit of x is set.)
    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
      emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ true, DstMBB: DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // Inversely, if we have a signed greater-than-or-equal comparison to zero,
    // we can test if the msb is zero.
    if (C == 0 && Pred == CmpInst::ICMP_SGE) {
      uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
      emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }
  }

  // Attempt to handle commutative condition codes. Right now, that's only
  // eq/ne.
  if (ICmpInst::isEquality(P: Pred)) {
    // If the constant wasn't on the RHS, try the swapped orientation.
    if (!VRegAndVal) {
      std::swap(a&: RHS, b&: LHS);
      VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
      AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
    }

    if (VRegAndVal && VRegAndVal->Value == 0) {
      // If there's a G_AND feeding into this branch, try to fold it away by
      // emitting a TB(N)Z instead.
      //
      // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
      // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
      // would be redundant.
      if (AndInst &&
          tryOptAndIntoCompareBranch(
              AndInst&: *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DstMBB: DestMBB, MIB)) {
        I.eraseFromParent();
        return true;
      }

      // Otherwise, try to emit a CB(N)Z instead.
      auto LHSTy = MRI.getType(Reg: LHS);
      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
        emitCBZ(CompareReg: LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
        I.eraseFromParent();
        return true;
      }
    }
  }

  return false;
}
1779 | |
1780 | bool AArch64InstructionSelector::selectCompareBranchFedByICmp( |
1781 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { |
1782 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); |
1783 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); |
1784 | if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) |
1785 | return true; |
1786 | |
1787 | // Couldn't optimize. Emit a compare + a Bcc. |
1788 | MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB(); |
1789 | auto PredOp = ICmp.getOperand(i: 1); |
1790 | emitIntegerCompare(LHS&: ICmp.getOperand(i: 2), RHS&: ICmp.getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB); |
1791 | const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( |
1792 | P: static_cast<CmpInst::Predicate>(PredOp.getPredicate())); |
1793 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); |
1794 | I.eraseFromParent(); |
1795 | return true; |
1796 | } |
1797 | |
1798 | bool AArch64InstructionSelector::selectCompareBranch( |
1799 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) { |
1800 | Register CondReg = I.getOperand(i: 0).getReg(); |
1801 | MachineInstr *CCMI = MRI.getVRegDef(Reg: CondReg); |
1802 | // Try to select the G_BRCOND using whatever is feeding the condition if |
1803 | // possible. |
1804 | unsigned CCMIOpc = CCMI->getOpcode(); |
1805 | if (CCMIOpc == TargetOpcode::G_FCMP) |
1806 | return selectCompareBranchFedByFCmp(I, FCmp&: *CCMI, MIB); |
1807 | if (CCMIOpc == TargetOpcode::G_ICMP) |
1808 | return selectCompareBranchFedByICmp(I, ICmp&: *CCMI, MIB); |
1809 | |
1810 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z |
1811 | // instructions will not be produced, as they are conditional branch |
1812 | // instructions that do not set flags. |
1813 | if (ProduceNonFlagSettingCondBr) { |
1814 | emitTestBit(TestReg: CondReg, /*Bit = */ 0, /*IsNegative = */ true, |
1815 | DstMBB: I.getOperand(i: 1).getMBB(), MIB); |
1816 | I.eraseFromParent(); |
1817 | return true; |
1818 | } |
1819 | |
1820 | // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. |
1821 | auto TstMI = |
1822 | MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); |
1823 | constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); |
1824 | auto Bcc = MIB.buildInstr(AArch64::Bcc) |
1825 | .addImm(AArch64CC::NE) |
1826 | .addMBB(I.getOperand(1).getMBB()); |
1827 | I.eraseFromParent(); |
1828 | return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); |
1829 | } |
1830 | |
1831 | /// Returns the element immediate value of a vector shift operand if found. |
1832 | /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. |
1833 | static std::optional<int64_t> getVectorShiftImm(Register Reg, |
1834 | MachineRegisterInfo &MRI) { |
1835 | assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand" ); |
1836 | MachineInstr *OpMI = MRI.getVRegDef(Reg); |
1837 | return getAArch64VectorSplatScalar(MI: *OpMI, MRI); |
1838 | } |
1839 | |
1840 | /// Matches and returns the shift immediate value for a SHL instruction given |
1841 | /// a shift operand. |
1842 | static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, |
1843 | MachineRegisterInfo &MRI) { |
1844 | std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); |
1845 | if (!ShiftImm) |
1846 | return std::nullopt; |
1847 | // Check the immediate is in range for a SHL. |
1848 | int64_t Imm = *ShiftImm; |
1849 | if (Imm < 0) |
1850 | return std::nullopt; |
1851 | switch (SrcTy.getElementType().getSizeInBits()) { |
1852 | default: |
1853 | LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift" ); |
1854 | return std::nullopt; |
1855 | case 8: |
1856 | if (Imm > 7) |
1857 | return std::nullopt; |
1858 | break; |
1859 | case 16: |
1860 | if (Imm > 15) |
1861 | return std::nullopt; |
1862 | break; |
1863 | case 32: |
1864 | if (Imm > 31) |
1865 | return std::nullopt; |
1866 | break; |
1867 | case 64: |
1868 | if (Imm > 63) |
1869 | return std::nullopt; |
1870 | break; |
1871 | } |
1872 | return Imm; |
1873 | } |
1874 | |
1875 | bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I, |
1876 | MachineRegisterInfo &MRI) { |
1877 | assert(I.getOpcode() == TargetOpcode::G_SHL); |
1878 | Register DstReg = I.getOperand(i: 0).getReg(); |
1879 | const LLT Ty = MRI.getType(Reg: DstReg); |
1880 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
1881 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
1882 | |
1883 | if (!Ty.isVector()) |
1884 | return false; |
1885 | |
1886 | // Check if we have a vector of constants on RHS that we can select as the |
1887 | // immediate form. |
1888 | std::optional<int64_t> ImmVal = getVectorSHLImm(SrcTy: Ty, Reg: Src2Reg, MRI); |
1889 | |
1890 | unsigned Opc = 0; |
1891 | if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1892 | Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; |
1893 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
1894 | Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; |
1895 | } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) { |
1896 | Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; |
1897 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) { |
1898 | Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; |
1899 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) { |
1900 | Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; |
1901 | } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) { |
1902 | Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; |
1903 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) { |
1904 | Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; |
1905 | } else { |
1906 | LLVM_DEBUG(dbgs() << "Unhandled G_SHL type" ); |
1907 | return false; |
1908 | } |
1909 | |
1910 | auto Shl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg}); |
1911 | if (ImmVal) |
1912 | Shl.addImm(Val: *ImmVal); |
1913 | else |
1914 | Shl.addUse(RegNo: Src2Reg); |
1915 | constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); |
1916 | I.eraseFromParent(); |
1917 | return true; |
1918 | } |
1919 | |
1920 | bool AArch64InstructionSelector::selectVectorAshrLshr( |
1921 | MachineInstr &I, MachineRegisterInfo &MRI) { |
1922 | assert(I.getOpcode() == TargetOpcode::G_ASHR || |
1923 | I.getOpcode() == TargetOpcode::G_LSHR); |
1924 | Register DstReg = I.getOperand(i: 0).getReg(); |
1925 | const LLT Ty = MRI.getType(Reg: DstReg); |
1926 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
1927 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
1928 | |
1929 | if (!Ty.isVector()) |
1930 | return false; |
1931 | |
1932 | bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; |
1933 | |
1934 | // We expect the immediate case to be lowered in the PostLegalCombiner to |
1935 | // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. |
1936 | |
1937 | // There is not a shift right register instruction, but the shift left |
1938 | // register instruction takes a signed value, where negative numbers specify a |
1939 | // right shift. |
1940 | |
1941 | unsigned Opc = 0; |
1942 | unsigned NegOpc = 0; |
1943 | const TargetRegisterClass *RC = |
1944 | getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID)); |
1945 | if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1946 | Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; |
1947 | NegOpc = AArch64::NEGv2i64; |
1948 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
1949 | Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; |
1950 | NegOpc = AArch64::NEGv4i32; |
1951 | } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) { |
1952 | Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; |
1953 | NegOpc = AArch64::NEGv2i32; |
1954 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) { |
1955 | Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; |
1956 | NegOpc = AArch64::NEGv4i16; |
1957 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) { |
1958 | Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; |
1959 | NegOpc = AArch64::NEGv8i16; |
1960 | } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) { |
1961 | Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; |
1962 | NegOpc = AArch64::NEGv16i8; |
1963 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) { |
1964 | Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; |
1965 | NegOpc = AArch64::NEGv8i8; |
1966 | } else { |
1967 | LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type" ); |
1968 | return false; |
1969 | } |
1970 | |
1971 | auto Neg = MIB.buildInstr(Opc: NegOpc, DstOps: {RC}, SrcOps: {Src2Reg}); |
1972 | constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); |
1973 | auto SShl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg, Neg}); |
1974 | constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); |
1975 | I.eraseFromParent(); |
1976 | return true; |
1977 | } |
1978 | |
bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  // Not implemented for the AAPCS ABI: returning false reports that this
  // instruction was not selected here.
  return false;
}
1983 | |
1984 | bool AArch64InstructionSelector::selectVaStartDarwin( |
1985 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { |
1986 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
1987 | Register ListReg = I.getOperand(i: 0).getReg(); |
1988 | |
1989 | Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
1990 | |
1991 | int FrameIdx = FuncInfo->getVarArgsStackIndex(); |
1992 | if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64( |
1993 | CC: MF.getFunction().getCallingConv())) { |
1994 | FrameIdx = FuncInfo->getVarArgsGPRSize() > 0 |
1995 | ? FuncInfo->getVarArgsGPRIndex() |
1996 | : FuncInfo->getVarArgsStackIndex(); |
1997 | } |
1998 | |
1999 | auto MIB = |
2000 | BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) |
2001 | .addDef(ArgsAddrReg) |
2002 | .addFrameIndex(FrameIdx) |
2003 | .addImm(0) |
2004 | .addImm(0); |
2005 | |
2006 | constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); |
2007 | |
2008 | MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) |
2009 | .addUse(ArgsAddrReg) |
2010 | .addUse(ListReg) |
2011 | .addImm(0) |
2012 | .addMemOperand(*I.memoperands_begin()); |
2013 | |
2014 | constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); |
2015 | I.eraseFromParent(); |
2016 | return true; |
2017 | } |
2018 | |
/// Materialize the 64-bit address of \p V (a GlobalValue or BlockAddress)
/// into the destination register of \p I using a MOVZ followed by three
/// MOVKs, each of which fills in one 16-bit chunk of the value.
/// \p OpFlags carries target flags (e.g. GOT-related) merged into each chunk.
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // MOVZ materializes bits [0, 16) (MO_G0) and zeroes the rest.
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(i: 1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(Val: 0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  // Builds a MOVK inserting 16 bits of V's address at bit position \p Offset
  // into \p SrcReg. Writes \p ForceDstReg when nonzero, otherwise a fresh
  // virtual register; returns the register written.
  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    // Reuse the original operand's offset; only the chunk flags differ.
    if (auto *GV = dyn_cast<GlobalValue>(Val: V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, Offset: MovZ->getOperand(1).getOffset(), TargetFlags: Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(BA: cast<BlockAddress>(Val: V),
                                       Offset: MovZ->getOperand(1).getOffset(), TargetFlags: Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Val: Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  // Chain MOVKs for bits [16,32), [32,48) and [48,64); the final MOVK writes
  // the original instruction's destination register.
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(i: 0).getReg());
}
2055 | |
/// Rewrite certain generic instructions in place before selection so that the
/// imported (tablegen) patterns can match them.
/// \returns true if \p I was changed.
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_STORE: {
    // First try folding away a cross-bank copy feeding the store.
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
    MachineOperand &SrcOp = I.getOperand(i: 0);
    if (MRI.getType(Reg: SrcOp.getReg()).isPointer()) {
      // Allow matching with imported patterns for stores of pointers. Unlike
      // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
      // and constrain.
      auto Copy = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: SrcOp);
      Register NewSrc = Copy.getReg(Idx: 0);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
      Changed = true;
    }
    return Changed;
  }
  case TargetOpcode::G_PTR_ADD:
    // Rewrite as an integer G_ADD so the imported add patterns apply.
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    // For scalar loads of pointers, we try to convert the dest type from p0
    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
    // conversion, this should be ok because all users should have been
    // selected already, so the type doesn't matter for them.
    Register DstReg = I.getOperand(i: 0).getReg();
    const LLT DstTy = MRI.getType(Reg: DstReg);
    if (!DstTy.isPointer())
      return false;
    MRI.setType(VReg: DstReg, Ty: LLT::scalar(SizeInBits: 64));
    return true;
  }
  case AArch64::G_DUP: {
    // Convert the type from p0 to s64 to help selection.
    LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
    if (!DstTy.isPointerVector())
      return false;
    // Copy the scalar source through an s64 GPR so both operands are integral.
    auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 1).getReg());
    MRI.setType(VReg: I.getOperand(i: 0).getReg(),
                Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(i: 1).setReg(NewSrc.getReg(Idx: 0));
    return true;
  }
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    // If both source and destination regbanks are FPR, then convert the opcode
    // to G_SITOF so that the importer can select it to an fpr variant.
    // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
    // copy.
    Register SrcReg = I.getOperand(i: 1).getReg();
    LLT SrcTy = MRI.getType(Reg: SrcReg);
    LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
    // Only same-width scalar conversions are handled here.
    if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
      return false;

    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
      else
        I.setDesc(TII.get(AArch64::G_UITOF));
      return true;
    }
    return false;
  }
  default:
    return false;
  }
}
2128 | |
/// This lowering tries to look for G_PTR_ADD instructions and then converts
/// them to a standard G_ADD with a COPY on the source.
///
/// The motivation behind this is to expose the add semantics to the imported
/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
/// because the selector works bottom up, uses before defs. By the time we
/// end up trying to select a G_PTR_ADD, we should have already attempted to
/// fold this into addressing modes and were therefore unsuccessful.
///
/// \returns true if \p I was rewritten (to G_ADD, or further to G_SUB).
bool AArch64InstructionSelector::convertPtrAddToAdd(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD" );
  Register DstReg = I.getOperand(i: 0).getReg();
  Register AddOp1Reg = I.getOperand(i: 1).getReg();
  const LLT PtrTy = MRI.getType(Reg: DstReg);
  // Only handle the default (zero) address space.
  if (PtrTy.getAddressSpace() != 0)
    return false;

  // Pointer vectors become <2 x s64>; scalar pointers become s64.
  const LLT CastPtrTy =
      PtrTy.isVector() ? LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) : LLT::scalar(SizeInBits: 64);
  auto PtrToInt = MIB.buildPtrToInt(Dst: CastPtrTy, Src: AddOp1Reg);
  // Set regbanks on the registers.
  if (PtrTy.isVector())
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
  else
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
  // %dst(intty) = G_ADD %intbase, off
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(VReg: DstReg, Ty: CastPtrTy);
  I.getOperand(i: 1).setReg(PtrToInt.getReg(Idx: 0));
  // Eagerly select the G_PTRTOINT we just created.
  if (!select(I&: *PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd" );
    return false;
  }

  // Also take the opportunity here to try to do some optimization.
  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom:
  // G_ADD %x, (0 - %y) --> G_SUB %x, %y.
  Register NegatedReg;
  if (!mi_match(R: I.getOperand(i: 2).getReg(), MRI, P: m_Neg(Src: m_Reg(R&: NegatedReg))))
    return true;
  I.getOperand(i: 2).setReg(NegatedReg);
  I.setDesc(TII.get(TargetOpcode::G_SUB));
  return true;
}
2174 | |
2175 | bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, |
2176 | MachineRegisterInfo &MRI) { |
2177 | // We try to match the immediate variant of LSL, which is actually an alias |
2178 | // for a special case of UBFM. Otherwise, we fall back to the imported |
2179 | // selector which will match the register variant. |
2180 | assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op" ); |
2181 | const auto &MO = I.getOperand(i: 2); |
2182 | auto VRegAndVal = getIConstantVRegVal(VReg: MO.getReg(), MRI); |
2183 | if (!VRegAndVal) |
2184 | return false; |
2185 | |
2186 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2187 | if (DstTy.isVector()) |
2188 | return false; |
2189 | bool Is64Bit = DstTy.getSizeInBits() == 64; |
2190 | auto Imm1Fn = Is64Bit ? selectShiftA_64(Root: MO) : selectShiftA_32(Root: MO); |
2191 | auto Imm2Fn = Is64Bit ? selectShiftB_64(Root: MO) : selectShiftB_32(Root: MO); |
2192 | |
2193 | if (!Imm1Fn || !Imm2Fn) |
2194 | return false; |
2195 | |
2196 | auto NewI = |
2197 | MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, |
2198 | {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); |
2199 | |
2200 | for (auto &RenderFn : *Imm1Fn) |
2201 | RenderFn(NewI); |
2202 | for (auto &RenderFn : *Imm2Fn) |
2203 | RenderFn(NewI); |
2204 | |
2205 | I.eraseFromParent(); |
2206 | return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); |
2207 | } |
2208 | |
2209 | bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( |
2210 | MachineInstr &I, MachineRegisterInfo &MRI) { |
2211 | assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE" ); |
2212 | // If we're storing a scalar, it doesn't matter what register bank that |
2213 | // scalar is on. All that matters is the size. |
2214 | // |
2215 | // So, if we see something like this (with a 32-bit scalar as an example): |
2216 | // |
2217 | // %x:gpr(s32) = ... something ... |
2218 | // %y:fpr(s32) = COPY %x:gpr(s32) |
2219 | // G_STORE %y:fpr(s32) |
2220 | // |
2221 | // We can fix this up into something like this: |
2222 | // |
2223 | // G_STORE %x:gpr(s32) |
2224 | // |
2225 | // And then continue the selection process normally. |
2226 | Register DefDstReg = getSrcRegIgnoringCopies(Reg: I.getOperand(i: 0).getReg(), MRI); |
2227 | if (!DefDstReg.isValid()) |
2228 | return false; |
2229 | LLT DefDstTy = MRI.getType(Reg: DefDstReg); |
2230 | Register StoreSrcReg = I.getOperand(i: 0).getReg(); |
2231 | LLT StoreSrcTy = MRI.getType(Reg: StoreSrcReg); |
2232 | |
2233 | // If we get something strange like a physical register, then we shouldn't |
2234 | // go any further. |
2235 | if (!DefDstTy.isValid()) |
2236 | return false; |
2237 | |
2238 | // Are the source and dst types the same size? |
2239 | if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) |
2240 | return false; |
2241 | |
2242 | if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == |
2243 | RBI.getRegBank(DefDstReg, MRI, TRI)) |
2244 | return false; |
2245 | |
2246 | // We have a cross-bank copy, which is entering a store. Let's fold it. |
2247 | I.getOperand(i: 0).setReg(DefDstReg); |
2248 | return true; |
2249 | } |
2250 | |
2251 | bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { |
2252 | assert(I.getParent() && "Instruction should be in a basic block!" ); |
2253 | assert(I.getParent()->getParent() && "Instruction should be in a function!" ); |
2254 | |
2255 | MachineBasicBlock &MBB = *I.getParent(); |
2256 | MachineFunction &MF = *MBB.getParent(); |
2257 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2258 | |
2259 | switch (I.getOpcode()) { |
2260 | case AArch64::G_DUP: { |
2261 | // Before selecting a DUP instruction, check if it is better selected as a |
2262 | // MOV or load from a constant pool. |
2263 | Register Src = I.getOperand(i: 1).getReg(); |
2264 | auto ValAndVReg = getAnyConstantVRegValWithLookThrough(VReg: Src, MRI); |
2265 | if (!ValAndVReg) |
2266 | return false; |
2267 | LLVMContext &Ctx = MF.getFunction().getContext(); |
2268 | Register Dst = I.getOperand(i: 0).getReg(); |
2269 | auto *CV = ConstantDataVector::getSplat( |
2270 | NumElts: MRI.getType(Reg: Dst).getNumElements(), |
2271 | Elt: ConstantInt::get(Ty: Type::getIntNTy(C&: Ctx, N: MRI.getType(Reg: Src).getSizeInBits()), |
2272 | V: ValAndVReg->Value)); |
2273 | if (!emitConstantVector(Dst, CV, MIRBuilder&: MIB, MRI)) |
2274 | return false; |
2275 | I.eraseFromParent(); |
2276 | return true; |
2277 | } |
2278 | case TargetOpcode::G_SEXT: |
2279 | // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV |
2280 | // over a normal extend. |
2281 | if (selectUSMovFromExtend(I, MRI)) |
2282 | return true; |
2283 | return false; |
2284 | case TargetOpcode::G_BR: |
2285 | return false; |
2286 | case TargetOpcode::G_SHL: |
2287 | return earlySelectSHL(I, MRI); |
2288 | case TargetOpcode::G_CONSTANT: { |
2289 | bool IsZero = false; |
2290 | if (I.getOperand(i: 1).isCImm()) |
2291 | IsZero = I.getOperand(i: 1).getCImm()->isZero(); |
2292 | else if (I.getOperand(i: 1).isImm()) |
2293 | IsZero = I.getOperand(i: 1).getImm() == 0; |
2294 | |
2295 | if (!IsZero) |
2296 | return false; |
2297 | |
2298 | Register DefReg = I.getOperand(i: 0).getReg(); |
2299 | LLT Ty = MRI.getType(Reg: DefReg); |
2300 | if (Ty.getSizeInBits() == 64) { |
2301 | I.getOperand(1).ChangeToRegister(AArch64::XZR, false); |
2302 | RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); |
2303 | } else if (Ty.getSizeInBits() == 32) { |
2304 | I.getOperand(1).ChangeToRegister(AArch64::WZR, false); |
2305 | RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); |
2306 | } else |
2307 | return false; |
2308 | |
2309 | I.setDesc(TII.get(TargetOpcode::COPY)); |
2310 | return true; |
2311 | } |
2312 | |
2313 | case TargetOpcode::G_ADD: { |
2314 | // Check if this is being fed by a G_ICMP on either side. |
2315 | // |
2316 | // (cmp pred, x, y) + z |
2317 | // |
2318 | // In the above case, when the cmp is true, we increment z by 1. So, we can |
2319 | // fold the add into the cset for the cmp by using cinc. |
2320 | // |
2321 | // FIXME: This would probably be a lot nicer in PostLegalizerLowering. |
2322 | Register AddDst = I.getOperand(i: 0).getReg(); |
2323 | Register AddLHS = I.getOperand(i: 1).getReg(); |
2324 | Register AddRHS = I.getOperand(i: 2).getReg(); |
2325 | // Only handle scalars. |
2326 | LLT Ty = MRI.getType(Reg: AddLHS); |
2327 | if (Ty.isVector()) |
2328 | return false; |
2329 | // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64 |
2330 | // bits. |
2331 | unsigned Size = Ty.getSizeInBits(); |
2332 | if (Size != 32 && Size != 64) |
2333 | return false; |
2334 | auto MatchCmp = [&](Register Reg) -> MachineInstr * { |
2335 | if (!MRI.hasOneNonDBGUse(RegNo: Reg)) |
2336 | return nullptr; |
2337 | // If the LHS of the add is 32 bits, then we want to fold a 32-bit |
2338 | // compare. |
2339 | if (Size == 32) |
2340 | return getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg, MRI); |
2341 | // We model scalar compares using 32-bit destinations right now. |
2342 | // If it's a 64-bit compare, it'll have 64-bit sources. |
2343 | Register ZExt; |
2344 | if (!mi_match(R: Reg, MRI, |
2345 | P: m_OneNonDBGUse(SP: m_GZExt(Src: m_OneNonDBGUse(SP: m_Reg(R&: ZExt)))))) |
2346 | return nullptr; |
2347 | auto *Cmp = getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg: ZExt, MRI); |
2348 | if (!Cmp || |
2349 | MRI.getType(Reg: Cmp->getOperand(i: 2).getReg()).getSizeInBits() != 64) |
2350 | return nullptr; |
2351 | return Cmp; |
2352 | }; |
2353 | // Try to match |
2354 | // z + (cmp pred, x, y) |
2355 | MachineInstr *Cmp = MatchCmp(AddRHS); |
2356 | if (!Cmp) { |
2357 | // (cmp pred, x, y) + z |
2358 | std::swap(a&: AddLHS, b&: AddRHS); |
2359 | Cmp = MatchCmp(AddRHS); |
2360 | if (!Cmp) |
2361 | return false; |
2362 | } |
2363 | auto &PredOp = Cmp->getOperand(i: 1); |
2364 | auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate()); |
2365 | const AArch64CC::CondCode InvCC = |
2366 | changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred)); |
2367 | MIB.setInstrAndDebugLoc(I); |
2368 | emitIntegerCompare(/*LHS=*/Cmp->getOperand(i: 2), |
2369 | /*RHS=*/Cmp->getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB); |
2370 | emitCSINC(/*Dst=*/AddDst, /*Src =*/Src1: AddLHS, /*Src2=*/AddLHS, Pred: InvCC, MIRBuilder&: MIB); |
2371 | I.eraseFromParent(); |
2372 | return true; |
2373 | } |
2374 | case TargetOpcode::G_OR: { |
2375 | // Look for operations that take the lower `Width=Size-ShiftImm` bits of |
2376 | // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via |
2377 | // shifting and masking that we can replace with a BFI (encoded as a BFM). |
2378 | Register Dst = I.getOperand(i: 0).getReg(); |
2379 | LLT Ty = MRI.getType(Reg: Dst); |
2380 | |
2381 | if (!Ty.isScalar()) |
2382 | return false; |
2383 | |
2384 | unsigned Size = Ty.getSizeInBits(); |
2385 | if (Size != 32 && Size != 64) |
2386 | return false; |
2387 | |
2388 | Register ShiftSrc; |
2389 | int64_t ShiftImm; |
2390 | Register MaskSrc; |
2391 | int64_t MaskImm; |
2392 | if (!mi_match( |
2393 | R: Dst, MRI, |
2394 | P: m_GOr(L: m_OneNonDBGUse(SP: m_GShl(L: m_Reg(R&: ShiftSrc), R: m_ICst(Cst&: ShiftImm))), |
2395 | R: m_OneNonDBGUse(SP: m_GAnd(L: m_Reg(R&: MaskSrc), R: m_ICst(Cst&: MaskImm)))))) |
2396 | return false; |
2397 | |
2398 | if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm)) |
2399 | return false; |
2400 | |
2401 | int64_t Immr = Size - ShiftImm; |
2402 | int64_t Imms = Size - ShiftImm - 1; |
2403 | unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri; |
2404 | emitInstr(Opcode: Opc, DstOps: {Dst}, SrcOps: {MaskSrc, ShiftSrc, Immr, Imms}, MIRBuilder&: MIB); |
2405 | I.eraseFromParent(); |
2406 | return true; |
2407 | } |
2408 | case TargetOpcode::G_FENCE: { |
2409 | if (I.getOperand(i: 1).getImm() == 0) |
2410 | BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER)); |
2411 | else |
2412 | BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB)) |
2413 | .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb); |
2414 | I.eraseFromParent(); |
2415 | return true; |
2416 | } |
2417 | default: |
2418 | return false; |
2419 | } |
2420 | } |
2421 | |
2422 | bool AArch64InstructionSelector::select(MachineInstr &I) { |
2423 | assert(I.getParent() && "Instruction should be in a basic block!" ); |
2424 | assert(I.getParent()->getParent() && "Instruction should be in a function!" ); |
2425 | |
2426 | MachineBasicBlock &MBB = *I.getParent(); |
2427 | MachineFunction &MF = *MBB.getParent(); |
2428 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2429 | |
2430 | const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>(); |
2431 | if (Subtarget->requiresStrictAlign()) { |
2432 | // We don't support this feature yet. |
2433 | LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n" ); |
2434 | return false; |
2435 | } |
2436 | |
2437 | MIB.setInstrAndDebugLoc(I); |
2438 | |
2439 | unsigned Opcode = I.getOpcode(); |
2440 | // G_PHI requires same handling as PHI |
2441 | if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { |
2442 | // Certain non-generic instructions also need some special handling. |
2443 | |
2444 | if (Opcode == TargetOpcode::LOAD_STACK_GUARD) |
2445 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2446 | |
2447 | if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { |
2448 | const Register DefReg = I.getOperand(i: 0).getReg(); |
2449 | const LLT DefTy = MRI.getType(Reg: DefReg); |
2450 | |
2451 | const RegClassOrRegBank &RegClassOrBank = |
2452 | MRI.getRegClassOrRegBank(Reg: DefReg); |
2453 | |
2454 | const TargetRegisterClass *DefRC |
2455 | = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); |
2456 | if (!DefRC) { |
2457 | if (!DefTy.isValid()) { |
2458 | LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n" ); |
2459 | return false; |
2460 | } |
2461 | const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); |
2462 | DefRC = getRegClassForTypeOnBank(Ty: DefTy, RB); |
2463 | if (!DefRC) { |
2464 | LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n" ); |
2465 | return false; |
2466 | } |
2467 | } |
2468 | |
2469 | I.setDesc(TII.get(TargetOpcode::PHI)); |
2470 | |
2471 | return RBI.constrainGenericRegister(Reg: DefReg, RC: *DefRC, MRI); |
2472 | } |
2473 | |
2474 | if (I.isCopy()) |
2475 | return selectCopy(I, TII, MRI, TRI, RBI); |
2476 | |
2477 | if (I.isDebugInstr()) |
2478 | return selectDebugInstr(I, MRI, RBI); |
2479 | |
2480 | return true; |
2481 | } |
2482 | |
2483 | |
2484 | if (I.getNumOperands() != I.getNumExplicitOperands()) { |
2485 | LLVM_DEBUG( |
2486 | dbgs() << "Generic instruction has unexpected implicit operands\n" ); |
2487 | return false; |
2488 | } |
2489 | |
2490 | // Try to do some lowering before we start instruction selecting. These |
2491 | // lowerings are purely transformations on the input G_MIR and so selection |
2492 | // must continue after any modification of the instruction. |
2493 | if (preISelLower(I)) { |
2494 | Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. |
2495 | } |
2496 | |
2497 | // There may be patterns where the importer can't deal with them optimally, |
2498 | // but does select it to a suboptimal sequence so our custom C++ selection |
2499 | // code later never has a chance to work on it. Therefore, we have an early |
2500 | // selection attempt here to give priority to certain selection routines |
2501 | // over the imported ones. |
2502 | if (earlySelect(I)) |
2503 | return true; |
2504 | |
2505 | if (selectImpl(I, CoverageInfo&: *CoverageInfo)) |
2506 | return true; |
2507 | |
2508 | LLT Ty = |
2509 | I.getOperand(i: 0).isReg() ? MRI.getType(Reg: I.getOperand(i: 0).getReg()) : LLT{}; |
2510 | |
2511 | switch (Opcode) { |
2512 | case TargetOpcode::G_SBFX: |
2513 | case TargetOpcode::G_UBFX: { |
2514 | static const unsigned OpcTable[2][2] = { |
2515 | {AArch64::UBFMWri, AArch64::UBFMXri}, |
2516 | {AArch64::SBFMWri, AArch64::SBFMXri}}; |
2517 | bool IsSigned = Opcode == TargetOpcode::G_SBFX; |
2518 | unsigned Size = Ty.getSizeInBits(); |
2519 | unsigned Opc = OpcTable[IsSigned][Size == 64]; |
2520 | auto Cst1 = |
2521 | getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 2).getReg(), MRI); |
2522 | assert(Cst1 && "Should have gotten a constant for src 1?" ); |
2523 | auto Cst2 = |
2524 | getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 3).getReg(), MRI); |
2525 | assert(Cst2 && "Should have gotten a constant for src 2?" ); |
2526 | auto LSB = Cst1->Value.getZExtValue(); |
2527 | auto Width = Cst2->Value.getZExtValue(); |
2528 | auto BitfieldInst = |
2529 | MIB.buildInstr(Opc, DstOps: {I.getOperand(i: 0)}, SrcOps: {I.getOperand(i: 1)}) |
2530 | .addImm(Val: LSB) |
2531 | .addImm(Val: LSB + Width - 1); |
2532 | I.eraseFromParent(); |
2533 | return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI); |
2534 | } |
2535 | case TargetOpcode::G_BRCOND: |
2536 | return selectCompareBranch(I, MF, MRI); |
2537 | |
2538 | case TargetOpcode::G_BRINDIRECT: { |
2539 | I.setDesc(TII.get(AArch64::BR)); |
2540 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2541 | } |
2542 | |
2543 | case TargetOpcode::G_BRJT: |
2544 | return selectBrJT(I, MRI); |
2545 | |
2546 | case AArch64::G_ADD_LOW: { |
2547 | // This op may have been separated from it's ADRP companion by the localizer |
2548 | // or some other code motion pass. Given that many CPUs will try to |
2549 | // macro fuse these operations anyway, select this into a MOVaddr pseudo |
2550 | // which will later be expanded into an ADRP+ADD pair after scheduling. |
2551 | MachineInstr *BaseMI = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg()); |
2552 | if (BaseMI->getOpcode() != AArch64::ADRP) { |
2553 | I.setDesc(TII.get(AArch64::ADDXri)); |
2554 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2555 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2556 | } |
2557 | assert(TM.getCodeModel() == CodeModel::Small && |
2558 | "Expected small code model" ); |
2559 | auto Op1 = BaseMI->getOperand(i: 1); |
2560 | auto Op2 = I.getOperand(i: 2); |
2561 | auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) |
2562 | .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(), |
2563 | Op1.getTargetFlags()) |
2564 | .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(), |
2565 | Op2.getTargetFlags()); |
2566 | I.eraseFromParent(); |
2567 | return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); |
2568 | } |
2569 | |
2570 | case TargetOpcode::G_FCONSTANT: |
2571 | case TargetOpcode::G_CONSTANT: { |
2572 | const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; |
2573 | |
2574 | const LLT s8 = LLT::scalar(SizeInBits: 8); |
2575 | const LLT s16 = LLT::scalar(SizeInBits: 16); |
2576 | const LLT s32 = LLT::scalar(SizeInBits: 32); |
2577 | const LLT s64 = LLT::scalar(SizeInBits: 64); |
2578 | const LLT s128 = LLT::scalar(SizeInBits: 128); |
2579 | const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
2580 | |
2581 | const Register DefReg = I.getOperand(i: 0).getReg(); |
2582 | const LLT DefTy = MRI.getType(Reg: DefReg); |
2583 | const unsigned DefSize = DefTy.getSizeInBits(); |
2584 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); |
2585 | |
2586 | // FIXME: Redundant check, but even less readable when factored out. |
2587 | if (isFP) { |
2588 | if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) { |
2589 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty |
2590 | << " constant, expected: " << s16 << " or " << s32 |
2591 | << " or " << s64 << " or " << s128 << '\n'); |
2592 | return false; |
2593 | } |
2594 | |
2595 | if (RB.getID() != AArch64::FPRRegBankID) { |
2596 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty |
2597 | << " constant on bank: " << RB |
2598 | << ", expected: FPR\n" ); |
2599 | return false; |
2600 | } |
2601 | |
2602 | // The case when we have 0.0 is covered by tablegen. Reject it here so we |
2603 | // can be sure tablegen works correctly and isn't rescued by this code. |
2604 | // 0.0 is not covered by tablegen for FP128. So we will handle this |
2605 | // scenario in the code here. |
2606 | if (DefSize != 128 && I.getOperand(i: 1).getFPImm()->isExactlyValue(V: 0.0)) |
2607 | return false; |
2608 | } else { |
2609 | // s32 and s64 are covered by tablegen. |
2610 | if (Ty != p0 && Ty != s8 && Ty != s16) { |
2611 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty |
2612 | << " constant, expected: " << s32 << ", " << s64 |
2613 | << ", or " << p0 << '\n'); |
2614 | return false; |
2615 | } |
2616 | |
2617 | if (RB.getID() != AArch64::GPRRegBankID) { |
2618 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty |
2619 | << " constant on bank: " << RB |
2620 | << ", expected: GPR\n" ); |
2621 | return false; |
2622 | } |
2623 | } |
2624 | |
2625 | if (isFP) { |
2626 | const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(Ty: DefTy, RB); |
2627 | // For 16, 64, and 128b values, emit a constant pool load. |
2628 | switch (DefSize) { |
2629 | default: |
2630 | llvm_unreachable("Unexpected destination size for G_FCONSTANT?" ); |
2631 | case 32: |
2632 | case 64: { |
2633 | bool OptForSize = shouldOptForSize(MF: &MF); |
2634 | const auto &TLI = MF.getSubtarget().getTargetLowering(); |
2635 | // If TLI says that this fpimm is illegal, then we'll expand to a |
2636 | // constant pool load. |
2637 | if (TLI->isFPImmLegal(I.getOperand(i: 1).getFPImm()->getValueAPF(), |
2638 | EVT::getFloatingPointVT(BitWidth: DefSize), ForCodeSize: OptForSize)) |
2639 | break; |
2640 | [[fallthrough]]; |
2641 | } |
2642 | case 16: |
2643 | case 128: { |
2644 | auto *FPImm = I.getOperand(i: 1).getFPImm(); |
2645 | auto *LoadMI = emitLoadFromConstantPool(CPVal: FPImm, MIRBuilder&: MIB); |
2646 | if (!LoadMI) { |
2647 | LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n" ); |
2648 | return false; |
2649 | } |
2650 | MIB.buildCopy(Res: {DefReg}, Op: {LoadMI->getOperand(i: 0).getReg()}); |
2651 | I.eraseFromParent(); |
2652 | return RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI); |
2653 | } |
2654 | } |
2655 | |
2656 | assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size" ); |
2657 | // Either emit a FMOV, or emit a copy to emit a normal mov. |
2658 | const Register DefGPRReg = MRI.createVirtualRegister( |
2659 | DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); |
2660 | MachineOperand &RegOp = I.getOperand(i: 0); |
2661 | RegOp.setReg(DefGPRReg); |
2662 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator())); |
2663 | MIB.buildCopy(Res: {DefReg}, Op: {DefGPRReg}); |
2664 | |
2665 | if (!RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI)) { |
2666 | LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n" ); |
2667 | return false; |
2668 | } |
2669 | |
2670 | MachineOperand &ImmOp = I.getOperand(i: 1); |
2671 | // FIXME: Is going through int64_t always correct? |
2672 | ImmOp.ChangeToImmediate( |
2673 | ImmVal: ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); |
2674 | } else if (I.getOperand(i: 1).isCImm()) { |
2675 | uint64_t Val = I.getOperand(i: 1).getCImm()->getZExtValue(); |
2676 | I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val); |
2677 | } else if (I.getOperand(i: 1).isImm()) { |
2678 | uint64_t Val = I.getOperand(i: 1).getImm(); |
2679 | I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val); |
2680 | } |
2681 | |
2682 | const unsigned MovOpc = |
2683 | DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; |
2684 | I.setDesc(TII.get(MovOpc)); |
2685 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2686 | return true; |
2687 | } |
2688 | case TargetOpcode::G_EXTRACT: { |
2689 | Register DstReg = I.getOperand(i: 0).getReg(); |
2690 | Register SrcReg = I.getOperand(i: 1).getReg(); |
2691 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
2692 | LLT DstTy = MRI.getType(Reg: DstReg); |
2693 | (void)DstTy; |
2694 | unsigned SrcSize = SrcTy.getSizeInBits(); |
2695 | |
2696 | if (SrcTy.getSizeInBits() > 64) { |
2697 | // This should be an extract of an s128, which is like a vector extract. |
2698 | if (SrcTy.getSizeInBits() != 128) |
2699 | return false; |
2700 | // Only support extracting 64 bits from an s128 at the moment. |
2701 | if (DstTy.getSizeInBits() != 64) |
2702 | return false; |
2703 | |
2704 | unsigned Offset = I.getOperand(i: 2).getImm(); |
2705 | if (Offset % 64 != 0) |
2706 | return false; |
2707 | |
2708 | // Check we have the right regbank always. |
2709 | const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); |
2710 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); |
2711 | assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!" ); |
2712 | |
2713 | if (SrcRB.getID() == AArch64::GPRRegBankID) { |
2714 | auto NewI = |
2715 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) |
2716 | .addUse(SrcReg, 0, |
2717 | Offset == 0 ? AArch64::sube64 : AArch64::subo64); |
2718 | constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI, |
2719 | AArch64::GPR64RegClass, NewI->getOperand(0)); |
2720 | I.eraseFromParent(); |
2721 | return true; |
2722 | } |
2723 | |
2724 | // Emit the same code as a vector extract. |
2725 | // Offset must be a multiple of 64. |
2726 | unsigned LaneIdx = Offset / 64; |
2727 | MachineInstr * = emitExtractVectorElt( |
2728 | DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: 64), VecReg: SrcReg, LaneIdx, MIRBuilder&: MIB); |
2729 | if (!Extract) |
2730 | return false; |
2731 | I.eraseFromParent(); |
2732 | return true; |
2733 | } |
2734 | |
2735 | I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); |
2736 | MachineInstrBuilder(MF, I).addImm(Val: I.getOperand(i: 2).getImm() + |
2737 | Ty.getSizeInBits() - 1); |
2738 | |
2739 | if (SrcSize < 64) { |
2740 | assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && |
2741 | "unexpected G_EXTRACT types" ); |
2742 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2743 | } |
2744 | |
2745 | DstReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
2746 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator())); |
2747 | MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) |
2748 | .addReg(DstReg, 0, AArch64::sub_32); |
2749 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), |
2750 | AArch64::GPR32RegClass, MRI); |
2751 | I.getOperand(i: 0).setReg(DstReg); |
2752 | |
2753 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2754 | } |
2755 | |
2756 | case TargetOpcode::G_INSERT: { |
2757 | LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 2).getReg()); |
2758 | LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2759 | unsigned DstSize = DstTy.getSizeInBits(); |
2760 | // Larger inserts are vectors, same-size ones should be something else by |
2761 | // now (split up or turned into COPYs). |
2762 | if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) |
2763 | return false; |
2764 | |
2765 | I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); |
2766 | unsigned LSB = I.getOperand(i: 3).getImm(); |
2767 | unsigned Width = MRI.getType(Reg: I.getOperand(i: 2).getReg()).getSizeInBits(); |
2768 | I.getOperand(i: 3).setImm((DstSize - LSB) % DstSize); |
2769 | MachineInstrBuilder(MF, I).addImm(Val: Width - 1); |
2770 | |
2771 | if (DstSize < 64) { |
2772 | assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && |
2773 | "unexpected G_INSERT types" ); |
2774 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2775 | } |
2776 | |
2777 | Register SrcReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
2778 | BuildMI(MBB, I.getIterator(), I.getDebugLoc(), |
2779 | TII.get(AArch64::SUBREG_TO_REG)) |
2780 | .addDef(SrcReg) |
2781 | .addImm(0) |
2782 | .addUse(I.getOperand(2).getReg()) |
2783 | .addImm(AArch64::sub_32); |
2784 | RBI.constrainGenericRegister(I.getOperand(2).getReg(), |
2785 | AArch64::GPR32RegClass, MRI); |
2786 | I.getOperand(i: 2).setReg(SrcReg); |
2787 | |
2788 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2789 | } |
2790 | case TargetOpcode::G_FRAME_INDEX: { |
2791 | // allocas and G_FRAME_INDEX are only supported in addrspace(0). |
2792 | if (Ty != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) { |
2793 | LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty |
2794 | << ", expected: " << LLT::pointer(0, 64) << '\n'); |
2795 | return false; |
2796 | } |
2797 | I.setDesc(TII.get(AArch64::ADDXri)); |
2798 | |
2799 | // MOs for a #0 shifted immediate. |
2800 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2801 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2802 | |
2803 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2804 | } |
2805 | |
2806 | case TargetOpcode::G_GLOBAL_VALUE: { |
2807 | auto GV = I.getOperand(i: 1).getGlobal(); |
2808 | if (GV->isThreadLocal()) |
2809 | return selectTLSGlobalValue(I, MRI); |
2810 | |
2811 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); |
2812 | if (OpFlags & AArch64II::MO_GOT) { |
2813 | I.setDesc(TII.get(AArch64::LOADgot)); |
2814 | I.getOperand(i: 1).setTargetFlags(OpFlags); |
2815 | } else if (TM.getCodeModel() == CodeModel::Large && |
2816 | !TM.isPositionIndependent()) { |
2817 | // Materialize the global using movz/movk instructions. |
2818 | materializeLargeCMVal(I, V: GV, OpFlags); |
2819 | I.eraseFromParent(); |
2820 | return true; |
2821 | } else if (TM.getCodeModel() == CodeModel::Tiny) { |
2822 | I.setDesc(TII.get(AArch64::ADR)); |
2823 | I.getOperand(i: 1).setTargetFlags(OpFlags); |
2824 | } else { |
2825 | I.setDesc(TII.get(AArch64::MOVaddr)); |
2826 | I.getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); |
2827 | MachineInstrBuilder MIB(MF, I); |
2828 | MIB.addGlobalAddress(GV, Offset: I.getOperand(i: 1).getOffset(), |
2829 | TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
2830 | } |
2831 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2832 | } |
2833 | |
2834 | case TargetOpcode::G_ZEXTLOAD: |
2835 | case TargetOpcode::G_LOAD: |
2836 | case TargetOpcode::G_STORE: { |
2837 | GLoadStore &LdSt = cast<GLoadStore>(Val&: I); |
2838 | bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; |
2839 | LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg()); |
2840 | |
2841 | if (PtrTy != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) { |
2842 | LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy |
2843 | << ", expected: " << LLT::pointer(0, 64) << '\n'); |
2844 | return false; |
2845 | } |
2846 | |
2847 | uint64_t MemSizeInBytes = LdSt.getMemSize(); |
2848 | unsigned MemSizeInBits = LdSt.getMemSizeInBits(); |
2849 | AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); |
2850 | |
2851 | // Need special instructions for atomics that affect ordering. |
2852 | if (Order != AtomicOrdering::NotAtomic && |
2853 | Order != AtomicOrdering::Unordered && |
2854 | Order != AtomicOrdering::Monotonic) { |
2855 | assert(!isa<GZExtLoad>(LdSt)); |
2856 | if (MemSizeInBytes > 64) |
2857 | return false; |
2858 | |
2859 | if (isa<GLoad>(Val: LdSt)) { |
2860 | static constexpr unsigned LDAPROpcodes[] = { |
2861 | AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX}; |
2862 | static constexpr unsigned LDAROpcodes[] = { |
2863 | AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX}; |
2864 | ArrayRef<unsigned> Opcodes = |
2865 | STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent |
2866 | ? LDAPROpcodes |
2867 | : LDAROpcodes; |
2868 | I.setDesc(TII.get(Opcodes[Log2_32(Value: MemSizeInBytes)])); |
2869 | } else { |
2870 | static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH, |
2871 | AArch64::STLRW, AArch64::STLRX}; |
2872 | Register ValReg = LdSt.getReg(Idx: 0); |
2873 | if (MRI.getType(Reg: ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) { |
2874 | // Emit a subreg copy of 32 bits. |
2875 | Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass); |
2876 | MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {}) |
2877 | .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32); |
2878 | I.getOperand(i: 0).setReg(NewVal); |
2879 | } |
2880 | I.setDesc(TII.get(Opcodes[Log2_32(Value: MemSizeInBytes)])); |
2881 | } |
2882 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2883 | return true; |
2884 | } |
2885 | |
2886 | #ifndef NDEBUG |
2887 | const Register PtrReg = LdSt.getPointerReg(); |
2888 | const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); |
2889 | // Check that the pointer register is valid. |
2890 | assert(PtrRB.getID() == AArch64::GPRRegBankID && |
2891 | "Load/Store pointer operand isn't a GPR" ); |
2892 | assert(MRI.getType(PtrReg).isPointer() && |
2893 | "Load/Store pointer operand isn't a pointer" ); |
2894 | #endif |
2895 | |
2896 | const Register ValReg = LdSt.getReg(Idx: 0); |
2897 | const LLT ValTy = MRI.getType(Reg: ValReg); |
2898 | const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); |
2899 | |
2900 | // The code below doesn't support truncating stores, so we need to split it |
2901 | // again. |
2902 | if (isa<GStore>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { |
2903 | unsigned SubReg; |
2904 | LLT MemTy = LdSt.getMMO().getMemoryType(); |
2905 | auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB); |
2906 | if (!getSubRegForClass(RC, TRI, SubReg)) |
2907 | return false; |
2908 | |
2909 | // Generate a subreg copy. |
2910 | auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {MemTy}, SrcOps: {}) |
2911 | .addReg(RegNo: ValReg, flags: 0, SubReg) |
2912 | .getReg(Idx: 0); |
2913 | RBI.constrainGenericRegister(Reg: Copy, RC: *RC, MRI); |
2914 | LdSt.getOperand(i: 0).setReg(Copy); |
2915 | } else if (isa<GLoad>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { |
2916 | // If this is an any-extending load from the FPR bank, split it into a regular |
2917 | // load + extend. |
2918 | if (RB.getID() == AArch64::FPRRegBankID) { |
2919 | unsigned SubReg; |
2920 | LLT MemTy = LdSt.getMMO().getMemoryType(); |
2921 | auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB); |
2922 | if (!getSubRegForClass(RC, TRI, SubReg)) |
2923 | return false; |
2924 | Register OldDst = LdSt.getReg(Idx: 0); |
2925 | Register NewDst = |
2926 | MRI.createGenericVirtualRegister(Ty: LdSt.getMMO().getMemoryType()); |
2927 | LdSt.getOperand(i: 0).setReg(NewDst); |
2928 | MRI.setRegBank(Reg: NewDst, RegBank: RB); |
2929 | // Generate a SUBREG_TO_REG to extend it. |
2930 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LdSt.getIterator())); |
2931 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {}) |
2932 | .addImm(0) |
2933 | .addUse(NewDst) |
2934 | .addImm(SubReg); |
2935 | auto SubRegRC = getRegClassForTypeOnBank(Ty: MRI.getType(Reg: OldDst), RB); |
2936 | RBI.constrainGenericRegister(Reg: OldDst, RC: *SubRegRC, MRI); |
2937 | MIB.setInstr(LdSt); |
2938 | } |
2939 | } |
2940 | |
2941 | // Helper lambda for partially selecting I. Either returns the original |
2942 | // instruction with an updated opcode, or a new instruction. |
2943 | auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { |
2944 | bool IsStore = isa<GStore>(Val: I); |
2945 | const unsigned NewOpc = |
2946 | selectLoadStoreUIOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize: MemSizeInBits); |
2947 | if (NewOpc == I.getOpcode()) |
2948 | return nullptr; |
2949 | // Check if we can fold anything into the addressing mode. |
2950 | auto AddrModeFns = |
2951 | selectAddrModeIndexed(Root&: I.getOperand(i: 1), Size: MemSizeInBytes); |
2952 | if (!AddrModeFns) { |
2953 | // Can't fold anything. Use the original instruction. |
2954 | I.setDesc(TII.get(NewOpc)); |
2955 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2956 | return &I; |
2957 | } |
2958 | |
2959 | // Folded something. Create a new instruction and return it. |
2960 | auto NewInst = MIB.buildInstr(Opc: NewOpc, DstOps: {}, SrcOps: {}, Flags: I.getFlags()); |
2961 | Register CurValReg = I.getOperand(i: 0).getReg(); |
2962 | IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg); |
2963 | NewInst.cloneMemRefs(I); |
2964 | for (auto &Fn : *AddrModeFns) |
2965 | Fn(NewInst); |
2966 | I.eraseFromParent(); |
2967 | return &*NewInst; |
2968 | }; |
2969 | |
2970 | MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); |
2971 | if (!LoadStore) |
2972 | return false; |
2973 | |
2974 | // If we're storing a 0, use WZR/XZR. |
2975 | if (Opcode == TargetOpcode::G_STORE) { |
2976 | auto CVal = getIConstantVRegValWithLookThrough( |
2977 | VReg: LoadStore->getOperand(i: 0).getReg(), MRI); |
2978 | if (CVal && CVal->Value == 0) { |
2979 | switch (LoadStore->getOpcode()) { |
2980 | case AArch64::STRWui: |
2981 | case AArch64::STRHHui: |
2982 | case AArch64::STRBBui: |
2983 | LoadStore->getOperand(0).setReg(AArch64::WZR); |
2984 | break; |
2985 | case AArch64::STRXui: |
2986 | LoadStore->getOperand(0).setReg(AArch64::XZR); |
2987 | break; |
2988 | } |
2989 | } |
2990 | } |
2991 | |
2992 | if (IsZExtLoad) { |
2993 | // The zextload from a smaller type to i32 should be handled by the |
2994 | // importer. |
2995 | if (MRI.getType(Reg: LoadStore->getOperand(i: 0).getReg()).getSizeInBits() != 64) |
2996 | return false; |
2997 | // If we have a ZEXTLOAD then change the load's type to be a narrower reg |
2998 | // and zero_extend with SUBREG_TO_REG. |
2999 | Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); |
3000 | Register DstReg = LoadStore->getOperand(i: 0).getReg(); |
3001 | LoadStore->getOperand(i: 0).setReg(LdReg); |
3002 | |
3003 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LoadStore->getIterator())); |
3004 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) |
3005 | .addImm(0) |
3006 | .addUse(LdReg) |
3007 | .addImm(AArch64::sub_32); |
3008 | constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); |
3009 | return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, |
3010 | MRI); |
3011 | } |
3012 | return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); |
3013 | } |
3014 | |
3015 | case TargetOpcode::G_INDEXED_ZEXTLOAD: |
3016 | case TargetOpcode::G_INDEXED_SEXTLOAD: |
3017 | return selectIndexedExtLoad(I, MRI); |
3018 | case TargetOpcode::G_INDEXED_LOAD: |
3019 | return selectIndexedLoad(I, MRI); |
3020 | case TargetOpcode::G_INDEXED_STORE: |
3021 | return selectIndexedStore(I&: cast<GIndexedStore>(Val&: I), MRI); |
3022 | |
3023 | case TargetOpcode::G_LSHR: |
3024 | case TargetOpcode::G_ASHR: |
3025 | if (MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector()) |
3026 | return selectVectorAshrLshr(I, MRI); |
3027 | [[fallthrough]]; |
3028 | case TargetOpcode::G_SHL: |
3029 | if (Opcode == TargetOpcode::G_SHL && |
3030 | MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector()) |
3031 | return selectVectorSHL(I, MRI); |
3032 | |
3033 | // These shifts were legalized to have 64 bit shift amounts because we |
3034 | // want to take advantage of the selection patterns that assume the |
3035 | // immediates are s64s, however, selectBinaryOp will assume both operands |
3036 | // will have the same bit size. |
3037 | { |
3038 | Register SrcReg = I.getOperand(i: 1).getReg(); |
3039 | Register ShiftReg = I.getOperand(i: 2).getReg(); |
3040 | const LLT ShiftTy = MRI.getType(Reg: ShiftReg); |
3041 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
3042 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && |
3043 | ShiftTy.getSizeInBits() == 64) { |
3044 | assert(!ShiftTy.isVector() && "unexpected vector shift ty" ); |
3045 | // Insert a subregister copy to implement a 64->32 trunc |
3046 | auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) |
3047 | .addReg(ShiftReg, 0, AArch64::sub_32); |
3048 | MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); |
3049 | I.getOperand(i: 2).setReg(Trunc.getReg(0)); |
3050 | } |
3051 | } |
3052 | [[fallthrough]]; |
3053 | case TargetOpcode::G_OR: { |
3054 | // Reject the various things we don't support yet. |
3055 | if (unsupportedBinOp(I, RBI, MRI, TRI)) |
3056 | return false; |
3057 | |
3058 | const unsigned OpSize = Ty.getSizeInBits(); |
3059 | |
3060 | const Register DefReg = I.getOperand(i: 0).getReg(); |
3061 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); |
3062 | |
3063 | const unsigned NewOpc = selectBinaryOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize); |
3064 | if (NewOpc == I.getOpcode()) |
3065 | return false; |
3066 | |
3067 | I.setDesc(TII.get(NewOpc)); |
3068 | // FIXME: Should the type be always reset in setDesc? |
3069 | |
3070 | // Now that we selected an opcode, we need to constrain the register |
3071 | // operands to use appropriate classes. |
3072 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3073 | } |
3074 | |
3075 | case TargetOpcode::G_PTR_ADD: { |
3076 | emitADD(DefReg: I.getOperand(i: 0).getReg(), LHS&: I.getOperand(i: 1), RHS&: I.getOperand(i: 2), MIRBuilder&: MIB); |
3077 | I.eraseFromParent(); |
3078 | return true; |
3079 | } |
3080 | |
3081 | case TargetOpcode::G_SADDE: |
3082 | case TargetOpcode::G_UADDE: |
3083 | case TargetOpcode::G_SSUBE: |
3084 | case TargetOpcode::G_USUBE: |
3085 | case TargetOpcode::G_SADDO: |
3086 | case TargetOpcode::G_UADDO: |
3087 | case TargetOpcode::G_SSUBO: |
3088 | case TargetOpcode::G_USUBO: |
3089 | return selectOverflowOp(I, MRI); |
3090 | |
3091 | case TargetOpcode::G_PTRMASK: { |
3092 | Register MaskReg = I.getOperand(i: 2).getReg(); |
3093 | std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(VReg: MaskReg, MRI); |
3094 | // TODO: Implement arbitrary cases |
3095 | if (!MaskVal || !isShiftedMask_64(Value: *MaskVal)) |
3096 | return false; |
3097 | |
3098 | uint64_t Mask = *MaskVal; |
3099 | I.setDesc(TII.get(AArch64::ANDXri)); |
3100 | I.getOperand(i: 2).ChangeToImmediate( |
3101 | ImmVal: AArch64_AM::encodeLogicalImmediate(imm: Mask, regSize: 64)); |
3102 | |
3103 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3104 | } |
3105 | case TargetOpcode::G_PTRTOINT: |
3106 | case TargetOpcode::G_TRUNC: { |
3107 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3108 | const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
3109 | |
3110 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3111 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
3112 | |
3113 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); |
3114 | const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); |
3115 | |
3116 | if (DstRB.getID() != SrcRB.getID()) { |
3117 | LLVM_DEBUG( |
3118 | dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n" ); |
3119 | return false; |
3120 | } |
3121 | |
3122 | if (DstRB.getID() == AArch64::GPRRegBankID) { |
3123 | const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB); |
3124 | if (!DstRC) |
3125 | return false; |
3126 | |
3127 | const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(Ty: SrcTy, RB: SrcRB); |
3128 | if (!SrcRC) |
3129 | return false; |
3130 | |
3131 | if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: *SrcRC, MRI) || |
3132 | !RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) { |
3133 | LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n" ); |
3134 | return false; |
3135 | } |
3136 | |
3137 | if (DstRC == SrcRC) { |
3138 | // Nothing to be done |
3139 | } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(SizeInBits: 32) && |
3140 | SrcTy == LLT::scalar(SizeInBits: 64)) { |
3141 | llvm_unreachable("TableGen can import this case" ); |
3142 | return false; |
3143 | } else if (DstRC == &AArch64::GPR32RegClass && |
3144 | SrcRC == &AArch64::GPR64RegClass) { |
3145 | I.getOperand(1).setSubReg(AArch64::sub_32); |
3146 | } else { |
3147 | LLVM_DEBUG( |
3148 | dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n" ); |
3149 | return false; |
3150 | } |
3151 | |
3152 | I.setDesc(TII.get(TargetOpcode::COPY)); |
3153 | return true; |
3154 | } else if (DstRB.getID() == AArch64::FPRRegBankID) { |
3155 | if (DstTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) && |
3156 | SrcTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
3157 | I.setDesc(TII.get(AArch64::XTNv4i16)); |
3158 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3159 | return true; |
3160 | } |
3161 | |
3162 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { |
3163 | MachineInstr * = emitExtractVectorElt( |
3164 | DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), VecReg: SrcReg, LaneIdx: 0, MIRBuilder&: MIB); |
3165 | if (!Extract) |
3166 | return false; |
3167 | I.eraseFromParent(); |
3168 | return true; |
3169 | } |
3170 | |
3171 | // We might have a vector G_PTRTOINT, in which case just emit a COPY. |
3172 | if (Opcode == TargetOpcode::G_PTRTOINT) { |
3173 | assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector" ); |
3174 | I.setDesc(TII.get(TargetOpcode::COPY)); |
3175 | return selectCopy(I, TII, MRI, TRI, RBI); |
3176 | } |
3177 | } |
3178 | |
3179 | return false; |
3180 | } |
3181 | |
3182 | case TargetOpcode::G_ANYEXT: { |
3183 | if (selectUSMovFromExtend(I, MRI)) |
3184 | return true; |
3185 | |
3186 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3187 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
3188 | |
3189 | const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); |
3190 | if (RBDst.getID() != AArch64::GPRRegBankID) { |
3191 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst |
3192 | << ", expected: GPR\n" ); |
3193 | return false; |
3194 | } |
3195 | |
3196 | const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); |
3197 | if (RBSrc.getID() != AArch64::GPRRegBankID) { |
3198 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc |
3199 | << ", expected: GPR\n" ); |
3200 | return false; |
3201 | } |
3202 | |
3203 | const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits(); |
3204 | |
3205 | if (DstSize == 0) { |
3206 | LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n" ); |
3207 | return false; |
3208 | } |
3209 | |
3210 | if (DstSize != 64 && DstSize > 32) { |
3211 | LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize |
3212 | << ", expected: 32 or 64\n" ); |
3213 | return false; |
3214 | } |
3215 | // At this point G_ANYEXT is just like a plain COPY, but we need |
3216 | // to explicitly form the 64-bit value if any. |
3217 | if (DstSize > 32) { |
3218 | Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); |
3219 | BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) |
3220 | .addDef(ExtSrc) |
3221 | .addImm(0) |
3222 | .addUse(SrcReg) |
3223 | .addImm(AArch64::sub_32); |
3224 | I.getOperand(i: 1).setReg(ExtSrc); |
3225 | } |
3226 | return selectCopy(I, TII, MRI, TRI, RBI); |
3227 | } |
3228 | |
3229 | case TargetOpcode::G_ZEXT: |
3230 | case TargetOpcode::G_SEXT_INREG: |
3231 | case TargetOpcode::G_SEXT: { |
3232 | if (selectUSMovFromExtend(I, MRI)) |
3233 | return true; |
3234 | |
3235 | unsigned Opcode = I.getOpcode(); |
3236 | const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; |
3237 | const Register DefReg = I.getOperand(i: 0).getReg(); |
3238 | Register SrcReg = I.getOperand(i: 1).getReg(); |
3239 | const LLT DstTy = MRI.getType(Reg: DefReg); |
3240 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
3241 | unsigned DstSize = DstTy.getSizeInBits(); |
3242 | unsigned SrcSize = SrcTy.getSizeInBits(); |
3243 | |
3244 | // SEXT_INREG has the same src reg size as dst, the size of the value to be |
3245 | // extended is encoded in the imm. |
3246 | if (Opcode == TargetOpcode::G_SEXT_INREG) |
3247 | SrcSize = I.getOperand(i: 2).getImm(); |
3248 | |
3249 | if (DstTy.isVector()) |
3250 | return false; // Should be handled by imported patterns. |
3251 | |
3252 | assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == |
3253 | AArch64::GPRRegBankID && |
3254 | "Unexpected ext regbank" ); |
3255 | |
3256 | MachineInstr *ExtI; |
3257 | |
3258 | // First check if we're extending the result of a load which has a dest type |
3259 | // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest |
3260 | // GPR register on AArch64 and all loads which are smaller automatically |
3261 | // zero-extend the upper bits. E.g. |
3262 | // %v(s8) = G_LOAD %p, :: (load 1) |
3263 | // %v2(s32) = G_ZEXT %v(s8) |
3264 | if (!IsSigned) { |
3265 | auto *LoadMI = getOpcodeDef(Opcode: TargetOpcode::G_LOAD, Reg: SrcReg, MRI); |
3266 | bool IsGPR = |
3267 | RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; |
3268 | if (LoadMI && IsGPR) { |
3269 | const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); |
3270 | unsigned BytesLoaded = MemOp->getSize(); |
3271 | if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) |
3272 | return selectCopy(I, TII, MRI, TRI, RBI); |
3273 | } |
3274 | |
3275 | // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs) |
3276 | // + SUBREG_TO_REG. |
3277 | if (IsGPR && SrcSize == 32 && DstSize == 64) { |
3278 | Register SubregToRegSrc = |
3279 | MRI.createVirtualRegister(&AArch64::GPR32RegClass); |
3280 | const Register ZReg = AArch64::WZR; |
3281 | MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg}) |
3282 | .addImm(0); |
3283 | |
3284 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) |
3285 | .addImm(0) |
3286 | .addUse(SubregToRegSrc) |
3287 | .addImm(AArch64::sub_32); |
3288 | |
3289 | if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, |
3290 | MRI)) { |
3291 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n" ); |
3292 | return false; |
3293 | } |
3294 | |
3295 | if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, |
3296 | MRI)) { |
3297 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n" ); |
3298 | return false; |
3299 | } |
3300 | |
3301 | I.eraseFromParent(); |
3302 | return true; |
3303 | } |
3304 | } |
3305 | |
3306 | if (DstSize == 64) { |
3307 | if (Opcode != TargetOpcode::G_SEXT_INREG) { |
3308 | // FIXME: Can we avoid manually doing this? |
3309 | if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, |
3310 | MRI)) { |
3311 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) |
3312 | << " operand\n" ); |
3313 | return false; |
3314 | } |
3315 | SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, |
3316 | {&AArch64::GPR64RegClass}, {}) |
3317 | .addImm(0) |
3318 | .addUse(SrcReg) |
3319 | .addImm(AArch64::sub_32) |
3320 | .getReg(0); |
3321 | } |
3322 | |
3323 | ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, |
3324 | {DefReg}, {SrcReg}) |
3325 | .addImm(0) |
3326 | .addImm(SrcSize - 1); |
3327 | } else if (DstSize <= 32) { |
3328 | ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, |
3329 | {DefReg}, {SrcReg}) |
3330 | .addImm(0) |
3331 | .addImm(SrcSize - 1); |
3332 | } else { |
3333 | return false; |
3334 | } |
3335 | |
3336 | constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); |
3337 | I.eraseFromParent(); |
3338 | return true; |
3339 | } |
3340 | |
3341 | case TargetOpcode::G_SITOFP: |
3342 | case TargetOpcode::G_UITOFP: |
3343 | case TargetOpcode::G_FPTOSI: |
3344 | case TargetOpcode::G_FPTOUI: { |
3345 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()), |
3346 | SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
3347 | const unsigned NewOpc = selectFPConvOpc(GenericOpc: Opcode, DstTy, SrcTy); |
3348 | if (NewOpc == Opcode) |
3349 | return false; |
3350 | |
3351 | I.setDesc(TII.get(NewOpc)); |
3352 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3353 | I.setFlags(MachineInstr::NoFPExcept); |
3354 | |
3355 | return true; |
3356 | } |
3357 | |
3358 | case TargetOpcode::G_FREEZE: |
3359 | return selectCopy(I, TII, MRI, TRI, RBI); |
3360 | |
3361 | case TargetOpcode::G_INTTOPTR: |
3362 | // The importer is currently unable to import pointer types since they |
3363 | // didn't exist in SelectionDAG. |
3364 | return selectCopy(I, TII, MRI, TRI, RBI); |
3365 | |
3366 | case TargetOpcode::G_BITCAST: |
3367 | // Imported SelectionDAG rules can handle every bitcast except those that |
3368 | // bitcast from a type to the same type. Ideally, these shouldn't occur |
3369 | // but we might not run an optimizer that deletes them. The other exception |
3370 | // is bitcasts involving pointer types, as SelectionDAG has no knowledge |
3371 | // of them. |
3372 | return selectCopy(I, TII, MRI, TRI, RBI); |
3373 | |
3374 | case TargetOpcode::G_SELECT: { |
3375 | auto &Sel = cast<GSelect>(Val&: I); |
3376 | const Register CondReg = Sel.getCondReg(); |
3377 | const Register TReg = Sel.getTrueReg(); |
3378 | const Register FReg = Sel.getFalseReg(); |
3379 | |
3380 | if (tryOptSelect(Sel)) |
3381 | return true; |
3382 | |
3383 | // Make sure to use an unused vreg instead of wzr, so that the peephole |
3384 | // optimizations will be able to optimize these. |
3385 | Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); |
3386 | auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) |
3387 | .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); |
3388 | constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); |
3389 | if (!emitSelect(Dst: Sel.getReg(Idx: 0), True: TReg, False: FReg, CC: AArch64CC::NE, MIB)) |
3390 | return false; |
3391 | Sel.eraseFromParent(); |
3392 | return true; |
3393 | } |
3394 | case TargetOpcode::G_ICMP: { |
3395 | if (Ty.isVector()) |
3396 | return selectVectorICmp(I, MRI); |
3397 | |
3398 | if (Ty != LLT::scalar(SizeInBits: 32)) { |
3399 | LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty |
3400 | << ", expected: " << LLT::scalar(32) << '\n'); |
3401 | return false; |
3402 | } |
3403 | |
3404 | auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate()); |
3405 | const AArch64CC::CondCode InvCC = |
3406 | changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred)); |
3407 | emitIntegerCompare(LHS&: I.getOperand(i: 2), RHS&: I.getOperand(i: 3), Predicate&: I.getOperand(i: 1), MIRBuilder&: MIB); |
3408 | emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR, |
3409 | /*Src2=*/AArch64::WZR, InvCC, MIB); |
3410 | I.eraseFromParent(); |
3411 | return true; |
3412 | } |
3413 | |
3414 | case TargetOpcode::G_FCMP: { |
3415 | CmpInst::Predicate Pred = |
3416 | static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate()); |
3417 | if (!emitFPCompare(LHS: I.getOperand(i: 2).getReg(), RHS: I.getOperand(i: 3).getReg(), MIRBuilder&: MIB, |
3418 | Pred) || |
3419 | !emitCSetForFCmp(Dst: I.getOperand(i: 0).getReg(), Pred, MIRBuilder&: MIB)) |
3420 | return false; |
3421 | I.eraseFromParent(); |
3422 | return true; |
3423 | } |
3424 | case TargetOpcode::G_VASTART: |
3425 | return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) |
3426 | : selectVaStartAAPCS(I, MF, MRI); |
3427 | case TargetOpcode::G_INTRINSIC: |
3428 | return selectIntrinsic(I, MRI); |
3429 | case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: |
3430 | return selectIntrinsicWithSideEffects(I, MRI); |
3431 | case TargetOpcode::G_IMPLICIT_DEF: { |
3432 | I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); |
3433 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3434 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3435 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); |
3436 | const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB); |
3437 | RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI); |
3438 | return true; |
3439 | } |
3440 | case TargetOpcode::G_BLOCK_ADDR: { |
3441 | if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) { |
3442 | materializeLargeCMVal(I, V: I.getOperand(i: 1).getBlockAddress(), OpFlags: 0); |
3443 | I.eraseFromParent(); |
3444 | return true; |
3445 | } else { |
3446 | I.setDesc(TII.get(AArch64::MOVaddrBA)); |
3447 | auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), |
3448 | I.getOperand(0).getReg()) |
3449 | .addBlockAddress(I.getOperand(1).getBlockAddress(), |
3450 | /* Offset */ 0, AArch64II::MO_PAGE) |
3451 | .addBlockAddress( |
3452 | I.getOperand(1).getBlockAddress(), /* Offset */ 0, |
3453 | AArch64II::MO_NC | AArch64II::MO_PAGEOFF); |
3454 | I.eraseFromParent(); |
3455 | return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); |
3456 | } |
3457 | } |
3458 | case AArch64::G_DUP: { |
3459 | // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by |
3460 | // imported patterns. Do it manually here. Avoiding generating s16 gpr is |
3461 | // difficult because at RBS we may end up pessimizing the fpr case if we |
3462 | // decided to add an anyextend to fix this. Manual selection is the most |
3463 | // robust solution for now. |
3464 | if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != |
3465 | AArch64::GPRRegBankID) |
3466 | return false; // We expect the fpr regbank case to be imported. |
3467 | LLT VecTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3468 | if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) |
3469 | I.setDesc(TII.get(AArch64::DUPv8i8gpr)); |
3470 | else if (VecTy == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) |
3471 | I.setDesc(TII.get(AArch64::DUPv16i8gpr)); |
3472 | else if (VecTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) |
3473 | I.setDesc(TII.get(AArch64::DUPv4i16gpr)); |
3474 | else if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) |
3475 | I.setDesc(TII.get(AArch64::DUPv8i16gpr)); |
3476 | else |
3477 | return false; |
3478 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3479 | } |
3480 | case TargetOpcode::G_BUILD_VECTOR: |
3481 | return selectBuildVector(I, MRI); |
3482 | case TargetOpcode::G_MERGE_VALUES: |
3483 | return selectMergeValues(I, MRI); |
3484 | case TargetOpcode::G_UNMERGE_VALUES: |
3485 | return selectUnmergeValues(I, MRI); |
3486 | case TargetOpcode::G_SHUFFLE_VECTOR: |
3487 | return selectShuffleVector(I, MRI); |
3488 | case TargetOpcode::G_EXTRACT_VECTOR_ELT: |
3489 | return selectExtractElt(I, MRI); |
3490 | case TargetOpcode::G_INSERT_VECTOR_ELT: |
3491 | return selectInsertElt(I, MRI); |
3492 | case TargetOpcode::G_CONCAT_VECTORS: |
3493 | return selectConcatVectors(I, MRI); |
3494 | case TargetOpcode::G_JUMP_TABLE: |
3495 | return selectJumpTable(I, MRI); |
3496 | case TargetOpcode::G_MEMCPY: |
3497 | case TargetOpcode::G_MEMCPY_INLINE: |
3498 | case TargetOpcode::G_MEMMOVE: |
3499 | case TargetOpcode::G_MEMSET: |
3500 | assert(STI.hasMOPS() && "Shouldn't get here without +mops feature" ); |
3501 | return selectMOPS(I, MRI); |
3502 | } |
3503 | |
3504 | return false; |
3505 | } |
3506 | |
3507 | bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) { |
3508 | MachineIRBuilderState OldMIBState = MIB.getState(); |
3509 | bool Success = select(I); |
3510 | MIB.setState(OldMIBState); |
3511 | return Success; |
3512 | } |
3513 | |
3514 | bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, |
3515 | MachineRegisterInfo &MRI) { |
3516 | unsigned Mopcode; |
3517 | switch (GI.getOpcode()) { |
3518 | case TargetOpcode::G_MEMCPY: |
3519 | case TargetOpcode::G_MEMCPY_INLINE: |
3520 | Mopcode = AArch64::MOPSMemoryCopyPseudo; |
3521 | break; |
3522 | case TargetOpcode::G_MEMMOVE: |
3523 | Mopcode = AArch64::MOPSMemoryMovePseudo; |
3524 | break; |
3525 | case TargetOpcode::G_MEMSET: |
3526 | // For tagged memset see llvm.aarch64.mops.memset.tag |
3527 | Mopcode = AArch64::MOPSMemorySetPseudo; |
3528 | break; |
3529 | } |
3530 | |
3531 | auto &DstPtr = GI.getOperand(i: 0); |
3532 | auto &SrcOrVal = GI.getOperand(i: 1); |
3533 | auto &Size = GI.getOperand(i: 2); |
3534 | |
3535 | // Create copies of the registers that can be clobbered. |
3536 | const Register DstPtrCopy = MRI.cloneVirtualRegister(VReg: DstPtr.getReg()); |
3537 | const Register SrcValCopy = MRI.cloneVirtualRegister(VReg: SrcOrVal.getReg()); |
3538 | const Register SizeCopy = MRI.cloneVirtualRegister(VReg: Size.getReg()); |
3539 | |
3540 | const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo; |
3541 | const auto &SrcValRegClass = |
3542 | IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass; |
3543 | |
3544 | // Constrain to specific registers |
3545 | RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI); |
3546 | RBI.constrainGenericRegister(Reg: SrcValCopy, RC: SrcValRegClass, MRI); |
3547 | RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI); |
3548 | |
3549 | MIB.buildCopy(Res: DstPtrCopy, Op: DstPtr); |
3550 | MIB.buildCopy(Res: SrcValCopy, Op: SrcOrVal); |
3551 | MIB.buildCopy(Res: SizeCopy, Op: Size); |
3552 | |
3553 | // New instruction uses the copied registers because it must update them. |
3554 | // The defs are not used since they don't exist in G_MEM*. They are still |
3555 | // tied. |
3556 | // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE |
3557 | Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass); |
3558 | Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
3559 | if (IsSet) { |
3560 | MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSize}, |
3561 | SrcOps: {DstPtrCopy, SizeCopy, SrcValCopy}); |
3562 | } else { |
3563 | Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass); |
3564 | MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSrcPtr, DefSize}, |
3565 | SrcOps: {DstPtrCopy, SrcValCopy, SizeCopy}); |
3566 | } |
3567 | |
3568 | GI.eraseFromParent(); |
3569 | return true; |
3570 | } |
3571 | |
3572 | bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, |
3573 | MachineRegisterInfo &MRI) { |
3574 | assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT" ); |
3575 | Register JTAddr = I.getOperand(i: 0).getReg(); |
3576 | unsigned JTI = I.getOperand(i: 1).getIndex(); |
3577 | Register Index = I.getOperand(i: 2).getReg(); |
3578 | |
3579 | Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
3580 | Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); |
3581 | |
3582 | MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(Idx: JTI, Size: 4, PCRelSym: nullptr); |
3583 | auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, |
3584 | {TargetReg, ScratchReg}, {JTAddr, Index}) |
3585 | .addJumpTableIndex(JTI); |
3586 | // Save the jump table info. |
3587 | MIB.buildInstr(Opc: TargetOpcode::JUMP_TABLE_DEBUG_INFO, DstOps: {}, |
3588 | SrcOps: {static_cast<int64_t>(JTI)}); |
3589 | // Build the indirect branch. |
3590 | MIB.buildInstr(AArch64::BR, {}, {TargetReg}); |
3591 | I.eraseFromParent(); |
3592 | return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI); |
3593 | } |
3594 | |
3595 | bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I, |
3596 | MachineRegisterInfo &MRI) { |
3597 | assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table" ); |
3598 | assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!" ); |
3599 | |
3600 | Register DstReg = I.getOperand(i: 0).getReg(); |
3601 | unsigned JTI = I.getOperand(i: 1).getIndex(); |
3602 | // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. |
3603 | auto MovMI = |
3604 | MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) |
3605 | .addJumpTableIndex(JTI, AArch64II::MO_PAGE) |
3606 | .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); |
3607 | I.eraseFromParent(); |
3608 | return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); |
3609 | } |
3610 | |
bool AArch64InstructionSelector::selectTLSGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Only the MachO TLS scheme is handled here; other targets bail out.
  if (!STI.isTargetMachO())
    return false;
  MachineFunction &MF = *I.getParent()->getParent();
  // The call emitted below may require stack adjustment.
  MF.getFrameInfo().setAdjustsStack(true);

  const auto &GlobalOp = I.getOperand(i: 1);
  assert(GlobalOp.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!" );
  const GlobalValue &GV = *GlobalOp.getGlobal();

  // Load the address of the TLS variable's descriptor from the GOT.
  auto LoadGOT =
      MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
          .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);

  // Load the resolver function pointer stored at offset 0 of the descriptor.
  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {LoadGOT.getReg(0)})
                  .addImm(0);

  // The resolver expects the descriptor address in X0. NOTE: this copy must
  // stay immediately before the call below.
  MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
  // TLS calls preserve all registers except those that absolutely must be
  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
  // silly).
  MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
      .addUse(AArch64::X0, RegState::Implicit)
      .addDef(AArch64::X0, RegState::Implicit)
      .addRegMask(TRI.getTLSCallPreservedMask());

  // The resolver returns the variable's address in X0; copy it into the
  // destination vreg and pin that vreg to the GPR64 class.
  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
                               MRI);
  I.eraseFromParent();
  return true;
}
3646 | |
3647 | bool AArch64InstructionSelector::selectVectorICmp( |
3648 | MachineInstr &I, MachineRegisterInfo &MRI) { |
3649 | Register DstReg = I.getOperand(i: 0).getReg(); |
3650 | LLT DstTy = MRI.getType(Reg: DstReg); |
3651 | Register SrcReg = I.getOperand(i: 2).getReg(); |
3652 | Register Src2Reg = I.getOperand(i: 3).getReg(); |
3653 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
3654 | |
3655 | unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); |
3656 | unsigned NumElts = DstTy.getNumElements(); |
3657 | |
3658 | // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b |
3659 | // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 |
3660 | // Third index is cc opcode: |
3661 | // 0 == eq |
3662 | // 1 == ugt |
3663 | // 2 == uge |
3664 | // 3 == ult |
3665 | // 4 == ule |
3666 | // 5 == sgt |
3667 | // 6 == sge |
3668 | // 7 == slt |
3669 | // 8 == sle |
3670 | // ne is done by negating 'eq' result. |
3671 | |
3672 | // This table below assumes that for some comparisons the operands will be |
3673 | // commuted. |
3674 | // ult op == commute + ugt op |
3675 | // ule op == commute + uge op |
3676 | // slt op == commute + sgt op |
3677 | // sle op == commute + sge op |
3678 | unsigned PredIdx = 0; |
3679 | bool SwapOperands = false; |
3680 | CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(i: 1).getPredicate(); |
3681 | switch (Pred) { |
3682 | case CmpInst::ICMP_NE: |
3683 | case CmpInst::ICMP_EQ: |
3684 | PredIdx = 0; |
3685 | break; |
3686 | case CmpInst::ICMP_UGT: |
3687 | PredIdx = 1; |
3688 | break; |
3689 | case CmpInst::ICMP_UGE: |
3690 | PredIdx = 2; |
3691 | break; |
3692 | case CmpInst::ICMP_ULT: |
3693 | PredIdx = 3; |
3694 | SwapOperands = true; |
3695 | break; |
3696 | case CmpInst::ICMP_ULE: |
3697 | PredIdx = 4; |
3698 | SwapOperands = true; |
3699 | break; |
3700 | case CmpInst::ICMP_SGT: |
3701 | PredIdx = 5; |
3702 | break; |
3703 | case CmpInst::ICMP_SGE: |
3704 | PredIdx = 6; |
3705 | break; |
3706 | case CmpInst::ICMP_SLT: |
3707 | PredIdx = 7; |
3708 | SwapOperands = true; |
3709 | break; |
3710 | case CmpInst::ICMP_SLE: |
3711 | PredIdx = 8; |
3712 | SwapOperands = true; |
3713 | break; |
3714 | default: |
3715 | llvm_unreachable("Unhandled icmp predicate" ); |
3716 | return false; |
3717 | } |
3718 | |
3719 | // This table obviously should be tablegen'd when we have our GISel native |
3720 | // tablegen selector. |
3721 | |
3722 | static const unsigned OpcTable[4][4][9] = { |
3723 | { |
3724 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3725 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3726 | 0 /* invalid */}, |
3727 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3728 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3729 | 0 /* invalid */}, |
3730 | {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, |
3731 | AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, |
3732 | AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, |
3733 | {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, |
3734 | AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, |
3735 | AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} |
3736 | }, |
3737 | { |
3738 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3739 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3740 | 0 /* invalid */}, |
3741 | {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, |
3742 | AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, |
3743 | AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, |
3744 | {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, |
3745 | AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, |
3746 | AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, |
3747 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3748 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3749 | 0 /* invalid */} |
3750 | }, |
3751 | { |
3752 | {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, |
3753 | AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, |
3754 | AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, |
3755 | {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, |
3756 | AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, |
3757 | AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, |
3758 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3759 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3760 | 0 /* invalid */}, |
3761 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3762 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3763 | 0 /* invalid */} |
3764 | }, |
3765 | { |
3766 | {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, |
3767 | AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, |
3768 | AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, |
3769 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3770 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3771 | 0 /* invalid */}, |
3772 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3773 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3774 | 0 /* invalid */}, |
3775 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3776 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3777 | 0 /* invalid */} |
3778 | }, |
3779 | }; |
3780 | unsigned EltIdx = Log2_32(Value: SrcEltSize / 8); |
3781 | unsigned NumEltsIdx = Log2_32(Value: NumElts / 2); |
3782 | unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; |
3783 | if (!Opc) { |
3784 | LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode" ); |
3785 | return false; |
3786 | } |
3787 | |
3788 | const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); |
3789 | const TargetRegisterClass *SrcRC = |
3790 | getRegClassForTypeOnBank(Ty: SrcTy, RB: VecRB, GetAllRegSet: true); |
3791 | if (!SrcRC) { |
3792 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n" ); |
3793 | return false; |
3794 | } |
3795 | |
3796 | unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; |
3797 | if (SrcTy.getSizeInBits() == 128) |
3798 | NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; |
3799 | |
3800 | if (SwapOperands) |
3801 | std::swap(a&: SrcReg, b&: Src2Reg); |
3802 | |
3803 | auto Cmp = MIB.buildInstr(Opc, DstOps: {SrcRC}, SrcOps: {SrcReg, Src2Reg}); |
3804 | constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); |
3805 | |
3806 | // Invert if we had a 'ne' cc. |
3807 | if (NotOpc) { |
3808 | Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); |
3809 | constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); |
3810 | } else { |
3811 | MIB.buildCopy(Res: DstReg, Op: Cmp.getReg(0)); |
3812 | } |
3813 | RBI.constrainGenericRegister(Reg: DstReg, RC: *SrcRC, MRI); |
3814 | I.eraseFromParent(); |
3815 | return true; |
3816 | } |
3817 | |
3818 | MachineInstr *AArch64InstructionSelector::emitScalarToVector( |
3819 | unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, |
3820 | MachineIRBuilder &MIRBuilder) const { |
3821 | auto Undef = MIRBuilder.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstRC}, SrcOps: {}); |
3822 | |
3823 | auto BuildFn = [&](unsigned SubregIndex) { |
3824 | auto Ins = |
3825 | MIRBuilder |
3826 | .buildInstr(Opc: TargetOpcode::INSERT_SUBREG, DstOps: {DstRC}, SrcOps: {Undef, Scalar}) |
3827 | .addImm(Val: SubregIndex); |
3828 | constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); |
3829 | constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); |
3830 | return &*Ins; |
3831 | }; |
3832 | |
3833 | switch (EltSize) { |
3834 | case 8: |
3835 | return BuildFn(AArch64::bsub); |
3836 | case 16: |
3837 | return BuildFn(AArch64::hsub); |
3838 | case 32: |
3839 | return BuildFn(AArch64::ssub); |
3840 | case 64: |
3841 | return BuildFn(AArch64::dsub); |
3842 | default: |
3843 | return nullptr; |
3844 | } |
3845 | } |
3846 | |
3847 | MachineInstr * |
3848 | AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg, |
3849 | MachineIRBuilder &MIB, |
3850 | MachineRegisterInfo &MRI) const { |
3851 | LLT DstTy = MRI.getType(Reg: DstReg); |
3852 | const TargetRegisterClass *RC = |
3853 | getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI)); |
3854 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { |
3855 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n" ); |
3856 | return nullptr; |
3857 | } |
3858 | unsigned SubReg = 0; |
3859 | if (!getSubRegForClass(RC, TRI, SubReg)) |
3860 | return nullptr; |
3861 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { |
3862 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" |
3863 | << DstTy.getSizeInBits() << "\n" ); |
3864 | return nullptr; |
3865 | } |
3866 | auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}) |
3867 | .addReg(RegNo: SrcReg, flags: 0, SubReg); |
3868 | RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI); |
3869 | return Copy; |
3870 | } |
3871 | |
// Select G_MERGE_VALUES of exactly two scalar sources. Two cases are handled:
//   * two s64s -> s128: implemented as a pair of lane inserts into an
//     IMPLICIT_DEF (FPR path);
//   * two s32s -> s64 on the GPR bank: each source is widened with
//     SUBREG_TO_REG and the high half is written with BFMXri.
// Any other shape (more operands, other sizes, non-GPR bank for the 32->64
// case) is rejected so selection can fall back.
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode" );
  const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation" );
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(i: 1).getReg(), MRI, TRI);

  // Only the two-source form is handled here.
  if (I.getNumOperands() != 3)
    return false;

  // Merging 2 s64s into an s128.
  if (DstTy == LLT::scalar(SizeInBits: 128)) {
    if (SrcTy.getSizeInBits() != 64)
      return false;
    Register DstReg = I.getOperand(i: 0).getReg();
    Register Src1Reg = I.getOperand(i: 1).getReg();
    Register Src2Reg = I.getOperand(i: 2).getReg();
    // Insert lane 0 into a fresh vreg (std::nullopt), then lane 1 directly
    // into the final destination.
    auto Tmp = MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstTy}, SrcOps: {});
    MachineInstr *InsMI = emitLaneInsert(DstReg: std::nullopt, SrcReg: Tmp.getReg(Idx: 0), EltReg: Src1Reg,
                                         /* LaneIdx */ 0, RB, MIRBuilder&: MIB);
    if (!InsMI)
      return false;
    MachineInstr *Ins2MI = emitLaneInsert(DstReg, SrcReg: InsMI->getOperand(i: 0).getReg(),
                                          EltReg: Src2Reg, /* LaneIdx */ 1, RB, MIRBuilder&: MIB);
    if (!Ins2MI)
      return false;
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  // The remaining path only handles GPR-bank 2 x s32 -> s64.
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;

  auto *DstRC = &AArch64::GPR64RegClass;
  // Widen the first s32 to 64 bits (upper bits undefined-then-zero via
  // SUBREG_TO_REG with immediate 0).
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
                                .addDef(SubToRegDef)
                                .addImm(0)
                                .addUse(I.getOperand(1).getReg())
                                .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // Need to anyext the second scalar before we can use bfm
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                 .addDef(SubToRegDef2)
                                 .addImm(0)
                                 .addUse(I.getOperand(2).getReg())
                                 .addImm(AArch64::sub_32);
  // BFM with immr=32, imms=31 moves bits [31:0] of the second operand into
  // bits [63:32] of the first.
  MachineInstr &BFM =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
           .addUse(SubToRegDef)
           .addUse(SubToRegDef2)
           .addImm(32)
           .addImm(31);
  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
3940 | |
3941 | static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &, |
3942 | const unsigned EltSize) { |
3943 | // Choose a lane copy opcode and subregister based off of the size of the |
3944 | // vector's elements. |
3945 | switch (EltSize) { |
3946 | case 8: |
3947 | CopyOpc = AArch64::DUPi8; |
3948 | ExtractSubReg = AArch64::bsub; |
3949 | break; |
3950 | case 16: |
3951 | CopyOpc = AArch64::DUPi16; |
3952 | ExtractSubReg = AArch64::hsub; |
3953 | break; |
3954 | case 32: |
3955 | CopyOpc = AArch64::DUPi32; |
3956 | ExtractSubReg = AArch64::ssub; |
3957 | break; |
3958 | case 64: |
3959 | CopyOpc = AArch64::DUPi64; |
3960 | ExtractSubReg = AArch64::dsub; |
3961 | break; |
3962 | default: |
3963 | // Unknown size, bail out. |
3964 | LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n" ); |
3965 | return false; |
3966 | } |
3967 | return true; |
3968 | } |
3969 | |
3970 | MachineInstr *AArch64InstructionSelector::( |
3971 | std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, |
3972 | Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { |
3973 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
3974 | unsigned CopyOpc = 0; |
3975 | unsigned = 0; |
3976 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: ScalarTy.getSizeInBits())) { |
3977 | LLVM_DEBUG( |
3978 | dbgs() << "Couldn't determine lane copy opcode for instruction.\n" ); |
3979 | return nullptr; |
3980 | } |
3981 | |
3982 | const TargetRegisterClass *DstRC = |
3983 | getRegClassForTypeOnBank(Ty: ScalarTy, RB: DstRB, GetAllRegSet: true); |
3984 | if (!DstRC) { |
3985 | LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n" ); |
3986 | return nullptr; |
3987 | } |
3988 | |
3989 | const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); |
3990 | const LLT &VecTy = MRI.getType(Reg: VecReg); |
3991 | const TargetRegisterClass *VecRC = |
3992 | getRegClassForTypeOnBank(Ty: VecTy, RB: VecRB, GetAllRegSet: true); |
3993 | if (!VecRC) { |
3994 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n" ); |
3995 | return nullptr; |
3996 | } |
3997 | |
3998 | // The register that we're going to copy into. |
3999 | Register InsertReg = VecReg; |
4000 | if (!DstReg) |
4001 | DstReg = MRI.createVirtualRegister(RegClass: DstRC); |
4002 | // If the lane index is 0, we just use a subregister COPY. |
4003 | if (LaneIdx == 0) { |
4004 | auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {*DstReg}, SrcOps: {}) |
4005 | .addReg(RegNo: VecReg, flags: 0, SubReg: ExtractSubReg); |
4006 | RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI); |
4007 | return &*Copy; |
4008 | } |
4009 | |
4010 | // Lane copies require 128-bit wide registers. If we're dealing with an |
4011 | // unpacked vector, then we need to move up to that width. Insert an implicit |
4012 | // def and a subregister insert to get us there. |
4013 | if (VecTy.getSizeInBits() != 128) { |
4014 | MachineInstr *ScalarToVector = emitScalarToVector( |
4015 | VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); |
4016 | if (!ScalarToVector) |
4017 | return nullptr; |
4018 | InsertReg = ScalarToVector->getOperand(i: 0).getReg(); |
4019 | } |
4020 | |
4021 | MachineInstr *LaneCopyMI = |
4022 | MIRBuilder.buildInstr(Opc: CopyOpc, DstOps: {*DstReg}, SrcOps: {InsertReg}).addImm(Val: LaneIdx); |
4023 | constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); |
4024 | |
4025 | // Make sure that we actually constrain the initial copy. |
4026 | RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI); |
4027 | return LaneCopyMI; |
4028 | } |
4029 | |
4030 | bool AArch64InstructionSelector::( |
4031 | MachineInstr &I, MachineRegisterInfo &MRI) { |
4032 | assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && |
4033 | "unexpected opcode!" ); |
4034 | Register DstReg = I.getOperand(i: 0).getReg(); |
4035 | const LLT NarrowTy = MRI.getType(Reg: DstReg); |
4036 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
4037 | const LLT WideTy = MRI.getType(Reg: SrcReg); |
4038 | (void)WideTy; |
4039 | assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && |
4040 | "source register size too small!" ); |
4041 | assert(!NarrowTy.isVector() && "cannot extract vector into vector!" ); |
4042 | |
4043 | // Need the lane index to determine the correct copy opcode. |
4044 | MachineOperand &LaneIdxOp = I.getOperand(i: 2); |
4045 | assert(LaneIdxOp.isReg() && "Lane index operand was not a register?" ); |
4046 | |
4047 | if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { |
4048 | LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n" ); |
4049 | return false; |
4050 | } |
4051 | |
4052 | // Find the index to extract from. |
4053 | auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: LaneIdxOp.getReg(), MRI); |
4054 | if (!VRegAndVal) |
4055 | return false; |
4056 | unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); |
4057 | |
4058 | |
4059 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); |
4060 | MachineInstr * = emitExtractVectorElt(DstReg, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, |
4061 | LaneIdx, MIRBuilder&: MIB); |
4062 | if (!Extract) |
4063 | return false; |
4064 | |
4065 | I.eraseFromParent(); |
4066 | return true; |
4067 | } |
4068 | |
4069 | bool AArch64InstructionSelector::selectSplitVectorUnmerge( |
4070 | MachineInstr &I, MachineRegisterInfo &MRI) { |
4071 | unsigned NumElts = I.getNumOperands() - 1; |
4072 | Register SrcReg = I.getOperand(i: NumElts).getReg(); |
4073 | const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
4074 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
4075 | |
4076 | assert(NarrowTy.isVector() && "Expected an unmerge into vectors" ); |
4077 | if (SrcTy.getSizeInBits() > 128) { |
4078 | LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge" ); |
4079 | return false; |
4080 | } |
4081 | |
4082 | // We implement a split vector operation by treating the sub-vectors as |
4083 | // scalars and extracting them. |
4084 | const RegisterBank &DstRB = |
4085 | *RBI.getRegBank(I.getOperand(i: 0).getReg(), MRI, TRI); |
4086 | for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { |
4087 | Register Dst = I.getOperand(i: OpIdx).getReg(); |
4088 | MachineInstr * = |
4089 | emitExtractVectorElt(DstReg: Dst, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, LaneIdx: OpIdx, MIRBuilder&: MIB); |
4090 | if (!Extract) |
4091 | return false; |
4092 | } |
4093 | I.eraseFromParent(); |
4094 | return true; |
4095 | } |
4096 | |
4097 | bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I, |
4098 | MachineRegisterInfo &MRI) { |
4099 | assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && |
4100 | "unexpected opcode" ); |
4101 | |
4102 | // TODO: Handle unmerging into GPRs and from scalars to scalars. |
4103 | if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != |
4104 | AArch64::FPRRegBankID || |
4105 | RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != |
4106 | AArch64::FPRRegBankID) { |
4107 | LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " |
4108 | "currently unsupported.\n" ); |
4109 | return false; |
4110 | } |
4111 | |
4112 | // The last operand is the vector source register, and every other operand is |
4113 | // a register to unpack into. |
4114 | unsigned NumElts = I.getNumOperands() - 1; |
4115 | Register SrcReg = I.getOperand(i: NumElts).getReg(); |
4116 | const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
4117 | const LLT WideTy = MRI.getType(Reg: SrcReg); |
4118 | (void)WideTy; |
4119 | assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && |
4120 | "can only unmerge from vector or s128 types!" ); |
4121 | assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && |
4122 | "source register size too small!" ); |
4123 | |
4124 | if (!NarrowTy.isScalar()) |
4125 | return selectSplitVectorUnmerge(I, MRI); |
4126 | |
4127 | // Choose a lane copy opcode and subregister based off of the size of the |
4128 | // vector's elements. |
4129 | unsigned CopyOpc = 0; |
4130 | unsigned = 0; |
4131 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: NarrowTy.getSizeInBits())) |
4132 | return false; |
4133 | |
4134 | // Set up for the lane copies. |
4135 | MachineBasicBlock &MBB = *I.getParent(); |
4136 | |
4137 | // Stores the registers we'll be copying from. |
4138 | SmallVector<Register, 4> InsertRegs; |
4139 | |
4140 | // We'll use the first register twice, so we only need NumElts-1 registers. |
4141 | unsigned NumInsertRegs = NumElts - 1; |
4142 | |
4143 | // If our elements fit into exactly 128 bits, then we can copy from the source |
4144 | // directly. Otherwise, we need to do a bit of setup with some subregister |
4145 | // inserts. |
4146 | if (NarrowTy.getSizeInBits() * NumElts == 128) { |
4147 | InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); |
4148 | } else { |
4149 | // No. We have to perform subregister inserts. For each insert, create an |
4150 | // implicit def and a subregister insert, and save the register we create. |
4151 | const TargetRegisterClass *RC = getRegClassForTypeOnBank( |
4152 | LLT::fixed_vector(NumElements: NumElts, ScalarSizeInBits: WideTy.getScalarSizeInBits()), |
4153 | *RBI.getRegBank(SrcReg, MRI, TRI)); |
4154 | unsigned SubReg = 0; |
4155 | bool Found = getSubRegForClass(RC, TRI, SubReg); |
4156 | (void)Found; |
4157 | assert(Found && "expected to find last operand's subeg idx" ); |
4158 | for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { |
4159 | Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); |
4160 | MachineInstr &ImpDefMI = |
4161 | *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), |
4162 | ImpDefReg); |
4163 | |
4164 | // Now, create the subregister insert from SrcReg. |
4165 | Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); |
4166 | MachineInstr &InsMI = |
4167 | *BuildMI(MBB, I, I.getDebugLoc(), |
4168 | TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) |
4169 | .addUse(ImpDefReg) |
4170 | .addUse(SrcReg) |
4171 | .addImm(SubReg); |
4172 | |
4173 | constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); |
4174 | constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); |
4175 | |
4176 | // Save the register so that we can copy from it after. |
4177 | InsertRegs.push_back(Elt: InsertReg); |
4178 | } |
4179 | } |
4180 | |
4181 | // Now that we've created any necessary subregister inserts, we can |
4182 | // create the copies. |
4183 | // |
4184 | // Perform the first copy separately as a subregister copy. |
4185 | Register CopyTo = I.getOperand(i: 0).getReg(); |
4186 | auto FirstCopy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {CopyTo}, SrcOps: {}) |
4187 | .addReg(RegNo: InsertRegs[0], flags: 0, SubReg: ExtractSubReg); |
4188 | constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); |
4189 | |
4190 | // Now, perform the remaining copies as vector lane copies. |
4191 | unsigned LaneIdx = 1; |
4192 | for (Register InsReg : InsertRegs) { |
4193 | Register CopyTo = I.getOperand(i: LaneIdx).getReg(); |
4194 | MachineInstr &CopyInst = |
4195 | *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) |
4196 | .addUse(InsReg) |
4197 | .addImm(LaneIdx); |
4198 | constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); |
4199 | ++LaneIdx; |
4200 | } |
4201 | |
4202 | // Separately constrain the first copy's destination. Because of the |
4203 | // limitation in constrainOperandRegClass, we can't guarantee that this will |
4204 | // actually be constrained. So, do it ourselves using the second operand. |
4205 | const TargetRegisterClass *RC = |
4206 | MRI.getRegClassOrNull(Reg: I.getOperand(i: 1).getReg()); |
4207 | if (!RC) { |
4208 | LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n" ); |
4209 | return false; |
4210 | } |
4211 | |
4212 | RBI.constrainGenericRegister(Reg: CopyTo, RC: *RC, MRI); |
4213 | I.eraseFromParent(); |
4214 | return true; |
4215 | } |
4216 | |
4217 | bool AArch64InstructionSelector::selectConcatVectors( |
4218 | MachineInstr &I, MachineRegisterInfo &MRI) { |
4219 | assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && |
4220 | "Unexpected opcode" ); |
4221 | Register Dst = I.getOperand(i: 0).getReg(); |
4222 | Register Op1 = I.getOperand(i: 1).getReg(); |
4223 | Register Op2 = I.getOperand(i: 2).getReg(); |
4224 | MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder&: MIB); |
4225 | if (!ConcatMI) |
4226 | return false; |
4227 | I.eraseFromParent(); |
4228 | return true; |
4229 | } |
4230 | |
4231 | unsigned |
4232 | AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, |
4233 | MachineFunction &MF) const { |
4234 | Type *CPTy = CPVal->getType(); |
4235 | Align Alignment = MF.getDataLayout().getPrefTypeAlign(Ty: CPTy); |
4236 | |
4237 | MachineConstantPool *MCP = MF.getConstantPool(); |
4238 | return MCP->getConstantPoolIndex(C: CPVal, Alignment); |
4239 | } |
4240 | |
// Materialize \p CPVal by placing it in the constant pool and emitting a load.
// Under the tiny code model a pc-relative load-literal is used for 4/8/16-byte
// entries; otherwise an ADRP + page-offset load pair. Returns the load
// instruction (with its memory operand attached), or nullptr for unsupported
// sizes.
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  const TargetRegisterClass *RC;
  unsigned Opc;
  bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(Ty: CPVal->getType());
  // Pick register class and load opcode from the store size of the constant.
  switch (Size) {
  case 16:
    RC = &AArch64::FPR128RegClass;
    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
    break;
  case 8:
    RC = &AArch64::FPR64RegClass;
    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
    break;
  case 4:
    RC = &AArch64::FPR32RegClass;
    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
    break;
  case 2:
    // No load-literal form is used for 2 bytes; always LDRHui.
    RC = &AArch64::FPR16RegClass;
    Opc = AArch64::LDRHui;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }

  MachineInstr *LoadMI = nullptr;
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
    // Use load(literal) for tiny code model.
    LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {}).addConstantPoolIndex(Idx: CPIdx);
  } else {
    // Otherwise: ADRP to the entry's page, then load from the page offset.
    auto Adrp =
        MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
            .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
                   .addConstantPoolIndex(
                       CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

    constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  }

  // Attach a memory operand so later passes know this is a constant-pool load.
  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
  LoadMI->addMemOperand(MF, MO: MF.getMachineMemOperand(PtrInfo,
                                                      f: MachineMemOperand::MOLoad,
                                                      s: Size, base_alignment: Align(Size)));
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}
4295 | |
4296 | /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given |
4297 | /// size and RB. |
4298 | static std::pair<unsigned, unsigned> |
4299 | getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { |
4300 | unsigned Opc, SubregIdx; |
4301 | if (RB.getID() == AArch64::GPRRegBankID) { |
4302 | if (EltSize == 8) { |
4303 | Opc = AArch64::INSvi8gpr; |
4304 | SubregIdx = AArch64::bsub; |
4305 | } else if (EltSize == 16) { |
4306 | Opc = AArch64::INSvi16gpr; |
4307 | SubregIdx = AArch64::ssub; |
4308 | } else if (EltSize == 32) { |
4309 | Opc = AArch64::INSvi32gpr; |
4310 | SubregIdx = AArch64::ssub; |
4311 | } else if (EltSize == 64) { |
4312 | Opc = AArch64::INSvi64gpr; |
4313 | SubregIdx = AArch64::dsub; |
4314 | } else { |
4315 | llvm_unreachable("invalid elt size!" ); |
4316 | } |
4317 | } else { |
4318 | if (EltSize == 8) { |
4319 | Opc = AArch64::INSvi8lane; |
4320 | SubregIdx = AArch64::bsub; |
4321 | } else if (EltSize == 16) { |
4322 | Opc = AArch64::INSvi16lane; |
4323 | SubregIdx = AArch64::hsub; |
4324 | } else if (EltSize == 32) { |
4325 | Opc = AArch64::INSvi32lane; |
4326 | SubregIdx = AArch64::ssub; |
4327 | } else if (EltSize == 64) { |
4328 | Opc = AArch64::INSvi64lane; |
4329 | SubregIdx = AArch64::dsub; |
4330 | } else { |
4331 | llvm_unreachable("invalid elt size!" ); |
4332 | } |
4333 | } |
4334 | return std::make_pair(x&: Opc, y&: SubregIdx); |
4335 | } |
4336 | |
4337 | MachineInstr *AArch64InstructionSelector::emitInstr( |
4338 | unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, |
4339 | std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, |
4340 | const ComplexRendererFns &RenderFns) const { |
4341 | assert(Opcode && "Expected an opcode?" ); |
4342 | assert(!isPreISelGenericOpcode(Opcode) && |
4343 | "Function should only be used to produce selected instructions!" ); |
4344 | auto MI = MIRBuilder.buildInstr(Opc: Opcode, DstOps, SrcOps); |
4345 | if (RenderFns) |
4346 | for (auto &Fn : *RenderFns) |
4347 | Fn(MI); |
4348 | constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); |
4349 | return &*MI; |
4350 | } |
4351 | |
// Shared emitter for ADD/SUB-family instructions. AddrModeAndSizeToOpcode is
// indexed [addressing mode][is 32-bit], with rows:
//   0: ri (positive immediate), 1: rs (shifted register), 2: rr (register),
//   3: ri of the inverse op (negated immediate), 4: rx (extended register).
// The addressing modes are tried in a fixed priority order below; the plain
// register-register form is the fallback.
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" );
  auto Ty = MRI.getType(Reg: LHS.getReg());
  assert(!Ty.isVector() && "Expected a scalar or pointer?" );
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only" );
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[0][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[3][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[4][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[1][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);
  return emitInstr(Opcode: AddrModeAndSizeToOpcode[2][Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS},
                   MIRBuilder);
}
4386 | |
4387 | MachineInstr * |
4388 | AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, |
4389 | MachineOperand &RHS, |
4390 | MachineIRBuilder &MIRBuilder) const { |
4391 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4392 | {{AArch64::ADDXri, AArch64::ADDWri}, |
4393 | {AArch64::ADDXrs, AArch64::ADDWrs}, |
4394 | {AArch64::ADDXrr, AArch64::ADDWrr}, |
4395 | {AArch64::SUBXri, AArch64::SUBWri}, |
4396 | {AArch64::ADDXrx, AArch64::ADDWrx}}}; |
4397 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst: DefReg, LHS, RHS, MIRBuilder); |
4398 | } |
4399 | |
4400 | MachineInstr * |
4401 | AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, |
4402 | MachineOperand &RHS, |
4403 | MachineIRBuilder &MIRBuilder) const { |
4404 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4405 | {{AArch64::ADDSXri, AArch64::ADDSWri}, |
4406 | {AArch64::ADDSXrs, AArch64::ADDSWrs}, |
4407 | {AArch64::ADDSXrr, AArch64::ADDSWrr}, |
4408 | {AArch64::SUBSXri, AArch64::SUBSWri}, |
4409 | {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; |
4410 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder); |
4411 | } |
4412 | |
4413 | MachineInstr * |
4414 | AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, |
4415 | MachineOperand &RHS, |
4416 | MachineIRBuilder &MIRBuilder) const { |
4417 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4418 | {{AArch64::SUBSXri, AArch64::SUBSWri}, |
4419 | {AArch64::SUBSXrs, AArch64::SUBSWrs}, |
4420 | {AArch64::SUBSXrr, AArch64::SUBSWrr}, |
4421 | {AArch64::ADDSXri, AArch64::ADDSWri}, |
4422 | {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; |
4423 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder); |
4424 | } |
4425 | |
4426 | MachineInstr * |
4427 | AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS, |
4428 | MachineOperand &RHS, |
4429 | MachineIRBuilder &MIRBuilder) const { |
4430 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4431 | MachineRegisterInfo *MRI = MIRBuilder.getMRI(); |
4432 | bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4433 | static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr}; |
4434 | return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder); |
4435 | } |
4436 | |
4437 | MachineInstr * |
4438 | AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS, |
4439 | MachineOperand &RHS, |
4440 | MachineIRBuilder &MIRBuilder) const { |
4441 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4442 | MachineRegisterInfo *MRI = MIRBuilder.getMRI(); |
4443 | bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4444 | static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr}; |
4445 | return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder); |
4446 | } |
4447 | |
4448 | MachineInstr * |
4449 | AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, |
4450 | MachineIRBuilder &MIRBuilder) const { |
4451 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4452 | bool Is32Bit = (MRI.getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4453 | auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; |
4454 | return emitADDS(Dst: MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); |
4455 | } |
4456 | |
// Emit a TST (flag-setting AND into a discarded result). Tries, in order:
// ANDS with an encodable logical immediate, ANDS with a shifted register,
// and finally the plain register-register form.
MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" );
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(Reg: LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  // Rows: 0 = immediate form, 1 = shifted-register form, 2 = register form;
  // columns indexed by Is32Bit.
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  // ANDS needs a logical immediate for its immediate form. Check if we can
  // fold one in.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();

    if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(Opc: OpcTable[0][Is32Bit], DstOps: {Ty}, SrcOps: {LHS});
      TstMI.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  if (auto Fns = selectLogicalShiftedRegister(Root&: RHS))
    return emitInstr(Opcode: OpcTable[1][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}, MIRBuilder, RenderFns: Fns);
  return emitInstr(Opcode: OpcTable[2][Is32Bit], DstOps: {Ty}, SrcOps: {LHS, RHS}, MIRBuilder);
}
4485 | |
// Emit a scalar integer compare. First tries to fold the compare into a CMN
// or TST via tryFoldIntegerCompare; otherwise emits SUBS into a clone of the
// LHS vreg (only the flags are consumed by callers).
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!" );
  assert(Predicate.isPredicate() && "Expected predicate?" );
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(Reg: LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer" );
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size;
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?" );
  // Fold the compare into a cmn or tst if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(VReg: LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}
4503 | |
/// Materialize the boolean result of a floating-point compare with predicate
/// \p Pred into the 32-bit register \p Dst, assuming NZCV already holds the
/// compare result. FP predicates needing two AArch64 condition codes are
/// handled by ORing two CSINC-based "cset" results together.
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Reg: Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?" );
#endif
  const Register ZReg = AArch64::WZR;
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2);
  // CSINC wzr, wzr, <cc> yields 1 when <cc> is *false*, so invert the
  // condition to get a conventional 0/1 "cset".
  auto InvCC1 = AArch64CC::getInvertedCondCode(Code: CC1);
  if (CC2 == AArch64CC::AL)
    // Single condition code suffices: one csinc.
    return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1,
                     MIRBuilder);
  // Two condition codes required: cset each separately and OR the results.
  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RegClass: RC);
  Register Def2Reg = MRI.createVirtualRegister(RegClass: RC);
  auto InvCC2 = AArch64CC::getInvertedCondCode(Code: CC2);
  emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, MIRBuilder);
  emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}
4529 | |
/// Emit a scalar floating-point compare of \p LHS and \p RHS, defining NZCV.
/// Returns nullptr for vector types. If \p RHS — or, for equality predicates,
/// \p LHS after commuting — is the constant +0.0, the immediate-zero FCMP form
/// is used so no FP constant has to be materialized.
MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(Reg: LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);

  // If this is a compare against +0.0, then we don't have
  // to explicitly materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(VReg: RHS, MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());

  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  };
  // Commuting is only safe for (un)ordered-equality predicates, which are
  // symmetric in their operands.
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // Try commutating the operands.
    const ConstantFP *LHSImm = getConstantFPVRegVal(VReg: LHS, MRI);
    if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
      ShouldUseImm = true;
      std::swap(a&: LHS, b&: RHS);
    }
  }
  // Rows: {register, immediate-zero} FCMP forms; columns: {f16, f32, f64}.
  unsigned CmpOpcTbl[2][3] = {
      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc =
      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];

  // Partially build the compare. Decide if we need to add a use for the
  // third operand based off whether or not we're comparing against 0.0.
  auto CmpMI = MIRBuilder.buildInstr(Opcode: CmpOpc).addUse(RegNo: LHS);
  CmpMI.setMIFlags(MachineInstr::NoFPExcept);
  if (!ShouldUseImm)
    CmpMI.addUse(RegNo: RHS);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}
4572 | |
4573 | MachineInstr *AArch64InstructionSelector::emitVectorConcat( |
4574 | std::optional<Register> Dst, Register Op1, Register Op2, |
4575 | MachineIRBuilder &MIRBuilder) const { |
4576 | // We implement a vector concat by: |
4577 | // 1. Use scalar_to_vector to insert the lower vector into the larger dest |
4578 | // 2. Insert the upper vector into the destination's upper element |
4579 | // TODO: some of this code is common with G_BUILD_VECTOR handling. |
4580 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4581 | |
4582 | const LLT Op1Ty = MRI.getType(Reg: Op1); |
4583 | const LLT Op2Ty = MRI.getType(Reg: Op2); |
4584 | |
4585 | if (Op1Ty != Op2Ty) { |
4586 | LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys" ); |
4587 | return nullptr; |
4588 | } |
4589 | assert(Op1Ty.isVector() && "Expected a vector for vector concat" ); |
4590 | |
4591 | if (Op1Ty.getSizeInBits() >= 128) { |
4592 | LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors" ); |
4593 | return nullptr; |
4594 | } |
4595 | |
4596 | // At the moment we just support 64 bit vector concats. |
4597 | if (Op1Ty.getSizeInBits() != 64) { |
4598 | LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors" ); |
4599 | return nullptr; |
4600 | } |
4601 | |
4602 | const LLT ScalarTy = LLT::scalar(SizeInBits: Op1Ty.getSizeInBits()); |
4603 | const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI); |
4604 | const TargetRegisterClass *DstRC = |
4605 | getRegClassForTypeOnBank(Ty: Op1Ty.multiplyElements(Factor: 2), RB: FPRBank); |
4606 | |
4607 | MachineInstr *WidenedOp1 = |
4608 | emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op1, MIRBuilder); |
4609 | MachineInstr *WidenedOp2 = |
4610 | emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op2, MIRBuilder); |
4611 | if (!WidenedOp1 || !WidenedOp2) { |
4612 | LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value" ); |
4613 | return nullptr; |
4614 | } |
4615 | |
4616 | // Now do the insert of the upper element. |
4617 | unsigned InsertOpc, InsSubRegIdx; |
4618 | std::tie(args&: InsertOpc, args&: InsSubRegIdx) = |
4619 | getInsertVecEltOpInfo(RB: FPRBank, EltSize: ScalarTy.getSizeInBits()); |
4620 | |
4621 | if (!Dst) |
4622 | Dst = MRI.createVirtualRegister(RegClass: DstRC); |
4623 | auto InsElt = |
4624 | MIRBuilder |
4625 | .buildInstr(Opc: InsertOpc, DstOps: {*Dst}, SrcOps: {WidenedOp1->getOperand(i: 0).getReg()}) |
4626 | .addImm(Val: 1) /* Lane index */ |
4627 | .addUse(RegNo: WidenedOp2->getOperand(i: 0).getReg()) |
4628 | .addImm(Val: 0); |
4629 | constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); |
4630 | return &*InsElt; |
4631 | } |
4632 | |
4633 | MachineInstr * |
4634 | AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1, |
4635 | Register Src2, AArch64CC::CondCode Pred, |
4636 | MachineIRBuilder &MIRBuilder) const { |
4637 | auto &MRI = *MIRBuilder.getMRI(); |
4638 | const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg: Dst); |
4639 | // If we used a register class, then this won't necessarily have an LLT. |
4640 | // Compute the size based off whether or not we have a class or bank. |
4641 | unsigned Size; |
4642 | if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) |
4643 | Size = TRI.getRegSizeInBits(*RC); |
4644 | else |
4645 | Size = MRI.getType(Reg: Dst).getSizeInBits(); |
4646 | // Some opcodes use s1. |
4647 | assert(Size <= 64 && "Expected 64 bits or less only!" ); |
4648 | static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr}; |
4649 | unsigned Opc = OpcTable[Size == 64]; |
4650 | auto CSINC = MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Src1, Src2}).addImm(Val: Pred); |
4651 | constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI); |
4652 | return &*CSINC; |
4653 | } |
4654 | |
/// Set up the NZCV carry flag from the carry-in vreg \p CarryReg for the
/// carry-consuming instruction \p I (G_[SU]ADDE/G_[SU]SUBE). Returns the
/// flag-setting instruction emitted, or nullptr when the immediately
/// preceding instruction already leaves the correct carry in NZCV.
MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
                                                      Register CarryReg) {
  MachineRegisterInfo *MRI = MIB.getMRI();
  unsigned Opcode = I.getOpcode();

  // If the instruction is a SUB, we need to negate the carry,
  // because borrowing is indicated by carry-flag == 0.
  bool NeedsNegatedCarry =
      (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);

  // If the previous instruction will already produce the correct carry, do not
  // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
  // generated during legalization of wide add/sub. This optimization depends on
  // these sequences not being interrupted by other instructions.
  // We have to select the previous instruction before the carry-using
  // instruction is deleted by the calling function, otherwise the previous
  // instruction might become dead and would get deleted.
  MachineInstr *SrcMI = MRI->getVRegDef(Reg: CarryReg);
  if (SrcMI == I.getPrevNode()) {
    if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(Val: SrcMI)) {
      bool ProducesNegatedCarry = CarrySrcMI->isSub();
      if (NeedsNegatedCarry == ProducesNegatedCarry &&
          CarrySrcMI->isUnsigned() &&
          CarrySrcMI->getCarryOutReg() == CarryReg &&
          selectAndRestoreState(I&: *SrcMI))
        return nullptr;
    }
  }

  // The integer result of the flag-setting instruction below is unused; only
  // NZCV matters.
  Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);

  if (NeedsNegatedCarry) {
    // (0 - Carry) sets !C in NZCV when Carry == 1
    Register ZReg = AArch64::WZR;
    return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
  }

  // (Carry - 1) sets !C in NZCV when Carry == 0
  auto Fns = select12BitValueWithLeftShift(Immed: 1);
  return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
}
4696 | |
/// Select a G_[SU]ADD[OE]/G_[SU]SUB[OE]: emit the flag-setting arithmetic and,
/// if the carry-out is used, a CSINC-based cset of the overflow/carry flag.
bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
  auto &CarryMI = cast<GAddSubCarryOut>(Val&: I);

  if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(Val: &I)) {
    // Set NZCV carry according to carry-in VReg
    emitCarryIn(I, CarryReg: CarryInMI->getCarryInReg());
  }

  // Emit the operation and get the correct condition code.
  auto OpAndCC = emitOverflowOp(Opcode: I.getOpcode(), Dst: CarryMI.getDstReg(),
                                LHS&: CarryMI.getLHS(), RHS&: CarryMI.getRHS(), MIRBuilder&: MIB);

  Register CarryOutReg = CarryMI.getCarryOutReg();

  // Don't convert carry-out to VReg if it is never used
  if (!MRI.use_nodbg_empty(RegNo: CarryOutReg)) {
    // Now, put the overflow result in the register given by the first operand
    // to the overflow op. CSINC increments the result when the predicate is
    // false, so to get the increment when it's true, we need to use the
    // inverse. In this case, we want to increment when carry is set.
    Register ZReg = AArch64::WZR;
    emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
              Pred: getInvertedCondCode(Code: OpAndCC.second), MIRBuilder&: MIB);
  }

  I.eraseFromParent();
  return true;
}
4726 | |
4727 | std::pair<MachineInstr *, AArch64CC::CondCode> |
4728 | AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, |
4729 | MachineOperand &LHS, |
4730 | MachineOperand &RHS, |
4731 | MachineIRBuilder &MIRBuilder) const { |
4732 | switch (Opcode) { |
4733 | default: |
4734 | llvm_unreachable("Unexpected opcode!" ); |
4735 | case TargetOpcode::G_SADDO: |
4736 | return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4737 | case TargetOpcode::G_UADDO: |
4738 | return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS); |
4739 | case TargetOpcode::G_SSUBO: |
4740 | return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4741 | case TargetOpcode::G_USUBO: |
4742 | return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO); |
4743 | case TargetOpcode::G_SADDE: |
4744 | return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4745 | case TargetOpcode::G_UADDE: |
4746 | return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS); |
4747 | case TargetOpcode::G_SSUBE: |
4748 | return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4749 | case TargetOpcode::G_USUBE: |
4750 | return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO); |
4751 | } |
4752 | } |
4753 | |
/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
/// expressed as a conjunction.
/// \param CanNegate Set to true if we can negate the whole sub-tree just by
///                  changing the conditions on the CMP tests.
///                  (this means we can call emitConjunctionRec() with
///                   Negate==true on this sub-tree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
///                    cannot do the negation naturally. We are required to
///                    emit the subtree first in this case.
/// \param WillNegate Is true if are called when the result of this
///                   subexpression must be negated. This happens when the
///                   outer expression is an OR. We can use this fact to know
///                   that we have a double negation (or (or ...) ...) that
///                   can be implemented for free.
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
                               bool WillNegate, MachineRegisterInfo &MRI,
                               unsigned Depth = 0) {
  // Each node of the tree is consumed exactly once; a value with other users
  // cannot be folded into the ccmp chain.
  if (!MRI.hasOneNonDBGUse(RegNo: Val))
    return false;
  MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
  unsigned Opcode = ValDef->getOpcode();
  // Leaves are integer/FP compares; both can be negated by inverting the
  // compare predicate.
  if (isa<GAnyCmp>(Val: ValDef)) {
    CanNegate = true;
    MustBeFirst = false;
    return true;
  }
  // Protect against exponential runtime and stack overflow.
  if (Depth > 6)
    return false;
  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
    bool IsOR = Opcode == TargetOpcode::G_OR;
    Register O0 = ValDef->getOperand(i: 1).getReg();
    Register O1 = ValDef->getOperand(i: 2).getReg();
    bool CanNegateL;
    bool MustBeFirstL;
    if (!canEmitConjunction(Val: O0, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI, Depth: Depth + 1))
      return false;
    bool CanNegateR;
    bool MustBeFirstR;
    if (!canEmitConjunction(Val: O1, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI, Depth: Depth + 1))
      return false;

    // At most one sub-tree can demand to be emitted first.
    if (MustBeFirstL && MustBeFirstR)
      return false;

    if (IsOR) {
      // For an OR expression we need to be able to naturally negate at least
      // one side or we cannot do the transformation at all.
      if (!CanNegateL && !CanNegateR)
        return false;
      // If we the result of the OR will be negated and we can naturally negate
      // the leaves, then this sub-tree as a whole negates naturally.
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      // If we cannot naturally negate the whole sub-tree, then this must be
      // emitted first.
      MustBeFirst = !CanNegate;
    } else {
      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND" );
      // We cannot naturally negate an AND operation.
      CanNegate = false;
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    }
    return true;
  }
  return false;
}
4820 | |
/// Emit a conditional compare (CCMP/FCCMP) of \p LHS against \p RHS: if
/// \p Predicate holds, compare under predicate \p CC; otherwise set NZCV so
/// that the inverse of \p OutCC is satisfied. Returns the emitted instruction.
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
    Register LHS, Register RHS, CmpInst::Predicate CC,
    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
    MachineIRBuilder &MIB) const {
  // TODO: emit CMN as an optimization.
  auto &MRI = *MIB.getMRI();
  LLT OpTy = MRI.getType(Reg: LHS);
  unsigned CCmpOpc;
  std::optional<ValueAndVReg> C;
  if (CmpInst::isIntPredicate(P: CC)) {
    assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
    // Constants in [0, 32) fit the CCMP immediate form; anything else goes
    // through a register.
    C = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
    if (C && C->Value.ult(32))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
    else
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
  } else {
    assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
           OpTy.getSizeInBits() == 64);
    switch (OpTy.getSizeInBits()) {
    case 16:
      assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons" );
      CCmpOpc = AArch64::FCCMPHrr;
      break;
    case 32:
      CCmpOpc = AArch64::FCCMPSrr;
      break;
    case 64:
      CCmpOpc = AArch64::FCCMPDrr;
      break;
    default:
      return nullptr;
    }
  }
  // When Predicate fails, the ccmp writes NZCV directly; pick flag values
  // that satisfy the *inverse* of OutCC so the chain's result stays correct.
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: InvOutCC);
  auto CCmp =
      MIB.buildInstr(Opc: CCmpOpc, DstOps: {}, SrcOps: {LHS});
  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
    CCmp.addImm(Val: C->Value.getZExtValue());
  else
    CCmp.addReg(RegNo: RHS);
  CCmp.addImm(Val: NZCV).addImm(Val: Predicate);
  constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
  return &*CCmp;
}
4867 | |
4868 | MachineInstr *AArch64InstructionSelector::emitConjunctionRec( |
4869 | Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp, |
4870 | AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const { |
4871 | // We're at a tree leaf, produce a conditional comparison operation. |
4872 | auto &MRI = *MIB.getMRI(); |
4873 | MachineInstr *ValDef = MRI.getVRegDef(Reg: Val); |
4874 | unsigned Opcode = ValDef->getOpcode(); |
4875 | if (auto *Cmp = dyn_cast<GAnyCmp>(Val: ValDef)) { |
4876 | Register LHS = Cmp->getLHSReg(); |
4877 | Register RHS = Cmp->getRHSReg(); |
4878 | CmpInst::Predicate CC = Cmp->getCond(); |
4879 | if (Negate) |
4880 | CC = CmpInst::getInversePredicate(pred: CC); |
4881 | if (isa<GICmp>(Val: Cmp)) { |
4882 | OutCC = changeICMPPredToAArch64CC(P: CC); |
4883 | } else { |
4884 | // Handle special FP cases. |
4885 | AArch64CC::CondCode ; |
4886 | changeFPCCToANDAArch64CC(CC, CondCode&: OutCC, CondCode2&: ExtraCC); |
4887 | // Some floating point conditions can't be tested with a single condition |
4888 | // code. Construct an additional comparison in this case. |
4889 | if (ExtraCC != AArch64CC::AL) { |
4890 | MachineInstr *; |
4891 | if (!CCOp) |
4892 | ExtraCmp = emitFPCompare(LHS, RHS, MIRBuilder&: MIB, Pred: CC); |
4893 | else |
4894 | ExtraCmp = |
4895 | emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC: ExtraCC, MIB); |
4896 | CCOp = ExtraCmp->getOperand(i: 0).getReg(); |
4897 | Predicate = ExtraCC; |
4898 | } |
4899 | } |
4900 | |
4901 | // Produce a normal comparison if we are first in the chain |
4902 | if (!CCOp) { |
4903 | auto Dst = MRI.cloneVirtualRegister(VReg: LHS); |
4904 | if (isa<GICmp>(Val: Cmp)) |
4905 | return emitSUBS(Dst, LHS&: Cmp->getOperand(i: 2), RHS&: Cmp->getOperand(i: 3), MIRBuilder&: MIB); |
4906 | return emitFPCompare(LHS: Cmp->getOperand(i: 2).getReg(), |
4907 | RHS: Cmp->getOperand(i: 3).getReg(), MIRBuilder&: MIB); |
4908 | } |
4909 | // Otherwise produce a ccmp. |
4910 | return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB); |
4911 | } |
4912 | assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree" ); |
4913 | |
4914 | bool IsOR = Opcode == TargetOpcode::G_OR; |
4915 | |
4916 | Register LHS = ValDef->getOperand(i: 1).getReg(); |
4917 | bool CanNegateL; |
4918 | bool MustBeFirstL; |
4919 | bool ValidL = canEmitConjunction(Val: LHS, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI); |
4920 | assert(ValidL && "Valid conjunction/disjunction tree" ); |
4921 | (void)ValidL; |
4922 | |
4923 | Register RHS = ValDef->getOperand(i: 2).getReg(); |
4924 | bool CanNegateR; |
4925 | bool MustBeFirstR; |
4926 | bool ValidR = canEmitConjunction(Val: RHS, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI); |
4927 | assert(ValidR && "Valid conjunction/disjunction tree" ); |
4928 | (void)ValidR; |
4929 | |
4930 | // Swap sub-tree that must come first to the right side. |
4931 | if (MustBeFirstL) { |
4932 | assert(!MustBeFirstR && "Valid conjunction/disjunction tree" ); |
4933 | std::swap(a&: LHS, b&: RHS); |
4934 | std::swap(a&: CanNegateL, b&: CanNegateR); |
4935 | std::swap(a&: MustBeFirstL, b&: MustBeFirstR); |
4936 | } |
4937 | |
4938 | bool NegateR; |
4939 | bool NegateAfterR; |
4940 | bool NegateL; |
4941 | bool NegateAfterAll; |
4942 | if (Opcode == TargetOpcode::G_OR) { |
4943 | // Swap the sub-tree that we can negate naturally to the left. |
4944 | if (!CanNegateL) { |
4945 | assert(CanNegateR && "at least one side must be negatable" ); |
4946 | assert(!MustBeFirstR && "invalid conjunction/disjunction tree" ); |
4947 | assert(!Negate); |
4948 | std::swap(a&: LHS, b&: RHS); |
4949 | NegateR = false; |
4950 | NegateAfterR = true; |
4951 | } else { |
4952 | // Negate the left sub-tree if possible, otherwise negate the result. |
4953 | NegateR = CanNegateR; |
4954 | NegateAfterR = !CanNegateR; |
4955 | } |
4956 | NegateL = true; |
4957 | NegateAfterAll = !Negate; |
4958 | } else { |
4959 | assert(Opcode == TargetOpcode::G_AND && |
4960 | "Valid conjunction/disjunction tree" ); |
4961 | assert(!Negate && "Valid conjunction/disjunction tree" ); |
4962 | |
4963 | NegateL = false; |
4964 | NegateR = false; |
4965 | NegateAfterR = false; |
4966 | NegateAfterAll = false; |
4967 | } |
4968 | |
4969 | // Emit sub-trees. |
4970 | AArch64CC::CondCode RHSCC; |
4971 | MachineInstr *CmpR = |
4972 | emitConjunctionRec(Val: RHS, OutCC&: RHSCC, Negate: NegateR, CCOp, Predicate, MIB); |
4973 | if (NegateAfterR) |
4974 | RHSCC = AArch64CC::getInvertedCondCode(Code: RHSCC); |
4975 | MachineInstr *CmpL = emitConjunctionRec( |
4976 | Val: LHS, OutCC, Negate: NegateL, CCOp: CmpR->getOperand(i: 0).getReg(), Predicate: RHSCC, MIB); |
4977 | if (NegateAfterAll) |
4978 | OutCC = AArch64CC::getInvertedCondCode(Code: OutCC); |
4979 | return CmpL; |
4980 | } |
4981 | |
4982 | MachineInstr *AArch64InstructionSelector::emitConjunction( |
4983 | Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const { |
4984 | bool DummyCanNegate; |
4985 | bool DummyMustBeFirst; |
4986 | if (!canEmitConjunction(Val, CanNegate&: DummyCanNegate, MustBeFirst&: DummyMustBeFirst, WillNegate: false, |
4987 | MRI&: *MIB.getMRI())) |
4988 | return nullptr; |
4989 | return emitConjunctionRec(Val, OutCC, Negate: false, CCOp: Register(), Predicate: AArch64CC::AL, MIB); |
4990 | } |
4991 | |
4992 | bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI, |
4993 | MachineInstr &CondMI) { |
4994 | AArch64CC::CondCode AArch64CC; |
4995 | MachineInstr *ConjMI = emitConjunction(Val: SelI.getCondReg(), OutCC&: AArch64CC, MIB); |
4996 | if (!ConjMI) |
4997 | return false; |
4998 | |
4999 | emitSelect(Dst: SelI.getReg(Idx: 0), True: SelI.getTrueReg(), False: SelI.getFalseReg(), CC: AArch64CC, MIB); |
5000 | SelI.eraseFromParent(); |
5001 | return true; |
5002 | } |
5003 | |
/// Try to fold the compare (or conjunction tree) feeding a G_SELECT into a
/// flag-setting instruction plus a conditional select, avoiding the generic
/// materialize-condition-then-select lowering. Returns true and erases \p I
/// on success.
bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());

  // We can only fold if all of the defs have one use.
  Register CondDefReg = CondDef->getOperand(i: 0).getReg();
  if (!MRI.hasOneNonDBGUse(RegNo: CondDefReg)) {
    // Unless it's another select. Re-emitting the flags per select is still
    // profitable in that case.
    for (const MachineInstr &UI : MRI.use_nodbg_instructions(Reg: CondDefReg)) {
      if (CondDef == &UI)
        continue;
      if (UI.getOpcode() != TargetOpcode::G_SELECT)
        return false;
    }
  }

  // Is the condition defined by a compare?
  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
    // Not a plain compare; it may still be an AND/OR conjunction tree that
    // can be lowered via conditional compares.
    if (tryOptSelectConjunction(SelI&: I, CondMI&: *CondDef))
      return true;
    return false;
  }

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
    CondCode = changeICMPPredToAArch64CC(P: Pred);
    // Emit the flag-setting compare; the select below consumes NZCV directly.
    emitIntegerCompare(LHS&: CondDef->getOperand(i: 2), RHS&: CondDef->getOperand(i: 3),
                       Predicate&: CondDef->getOperand(i: 1), MIRBuilder&: MIB);
  } else {
    // Get the condition code for the select.
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(P: Pred, CondCode, CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    if (!emitFPCompare(LHS: CondDef->getOperand(i: 2).getReg(),
                       RHS: CondDef->getOperand(i: 3).getReg(), MIRBuilder&: MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n" );
      return false;
    }
  }

  // Emit the select.
  emitSelect(Dst: I.getOperand(i: 0).getReg(), True: I.getOperand(i: 2).getReg(),
             False: I.getOperand(i: 3).getReg(), CC: CondCode, MIB);
  I.eraseFromParent();
  return true;
}
5080 | |
/// Try to fold an integer compare into a cheaper flag-setting instruction:
/// a CMN when one side is a negation (G_SUB 0, x), or a TST when comparing a
/// G_AND against zero. Returns the emitted instruction, or nullptr when no
/// fold applies.
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand" );
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  // Note: isCMN also checks that the fold is legal for predicate P.
  if (isCMN(MaybeSub: LHSDef, Pred: P, MRI))
    return emitCMN(LHS&: LHSDef->getOperand(i: 2), RHS, MIRBuilder);

  // Same idea here, but with the RHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (isCMN(MaybeSub: RHSDef, Pred: P, MRI))
    return emitCMN(LHS, RHS&: RHSDef->getOperand(i: 2), MIRBuilder);

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed:
  //
  // tst x, y
  // NOTE(review): restricted to non-unsigned predicates — presumably because
  // ANDS does not produce the carry flag unsigned compares rely on; confirm
  // against changeICMPPredToAArch64CC.
  if (!CmpInst::isUnsigned(predicate: P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHS&: LHSDef->getOperand(i: 1),
                   RHS&: LHSDef->getOperand(i: 2), MIRBuilder);
  }

  return nullptr;
}
5145 | |
/// Select a G_SHUFFLE_VECTOR by lowering it to a TBL1/TBL2 byte table lookup:
/// the shuffle mask is expanded to per-byte indices, loaded from a constant
/// pool, and fed to TBL together with the (possibly concatenated) sources.
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  Register Src1Reg = I.getOperand(i: 1).getReg();
  const LLT Src1Ty = MRI.getType(Reg: Src1Reg);
  Register Src2Reg = I.getOperand(i: 2).getReg();
  const LLT Src2Ty = MRI.getType(Reg: Src2Reg);
  ArrayRef<int> Mask = I.getOperand(i: 3).getShuffleMask();

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
  // it's originated from a <1 x T> type. Those should have been lowered into
  // G_BUILD_VECTOR earlier.
  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n" );
    return false;
  }

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  // TBL indexes bytes, not elements: expand each mask entry into the byte
  // offsets of that element within the concatenated source.
  SmallVector<Constant *, 64> CstIdxs;
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(Args: ConstantInt::get(Ty: Type::getInt8Ty(C&: Ctx), V: Offset));
    }
  }

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(V: CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder&: MIB);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool" );
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty" );
    // This case can be done with TBL1: concatenate the two 64-bit sources
    // into a single 128-bit table register.
    MachineInstr *Concat =
        emitVectorConcat(Dst: std::nullopt, Op1: Src1Reg, Op2: Src2Reg, MIRBuilder&: MIB);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1" );
      return false;
    }

    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
    IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
                                   IndexLoad->getOperand(0).getReg(), MIB);

    auto TBL1 = MIB.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);

    // TBL produced a 128-bit value; copy out the low 64 bits (dsub) as the
    // final result.
    auto Copy =
        MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
  auto RegSeq = createQTuple(Regs, MIB);
  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                             {RegSeq, IndexLoad->getOperand(0)});
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
5225 | |
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    std::optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  // Emit an INS (insert-vector-element) instruction that writes EltReg into
  // lane LaneIdx of SrcReg, defining DstReg. If no DstReg is supplied, a
  // fresh FPR128 virtual register is created. Returns the emitted insert.
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(RegClass: DstRC);

  // The opcode depends on the element size and whether the element lives on
  // the GPR or FPR bank.
  unsigned EltSize = MRI.getType(Reg: EltReg).getSizeInBits();
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    // FPR element: use the element-from-vector form of INS, so first place
    // the scalar into lane 0 of a 128-bit register, then insert from that
    // lane (the trailing 0 immediate).
    auto InsSub = emitScalarToVector(EltSize, DstRC, Scalar: EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
                 .addImm(Val: LaneIdx)
                 .addUse(RegNo: InsSub->getOperand(i: 0).getReg())
                 .addImm(Val: 0);
  } else {
    // GPR element: the element-from-GPR form of INS takes the scalar
    // register directly.
    InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
                 .addImm(Val: LaneIdx)
                 .addUse(RegNo: EltReg);
  }

  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return InsElt;
}
5256 | |
5257 | bool AArch64InstructionSelector::selectUSMovFromExtend( |
5258 | MachineInstr &MI, MachineRegisterInfo &MRI) { |
5259 | if (MI.getOpcode() != TargetOpcode::G_SEXT && |
5260 | MI.getOpcode() != TargetOpcode::G_ZEXT && |
5261 | MI.getOpcode() != TargetOpcode::G_ANYEXT) |
5262 | return false; |
5263 | bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT; |
5264 | const Register DefReg = MI.getOperand(i: 0).getReg(); |
5265 | const LLT DstTy = MRI.getType(Reg: DefReg); |
5266 | unsigned DstSize = DstTy.getSizeInBits(); |
5267 | |
5268 | if (DstSize != 32 && DstSize != 64) |
5269 | return false; |
5270 | |
5271 | MachineInstr * = getOpcodeDef(Opcode: TargetOpcode::G_EXTRACT_VECTOR_ELT, |
5272 | Reg: MI.getOperand(i: 1).getReg(), MRI); |
5273 | int64_t Lane; |
5274 | if (!Extract || !mi_match(R: Extract->getOperand(i: 2).getReg(), MRI, P: m_ICst(Cst&: Lane))) |
5275 | return false; |
5276 | Register Src0 = Extract->getOperand(i: 1).getReg(); |
5277 | |
5278 | const LLT &VecTy = MRI.getType(Reg: Src0); |
5279 | |
5280 | if (VecTy.getSizeInBits() != 128) { |
5281 | const MachineInstr *ScalarToVector = emitScalarToVector( |
5282 | VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB); |
5283 | assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!" ); |
5284 | Src0 = ScalarToVector->getOperand(i: 0).getReg(); |
5285 | } |
5286 | |
5287 | unsigned Opcode; |
5288 | if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32) |
5289 | Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32; |
5290 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16) |
5291 | Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16; |
5292 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8) |
5293 | Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8; |
5294 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16) |
5295 | Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16; |
5296 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8) |
5297 | Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8; |
5298 | else |
5299 | llvm_unreachable("Unexpected type combo for S/UMov!" ); |
5300 | |
5301 | // We may need to generate one of these, depending on the type and sign of the |
5302 | // input: |
5303 | // DstReg = SMOV Src0, Lane; |
5304 | // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32; |
5305 | MachineInstr *ExtI = nullptr; |
5306 | if (DstSize == 64 && !IsSigned) { |
5307 | Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); |
5308 | MIB.buildInstr(Opc: Opcode, DstOps: {NewReg}, SrcOps: {Src0}).addImm(Val: Lane); |
5309 | ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) |
5310 | .addImm(0) |
5311 | .addUse(NewReg) |
5312 | .addImm(AArch64::sub_32); |
5313 | RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); |
5314 | } else |
5315 | ExtI = MIB.buildInstr(Opc: Opcode, DstOps: {DefReg}, SrcOps: {Src0}).addImm(Val: Lane); |
5316 | |
5317 | constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); |
5318 | MI.eraseFromParent(); |
5319 | return true; |
5320 | } |
5321 | |
bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  // Select G_INSERT_VECTOR_ELT with a constant lane index: widen the vector
  // to 128 bits if needed, perform the lane insert, then narrow back.
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  // Get information on the destination.
  Register DstReg = I.getOperand(i: 0).getReg();
  const LLT DstTy = MRI.getType(Reg: DstReg);
  unsigned VecSize = DstTy.getSizeInBits();

  // Get information on the element we want to insert into the destination.
  Register EltReg = I.getOperand(i: 2).getReg();
  const LLT EltTy = MRI.getType(Reg: EltReg);
  unsigned EltSize = EltTy.getSizeInBits();
  // Only byte-to-doubleword elements are supported here.
  if (EltSize < 8 || EltSize > 64)
    return false;

  // Find the definition of the index. Bail out if it's not defined by a
  // G_CONSTANT.
  Register IdxReg = I.getOperand(i: 3).getReg();
  auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: IdxReg, MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();

  // Perform the lane insert.
  Register SrcReg = I.getOperand(i: 1).getReg();
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);

  if (VecSize < 128) {
    // If the vector we're inserting into is smaller than 128 bits, widen it
    // to 128 to do the insert.
    MachineInstr *ScalarToVec =
        emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
    if (!ScalarToVec)
      return false;
    SrcReg = ScalarToVec->getOperand(i: 0).getReg();
  }

  // Create an insert into a new FPR128 register.
  // Note that if our vector is already 128 bits, we end up emitting an extra
  // register.
  MachineInstr *InsMI =
      emitLaneInsert(DstReg: std::nullopt, SrcReg, EltReg, LaneIdx, RB: EltRB, MIRBuilder&: MIB);

  if (VecSize < 128) {
    // If we had to widen to perform the insert, then we have to demote back to
    // the original size to get the result we want.
    if (!emitNarrowVector(DstReg, SrcReg: InsMI->getOperand(i: 0).getReg(), MIB, MRI))
      return false;
  } else {
    // No widening needed; rewrite the insert's def to the original
    // destination and constrain it.
    InsMI->getOperand(i: 0).setReg(DstReg);
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}
5380 | |
5381 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8( |
5382 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5383 | unsigned int Op; |
5384 | if (DstSize == 128) { |
5385 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5386 | return nullptr; |
5387 | Op = AArch64::MOVIv16b_ns; |
5388 | } else { |
5389 | Op = AArch64::MOVIv8b_ns; |
5390 | } |
5391 | |
5392 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5393 | |
5394 | if (AArch64_AM::isAdvSIMDModImmType9(Imm: Val)) { |
5395 | Val = AArch64_AM::encodeAdvSIMDModImmType9(Imm: Val); |
5396 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5397 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); |
5398 | return &*Mov; |
5399 | } |
5400 | return nullptr; |
5401 | } |
5402 | |
5403 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16( |
5404 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder, |
5405 | bool Inv) { |
5406 | |
5407 | unsigned int Op; |
5408 | if (DstSize == 128) { |
5409 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5410 | return nullptr; |
5411 | Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16; |
5412 | } else { |
5413 | Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16; |
5414 | } |
5415 | |
5416 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5417 | uint64_t Shift; |
5418 | |
5419 | if (AArch64_AM::isAdvSIMDModImmType5(Imm: Val)) { |
5420 | Val = AArch64_AM::encodeAdvSIMDModImmType5(Imm: Val); |
5421 | Shift = 0; |
5422 | } else if (AArch64_AM::isAdvSIMDModImmType6(Imm: Val)) { |
5423 | Val = AArch64_AM::encodeAdvSIMDModImmType6(Imm: Val); |
5424 | Shift = 8; |
5425 | } else |
5426 | return nullptr; |
5427 | |
5428 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift); |
5429 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); |
5430 | return &*Mov; |
5431 | } |
5432 | |
5433 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32( |
5434 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder, |
5435 | bool Inv) { |
5436 | |
5437 | unsigned int Op; |
5438 | if (DstSize == 128) { |
5439 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5440 | return nullptr; |
5441 | Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32; |
5442 | } else { |
5443 | Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32; |
5444 | } |
5445 | |
5446 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5447 | uint64_t Shift; |
5448 | |
5449 | if ((AArch64_AM::isAdvSIMDModImmType1(Imm: Val))) { |
5450 | Val = AArch64_AM::encodeAdvSIMDModImmType1(Imm: Val); |
5451 | Shift = 0; |
5452 | } else if ((AArch64_AM::isAdvSIMDModImmType2(Imm: Val))) { |
5453 | Val = AArch64_AM::encodeAdvSIMDModImmType2(Imm: Val); |
5454 | Shift = 8; |
5455 | } else if ((AArch64_AM::isAdvSIMDModImmType3(Imm: Val))) { |
5456 | Val = AArch64_AM::encodeAdvSIMDModImmType3(Imm: Val); |
5457 | Shift = 16; |
5458 | } else if ((AArch64_AM::isAdvSIMDModImmType4(Imm: Val))) { |
5459 | Val = AArch64_AM::encodeAdvSIMDModImmType4(Imm: Val); |
5460 | Shift = 24; |
5461 | } else |
5462 | return nullptr; |
5463 | |
5464 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift); |
5465 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); |
5466 | return &*Mov; |
5467 | } |
5468 | |
5469 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64( |
5470 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5471 | |
5472 | unsigned int Op; |
5473 | if (DstSize == 128) { |
5474 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5475 | return nullptr; |
5476 | Op = AArch64::MOVIv2d_ns; |
5477 | } else { |
5478 | Op = AArch64::MOVID; |
5479 | } |
5480 | |
5481 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5482 | if (AArch64_AM::isAdvSIMDModImmType10(Imm: Val)) { |
5483 | Val = AArch64_AM::encodeAdvSIMDModImmType10(Imm: Val); |
5484 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5485 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); |
5486 | return &*Mov; |
5487 | } |
5488 | return nullptr; |
5489 | } |
5490 | |
5491 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s( |
5492 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder, |
5493 | bool Inv) { |
5494 | |
5495 | unsigned int Op; |
5496 | if (DstSize == 128) { |
5497 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5498 | return nullptr; |
5499 | Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl; |
5500 | } else { |
5501 | Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl; |
5502 | } |
5503 | |
5504 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5505 | uint64_t Shift; |
5506 | |
5507 | if (AArch64_AM::isAdvSIMDModImmType7(Imm: Val)) { |
5508 | Val = AArch64_AM::encodeAdvSIMDModImmType7(Imm: Val); |
5509 | Shift = 264; |
5510 | } else if (AArch64_AM::isAdvSIMDModImmType8(Imm: Val)) { |
5511 | Val = AArch64_AM::encodeAdvSIMDModImmType8(Imm: Val); |
5512 | Shift = 272; |
5513 | } else |
5514 | return nullptr; |
5515 | |
5516 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift); |
5517 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); |
5518 | return &*Mov; |
5519 | } |
5520 | |
5521 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP( |
5522 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5523 | |
5524 | unsigned int Op; |
5525 | bool IsWide = false; |
5526 | if (DstSize == 128) { |
5527 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5528 | return nullptr; |
5529 | Op = AArch64::FMOVv4f32_ns; |
5530 | IsWide = true; |
5531 | } else { |
5532 | Op = AArch64::FMOVv2f32_ns; |
5533 | } |
5534 | |
5535 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5536 | |
5537 | if (AArch64_AM::isAdvSIMDModImmType11(Imm: Val)) { |
5538 | Val = AArch64_AM::encodeAdvSIMDModImmType11(Imm: Val); |
5539 | } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Imm: Val)) { |
5540 | Val = AArch64_AM::encodeAdvSIMDModImmType12(Imm: Val); |
5541 | Op = AArch64::FMOVv2f64_ns; |
5542 | } else |
5543 | return nullptr; |
5544 | |
5545 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5546 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); |
5547 | return &*Mov; |
5548 | } |
5549 | |
bool AArch64InstructionSelector::selectIndexedExtLoad(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  // Select a pre/post-indexed extending load (destination wider than the
  // memory access) into the matching LDRS*/LDR* pre/post instruction, which
  // defines both the loaded value and the written-back address register.
  auto &ExtLd = cast<GIndexedAnyExtLoad>(Val&: MI);
  Register Dst = ExtLd.getDstReg();
  Register WriteBack = ExtLd.getWritebackReg();
  Register Base = ExtLd.getBaseReg();
  Register Offset = ExtLd.getOffsetReg();
  LLT Ty = MRI.getType(Reg: Dst);
  assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
  unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
  bool IsPre = ExtLd.isPre();
  bool IsSExt = isa<GIndexedSExtLoad>(Val: ExtLd);
  // Zero/any-extends to 64 bits are done as a 32-bit load followed by a
  // SUBREG_TO_REG into an X register.
  bool InsertIntoXReg = false;
  bool IsDst64 = Ty.getSizeInBits() == 64;

  unsigned Opc = 0;
  LLT NewLdDstTy;
  LLT s32 = LLT::scalar(SizeInBits: 32);
  LLT s64 = LLT::scalar(SizeInBits: 64);

  // Pick the opcode from (memory size, signedness, dest width, pre/post).
  if (MemSizeBits == 8) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else {
      Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else if (MemSizeBits == 16) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else {
      Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else if (MemSizeBits == 32) {
    if (IsSExt) {
      // A sign-extending 32-bit load only makes sense into 64 bits (LDRSW).
      Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
      NewLdDstTy = s64;
    } else {
      Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else {
    llvm_unreachable("Unexpected size for indexed load" );
  }

  if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
    return false; // We should be on gpr.

  // The offset must be an immediate for the pre/post-indexed forms.
  auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
  if (!Cst)
    return false; // Shouldn't happen, but just in case.

  auto LdMI = MIB.buildInstr(Opc, DstOps: {WriteBack, NewLdDstTy}, SrcOps: {Base})
                  .addImm(Val: Cst->getSExtValue());
  LdMI.cloneMemRefs(OtherMI: ExtLd);
  constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
  // Make sure to select the load with the MemTy as the dest type, and then
  // insert into X reg if needed.
  if (InsertIntoXReg) {
    // Generate a SUBREG_TO_REG.
    auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
                        .addImm(0)
                        .addUse(LdMI.getReg(1))
                        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
                                 MRI);
  } else {
    // Destination width matches the load result; a plain copy suffices.
    auto Copy = MIB.buildCopy(Res: Dst, Op: LdMI.getReg(Idx: 1));
    selectCopy(*Copy, TII, MRI, TRI, RBI);
  }
  MI.eraseFromParent();

  return true;
}
5636 | |
5637 | bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI, |
5638 | MachineRegisterInfo &MRI) { |
5639 | auto &Ld = cast<GIndexedLoad>(Val&: MI); |
5640 | Register Dst = Ld.getDstReg(); |
5641 | Register WriteBack = Ld.getWritebackReg(); |
5642 | Register Base = Ld.getBaseReg(); |
5643 | Register Offset = Ld.getOffsetReg(); |
5644 | assert(MRI.getType(Dst).getSizeInBits() <= 128 && |
5645 | "Unexpected type for indexed load" ); |
5646 | unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes(); |
5647 | |
5648 | if (MemSize < MRI.getType(Reg: Dst).getSizeInBytes()) |
5649 | return selectIndexedExtLoad(MI, MRI); |
5650 | |
5651 | unsigned Opc = 0; |
5652 | if (Ld.isPre()) { |
5653 | static constexpr unsigned GPROpcodes[] = { |
5654 | AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre, |
5655 | AArch64::LDRXpre}; |
5656 | static constexpr unsigned FPROpcodes[] = { |
5657 | AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre, |
5658 | AArch64::LDRQpre}; |
5659 | if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5660 | Opc = FPROpcodes[Log2_32(Value: MemSize)]; |
5661 | else |
5662 | Opc = GPROpcodes[Log2_32(Value: MemSize)]; |
5663 | } else { |
5664 | static constexpr unsigned GPROpcodes[] = { |
5665 | AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost, |
5666 | AArch64::LDRXpost}; |
5667 | static constexpr unsigned FPROpcodes[] = { |
5668 | AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost, |
5669 | AArch64::LDRDpost, AArch64::LDRQpost}; |
5670 | if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5671 | Opc = FPROpcodes[Log2_32(Value: MemSize)]; |
5672 | else |
5673 | Opc = GPROpcodes[Log2_32(Value: MemSize)]; |
5674 | } |
5675 | auto Cst = getIConstantVRegVal(VReg: Offset, MRI); |
5676 | if (!Cst) |
5677 | return false; // Shouldn't happen, but just in case. |
5678 | auto LdMI = |
5679 | MIB.buildInstr(Opc, DstOps: {WriteBack, Dst}, SrcOps: {Base}).addImm(Val: Cst->getSExtValue()); |
5680 | LdMI.cloneMemRefs(OtherMI: Ld); |
5681 | constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI); |
5682 | MI.eraseFromParent(); |
5683 | return true; |
5684 | } |
5685 | |
5686 | bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I, |
5687 | MachineRegisterInfo &MRI) { |
5688 | Register Dst = I.getWritebackReg(); |
5689 | Register Val = I.getValueReg(); |
5690 | Register Base = I.getBaseReg(); |
5691 | Register Offset = I.getOffsetReg(); |
5692 | LLT ValTy = MRI.getType(Reg: Val); |
5693 | assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store" ); |
5694 | |
5695 | unsigned Opc = 0; |
5696 | if (I.isPre()) { |
5697 | static constexpr unsigned GPROpcodes[] = { |
5698 | AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre, |
5699 | AArch64::STRXpre}; |
5700 | static constexpr unsigned FPROpcodes[] = { |
5701 | AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre, |
5702 | AArch64::STRQpre}; |
5703 | |
5704 | if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5705 | Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5706 | else |
5707 | Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5708 | } else { |
5709 | static constexpr unsigned GPROpcodes[] = { |
5710 | AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost, |
5711 | AArch64::STRXpost}; |
5712 | static constexpr unsigned FPROpcodes[] = { |
5713 | AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost, |
5714 | AArch64::STRDpost, AArch64::STRQpost}; |
5715 | |
5716 | if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5717 | Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5718 | else |
5719 | Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5720 | } |
5721 | |
5722 | auto Cst = getIConstantVRegVal(VReg: Offset, MRI); |
5723 | if (!Cst) |
5724 | return false; // Shouldn't happen, but just in case. |
5725 | auto Str = |
5726 | MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Val, Base}).addImm(Val: Cst->getSExtValue()); |
5727 | Str.cloneMemRefs(OtherMI: I); |
5728 | constrainSelectedInstRegOperands(*Str, TII, TRI, RBI); |
5729 | I.eraseFromParent(); |
5730 | return true; |
5731 | } |
5732 | |
MachineInstr *
AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
                                               MachineIRBuilder &MIRBuilder,
                                               MachineRegisterInfo &MRI) {
  // Materialize the constant vector CV into Dst, trying progressively more
  // general strategies: a zeroing MOVI, the AdvSIMD modified-immediate
  // encodings for splats, and finally a constant-pool load.
  // Returns the last emitted instruction, or nullptr on failure.
  LLT DstTy = MRI.getType(Reg: Dst);
  unsigned DstSize = DstTy.getSizeInBits();
  if (CV->isNullValue()) {
    // All-zeros vectors use MOVI #0.
    if (DstSize == 128) {
      auto Mov =
          MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
      constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
      return &*Mov;
    }

    if (DstSize == 64) {
      // MOVIv2d_ns writes a Q register; zero a 128-bit temporary and copy
      // out the low 64 bits via the dsub subregister.
      auto Mov =
          MIRBuilder
              .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
              .addImm(0);
      auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
                      .addReg(Mov.getReg(0), 0, AArch64::dsub);
      RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
      return &*Copy;
    }
  }

  if (CV->getSplatValue()) {
    // Splat constant: try each AdvSIMD modified-immediate form, first on the
    // value itself, then (where an inverted MVNI form exists) on its
    // complement.
    APInt DefBits = APInt::getSplat(NewLen: DstSize, V: CV->getUniqueInteger());
    MachineInstr *NewOp;
    bool Inv = false;
    if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
        (NewOp = tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
        (NewOp =
             tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
        (NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
        (NewOp = tryAdvSIMDModImm8(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
        (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)))
      return NewOp;

    DefBits = ~DefBits;
    Inv = true;
    if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
        (NewOp =
             tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
        (NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)))
      return NewOp;
  }

  // Fallback: load the vector from the constant pool.
  auto *CPLoad = emitLoadFromConstantPool(CPVal: CV, MIRBuilder);
  if (!CPLoad) {
    LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!" );
    return nullptr;
  }

  auto Copy = MIRBuilder.buildCopy(Res: Dst, Op: CPLoad->getOperand(i: 0));
  RBI.constrainGenericRegister(
      Reg: Dst, RC: *MRI.getRegClass(Reg: CPLoad->getOperand(i: 0).getReg()), MRI);
  return &*Copy;
}
5792 | |
5793 | bool AArch64InstructionSelector::tryOptConstantBuildVec( |
5794 | MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) { |
5795 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); |
5796 | unsigned DstSize = DstTy.getSizeInBits(); |
5797 | assert(DstSize <= 128 && "Unexpected build_vec type!" ); |
5798 | if (DstSize < 32) |
5799 | return false; |
5800 | // Check if we're building a constant vector, in which case we want to |
5801 | // generate a constant pool load instead of a vector insert sequence. |
5802 | SmallVector<Constant *, 16> Csts; |
5803 | for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { |
5804 | // Try to find G_CONSTANT or G_FCONSTANT |
5805 | auto *OpMI = |
5806 | getOpcodeDef(Opcode: TargetOpcode::G_CONSTANT, Reg: I.getOperand(i: Idx).getReg(), MRI); |
5807 | if (OpMI) |
5808 | Csts.emplace_back( |
5809 | Args: const_cast<ConstantInt *>(OpMI->getOperand(i: 1).getCImm())); |
5810 | else if ((OpMI = getOpcodeDef(Opcode: TargetOpcode::G_FCONSTANT, |
5811 | Reg: I.getOperand(i: Idx).getReg(), MRI))) |
5812 | Csts.emplace_back( |
5813 | Args: const_cast<ConstantFP *>(OpMI->getOperand(i: 1).getFPImm())); |
5814 | else |
5815 | return false; |
5816 | } |
5817 | Constant *CV = ConstantVector::get(V: Csts); |
5818 | if (!emitConstantVector(Dst: I.getOperand(i: 0).getReg(), CV, MIRBuilder&: MIB, MRI)) |
5819 | return false; |
5820 | I.eraseFromParent(); |
5821 | return true; |
5822 | } |
5823 | |
bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Given:
  //  %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
  //
  // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
  // Returns false without changes if the pattern doesn't apply.
  Register Dst = I.getOperand(i: 0).getReg();
  Register EltReg = I.getOperand(i: 1).getReg();
  LLT EltTy = MRI.getType(Reg: EltReg);
  // If the index isn't on the same bank as its elements, then this can't be a
  // SUBREG_TO_REG.
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
  if (EltRB != DstRB)
    return false;
  // All operands other than the first element must be undef.
  if (any_of(Range: drop_begin(RangeOrContainer: I.operands(), N: 2), P: [&MRI](const MachineOperand &Op) {
        return !getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: Op.getReg(), MRI);
      }))
    return false;
  // Map the element and destination types to register classes, and find the
  // subregister index that places the element inside the destination class.
  unsigned SubReg;
  const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(Ty: EltTy, RB: EltRB);
  if (!EltRC)
    return false;
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), RB: DstRB);
  if (!DstRC)
    return false;
  if (!getSubRegForClass(EltRC, TRI, SubReg))
    return false;
  auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
                         .addImm(0)
                         .addUse(EltReg)
                         .addImm(SubReg);
  // Erasing I first is fine: constraining operates on the new instruction.
  I.eraseFromParent();
  constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
  return RBI.constrainGenericRegister(Reg: Dst, RC: *DstRC, MRI);
}
5861 | |
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
                                                   MachineRegisterInfo &MRI) {
  // Select G_BUILD_VECTOR: try the constant-vector and subreg-to-reg
  // shortcuts first, otherwise emit a scalar-to-vector followed by a chain
  // of per-lane inserts.
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  const LLT EltTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();

  if (tryOptConstantBuildVec(I, DstTy, MRI))
    return true;
  if (tryOptBuildVecToSubregToReg(I, MRI))
    return true;

  if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false; // Don't support all element types yet.
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(i: 1).getReg(), MRI, TRI);

  // Put the first element into lane 0 of an FPR128 register; the insert
  // chain below fills in the remaining lanes.
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineInstr *ScalarToVec =
      emitScalarToVector(EltSize: DstTy.getElementType().getSizeInBits(), DstRC,
                         Scalar: I.getOperand(i: 1).getReg(), MIRBuilder&: MIB);
  if (!ScalarToVec)
    return false;

  Register DstVec = ScalarToVec->getOperand(i: 0).getReg();
  unsigned DstSize = DstTy.getSizeInBits();

  // Keep track of the last MI we inserted. Later on, we might be able to save
  // a copy using it.
  MachineInstr *PrevMI = nullptr;
  // Insert elements 2..N into lanes 1..N-1, each insert reading the previous
  // insert's result.
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    // Note that if we don't do a subregister copy, we can end up making an
    // extra register.
    PrevMI = &*emitLaneInsert(DstReg: std::nullopt, SrcReg: DstVec, EltReg: I.getOperand(i).getReg(),
                              LaneIdx: i - 1, RB, MIRBuilder&: MIB);
    DstVec = PrevMI->getOperand(i: 0).getReg();
  }

  // If DstTy's size in bits is less than 128, then emit a subregister copy
  // from DstVec to the last register we've defined.
  if (DstSize < 128) {
    // Force this to be FPR using the destination vector.
    const TargetRegisterClass *RC =
        getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
    if (!RC)
      return false;
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n" );
      return false;
    }

    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << "\n" );
      return false;
    }

    Register Reg = MRI.createVirtualRegister(RegClass: RC);
    Register DstReg = I.getOperand(i: 0).getReg();

    // Copy the low ssub/dsub subregister of the widened vector into DstReg.
    MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}).addReg(RegNo: DstVec, flags: 0, SubReg);
    // NOTE(review): operand 1 of I is rewritten just before I is erased
    // below; presumably this keeps the instruction's use list consistent
    // until erasure — confirm against upstream history.
    MachineOperand &RegOp = I.getOperand(i: 1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
  } else {
    // We don't need a subregister copy. Save a copy by re-using the
    // destination register on the final insert.
    assert(PrevMI && "PrevMI was null?" );
    PrevMI->getOperand(i: 0).setReg(I.getOperand(i: 0).getReg());
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}
5941 | |
bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
                                                           unsigned NumVecs,
                                                           MachineInstr &I) {
  // Select a multi-vector structured-load intrinsic: emit Opc, which defines
  // a register tuple, then copy each vector out through consecutive
  // dsub0/qsub0 subregister indices into I's NumVecs results.
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  assert(Opc && "Expected an opcode?" );
  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors" );
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 64 || Size == 128) &&
         "Destination must be 64 bits or 128 bits?" );
  // 64-bit results come from D-register tuples, 128-bit from Q-register
  // tuples; subsequent vectors use SubReg + Idx.
  unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
  // The pointer is always the last operand of the intrinsic.
  auto Ptr = I.getOperand(i: I.getNumOperands() - 1).getReg();
  assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?" );
  auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {Ptr});
  Load.cloneMemRefs(OtherMI: I);
  constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
  Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: Idx)}, SrcOps: {})
                   .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx);
    // Emit the subreg copies and immediately select them.
    // FIXME: We should refactor our copy code into an emitCopy helper and
    // clean up uses of this pattern elsewhere in the selector.
    selectCopy(*Vec, TII, MRI, TRI, RBI);
  }
  return true;
}
5970 | |
5971 | bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic( |
5972 | unsigned Opc, unsigned NumVecs, MachineInstr &I) { |
5973 | assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); |
5974 | assert(Opc && "Expected an opcode?" ); |
5975 | assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors" ); |
5976 | auto &MRI = *MIB.getMRI(); |
5977 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
5978 | bool Narrow = Ty.getSizeInBits() == 64; |
5979 | |
5980 | auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1; |
5981 | SmallVector<Register, 4> Regs(NumVecs); |
5982 | std::transform(first: FirstSrcRegIt, last: FirstSrcRegIt + NumVecs, result: Regs.begin(), |
5983 | unary_op: [](auto MO) { return MO.getReg(); }); |
5984 | |
5985 | if (Narrow) { |
5986 | transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) { |
5987 | return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB) |
5988 | ->getOperand(0) |
5989 | .getReg(); |
5990 | }); |
5991 | Ty = Ty.multiplyElements(Factor: 2); |
5992 | } |
5993 | |
5994 | Register Tuple = createQTuple(Regs, MIB); |
5995 | auto LaneNo = getIConstantVRegVal(VReg: (FirstSrcRegIt + NumVecs)->getReg(), MRI); |
5996 | if (!LaneNo) |
5997 | return false; |
5998 | |
5999 | Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg(); |
6000 | auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {}) |
6001 | .addReg(RegNo: Tuple) |
6002 | .addImm(Val: LaneNo->getZExtValue()) |
6003 | .addReg(RegNo: Ptr); |
6004 | Load.cloneMemRefs(OtherMI: I); |
6005 | constrainSelectedInstRegOperands(*Load, TII, TRI, RBI); |
6006 | Register SelectedLoadDst = Load->getOperand(i: 0).getReg(); |
6007 | unsigned SubReg = AArch64::qsub0; |
6008 | for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { |
6009 | auto Vec = MIB.buildInstr(TargetOpcode::COPY, |
6010 | {Narrow ? DstOp(&AArch64::FPR128RegClass) |
6011 | : DstOp(I.getOperand(Idx).getReg())}, |
6012 | {}) |
6013 | .addReg(SelectedLoadDst, 0, SubReg + Idx); |
6014 | Register WideReg = Vec.getReg(0); |
6015 | // Emit the subreg copies and immediately select them. |
6016 | selectCopy(*Vec, TII, MRI, TRI, RBI); |
6017 | if (Narrow && |
6018 | !emitNarrowVector(DstReg: I.getOperand(i: Idx).getReg(), SrcReg: WideReg, MIB, MRI)) |
6019 | return false; |
6020 | } |
6021 | return true; |
6022 | } |
6023 | |
6024 | void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I, |
6025 | unsigned NumVecs, |
6026 | unsigned Opc) { |
6027 | MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo(); |
6028 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6029 | Register Ptr = I.getOperand(i: 1 + NumVecs).getReg(); |
6030 | |
6031 | SmallVector<Register, 2> Regs(NumVecs); |
6032 | std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs, |
6033 | result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); }); |
6034 | |
6035 | Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB) |
6036 | : createDTuple(Regs, MIB); |
6037 | auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {Tuple, Ptr}); |
6038 | Store.cloneMemRefs(OtherMI: I); |
6039 | constrainSelectedInstRegOperands(*Store, TII, TRI, RBI); |
6040 | } |
6041 | |
6042 | bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic( |
6043 | MachineInstr &I, unsigned NumVecs, unsigned Opc) { |
6044 | MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo(); |
6045 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6046 | bool Narrow = Ty.getSizeInBits() == 64; |
6047 | |
6048 | SmallVector<Register, 2> Regs(NumVecs); |
6049 | std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs, |
6050 | result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); }); |
6051 | |
6052 | if (Narrow) |
6053 | transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) { |
6054 | return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB) |
6055 | ->getOperand(0) |
6056 | .getReg(); |
6057 | }); |
6058 | |
6059 | Register Tuple = createQTuple(Regs, MIB); |
6060 | |
6061 | auto LaneNo = getIConstantVRegVal(VReg: I.getOperand(i: 1 + NumVecs).getReg(), MRI); |
6062 | if (!LaneNo) |
6063 | return false; |
6064 | Register Ptr = I.getOperand(i: 1 + NumVecs + 1).getReg(); |
6065 | auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {}) |
6066 | .addReg(RegNo: Tuple) |
6067 | .addImm(Val: LaneNo->getZExtValue()) |
6068 | .addReg(RegNo: Ptr); |
6069 | Store.cloneMemRefs(OtherMI: I); |
6070 | constrainSelectedInstRegOperands(*Store, TII, TRI, RBI); |
6071 | return true; |
6072 | } |
6073 | |
6074 | bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( |
6075 | MachineInstr &I, MachineRegisterInfo &MRI) { |
6076 | // Find the intrinsic ID. |
6077 | unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID(); |
6078 | |
6079 | const LLT S8 = LLT::scalar(SizeInBits: 8); |
6080 | const LLT S16 = LLT::scalar(SizeInBits: 16); |
6081 | const LLT S32 = LLT::scalar(SizeInBits: 32); |
6082 | const LLT S64 = LLT::scalar(SizeInBits: 64); |
6083 | const LLT P0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
6084 | // Select the instruction. |
6085 | switch (IntrinID) { |
6086 | default: |
6087 | return false; |
6088 | case Intrinsic::aarch64_ldxp: |
6089 | case Intrinsic::aarch64_ldaxp: { |
6090 | auto NewI = MIB.buildInstr( |
6091 | IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX, |
6092 | {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, |
6093 | {I.getOperand(3)}); |
6094 | NewI.cloneMemRefs(I); |
6095 | constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); |
6096 | break; |
6097 | } |
6098 | case Intrinsic::trap: |
6099 | MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1); |
6100 | break; |
6101 | case Intrinsic::debugtrap: |
6102 | MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); |
6103 | break; |
6104 | case Intrinsic::ubsantrap: |
6105 | MIB.buildInstr(AArch64::BRK, {}, {}) |
6106 | .addImm(I.getOperand(1).getImm() | ('U' << 8)); |
6107 | break; |
6108 | case Intrinsic::aarch64_neon_ld1x2: { |
6109 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6110 | unsigned Opc = 0; |
6111 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6112 | Opc = AArch64::LD1Twov8b; |
6113 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6114 | Opc = AArch64::LD1Twov16b; |
6115 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6116 | Opc = AArch64::LD1Twov4h; |
6117 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6118 | Opc = AArch64::LD1Twov8h; |
6119 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6120 | Opc = AArch64::LD1Twov2s; |
6121 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6122 | Opc = AArch64::LD1Twov4s; |
6123 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6124 | Opc = AArch64::LD1Twov2d; |
6125 | else if (Ty == S64 || Ty == P0) |
6126 | Opc = AArch64::LD1Twov1d; |
6127 | else |
6128 | llvm_unreachable("Unexpected type for ld1x2!" ); |
6129 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6130 | break; |
6131 | } |
6132 | case Intrinsic::aarch64_neon_ld1x3: { |
6133 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6134 | unsigned Opc = 0; |
6135 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6136 | Opc = AArch64::LD1Threev8b; |
6137 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6138 | Opc = AArch64::LD1Threev16b; |
6139 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6140 | Opc = AArch64::LD1Threev4h; |
6141 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6142 | Opc = AArch64::LD1Threev8h; |
6143 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6144 | Opc = AArch64::LD1Threev2s; |
6145 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6146 | Opc = AArch64::LD1Threev4s; |
6147 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6148 | Opc = AArch64::LD1Threev2d; |
6149 | else if (Ty == S64 || Ty == P0) |
6150 | Opc = AArch64::LD1Threev1d; |
6151 | else |
6152 | llvm_unreachable("Unexpected type for ld1x3!" ); |
6153 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6154 | break; |
6155 | } |
6156 | case Intrinsic::aarch64_neon_ld1x4: { |
6157 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6158 | unsigned Opc = 0; |
6159 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6160 | Opc = AArch64::LD1Fourv8b; |
6161 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6162 | Opc = AArch64::LD1Fourv16b; |
6163 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6164 | Opc = AArch64::LD1Fourv4h; |
6165 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6166 | Opc = AArch64::LD1Fourv8h; |
6167 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6168 | Opc = AArch64::LD1Fourv2s; |
6169 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6170 | Opc = AArch64::LD1Fourv4s; |
6171 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6172 | Opc = AArch64::LD1Fourv2d; |
6173 | else if (Ty == S64 || Ty == P0) |
6174 | Opc = AArch64::LD1Fourv1d; |
6175 | else |
6176 | llvm_unreachable("Unexpected type for ld1x4!" ); |
6177 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6178 | break; |
6179 | } |
6180 | case Intrinsic::aarch64_neon_ld2: { |
6181 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6182 | unsigned Opc = 0; |
6183 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6184 | Opc = AArch64::LD2Twov8b; |
6185 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6186 | Opc = AArch64::LD2Twov16b; |
6187 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6188 | Opc = AArch64::LD2Twov4h; |
6189 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6190 | Opc = AArch64::LD2Twov8h; |
6191 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6192 | Opc = AArch64::LD2Twov2s; |
6193 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6194 | Opc = AArch64::LD2Twov4s; |
6195 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6196 | Opc = AArch64::LD2Twov2d; |
6197 | else if (Ty == S64 || Ty == P0) |
6198 | Opc = AArch64::LD1Twov1d; |
6199 | else |
6200 | llvm_unreachable("Unexpected type for ld2!" ); |
6201 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6202 | break; |
6203 | } |
6204 | case Intrinsic::aarch64_neon_ld2lane: { |
6205 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6206 | unsigned Opc; |
6207 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6208 | Opc = AArch64::LD2i8; |
6209 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6210 | Opc = AArch64::LD2i16; |
6211 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6212 | Opc = AArch64::LD2i32; |
6213 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6214 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6215 | Opc = AArch64::LD2i64; |
6216 | else |
6217 | llvm_unreachable("Unexpected type for st2lane!" ); |
6218 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 2, I)) |
6219 | return false; |
6220 | break; |
6221 | } |
6222 | case Intrinsic::aarch64_neon_ld2r: { |
6223 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6224 | unsigned Opc = 0; |
6225 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6226 | Opc = AArch64::LD2Rv8b; |
6227 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6228 | Opc = AArch64::LD2Rv16b; |
6229 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6230 | Opc = AArch64::LD2Rv4h; |
6231 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6232 | Opc = AArch64::LD2Rv8h; |
6233 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6234 | Opc = AArch64::LD2Rv2s; |
6235 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6236 | Opc = AArch64::LD2Rv4s; |
6237 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6238 | Opc = AArch64::LD2Rv2d; |
6239 | else if (Ty == S64 || Ty == P0) |
6240 | Opc = AArch64::LD2Rv1d; |
6241 | else |
6242 | llvm_unreachable("Unexpected type for ld2r!" ); |
6243 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6244 | break; |
6245 | } |
6246 | case Intrinsic::aarch64_neon_ld3: { |
6247 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6248 | unsigned Opc = 0; |
6249 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6250 | Opc = AArch64::LD3Threev8b; |
6251 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6252 | Opc = AArch64::LD3Threev16b; |
6253 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6254 | Opc = AArch64::LD3Threev4h; |
6255 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6256 | Opc = AArch64::LD3Threev8h; |
6257 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6258 | Opc = AArch64::LD3Threev2s; |
6259 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6260 | Opc = AArch64::LD3Threev4s; |
6261 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6262 | Opc = AArch64::LD3Threev2d; |
6263 | else if (Ty == S64 || Ty == P0) |
6264 | Opc = AArch64::LD1Threev1d; |
6265 | else |
6266 | llvm_unreachable("Unexpected type for ld3!" ); |
6267 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6268 | break; |
6269 | } |
6270 | case Intrinsic::aarch64_neon_ld3lane: { |
6271 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6272 | unsigned Opc; |
6273 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6274 | Opc = AArch64::LD3i8; |
6275 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6276 | Opc = AArch64::LD3i16; |
6277 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6278 | Opc = AArch64::LD3i32; |
6279 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6280 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6281 | Opc = AArch64::LD3i64; |
6282 | else |
6283 | llvm_unreachable("Unexpected type for st3lane!" ); |
6284 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 3, I)) |
6285 | return false; |
6286 | break; |
6287 | } |
6288 | case Intrinsic::aarch64_neon_ld3r: { |
6289 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6290 | unsigned Opc = 0; |
6291 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6292 | Opc = AArch64::LD3Rv8b; |
6293 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6294 | Opc = AArch64::LD3Rv16b; |
6295 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6296 | Opc = AArch64::LD3Rv4h; |
6297 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6298 | Opc = AArch64::LD3Rv8h; |
6299 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6300 | Opc = AArch64::LD3Rv2s; |
6301 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6302 | Opc = AArch64::LD3Rv4s; |
6303 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6304 | Opc = AArch64::LD3Rv2d; |
6305 | else if (Ty == S64 || Ty == P0) |
6306 | Opc = AArch64::LD3Rv1d; |
6307 | else |
6308 | llvm_unreachable("Unexpected type for ld3r!" ); |
6309 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6310 | break; |
6311 | } |
6312 | case Intrinsic::aarch64_neon_ld4: { |
6313 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6314 | unsigned Opc = 0; |
6315 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6316 | Opc = AArch64::LD4Fourv8b; |
6317 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6318 | Opc = AArch64::LD4Fourv16b; |
6319 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6320 | Opc = AArch64::LD4Fourv4h; |
6321 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6322 | Opc = AArch64::LD4Fourv8h; |
6323 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6324 | Opc = AArch64::LD4Fourv2s; |
6325 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6326 | Opc = AArch64::LD4Fourv4s; |
6327 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6328 | Opc = AArch64::LD4Fourv2d; |
6329 | else if (Ty == S64 || Ty == P0) |
6330 | Opc = AArch64::LD1Fourv1d; |
6331 | else |
6332 | llvm_unreachable("Unexpected type for ld4!" ); |
6333 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6334 | break; |
6335 | } |
6336 | case Intrinsic::aarch64_neon_ld4lane: { |
6337 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6338 | unsigned Opc; |
6339 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6340 | Opc = AArch64::LD4i8; |
6341 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6342 | Opc = AArch64::LD4i16; |
6343 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6344 | Opc = AArch64::LD4i32; |
6345 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6346 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6347 | Opc = AArch64::LD4i64; |
6348 | else |
6349 | llvm_unreachable("Unexpected type for st4lane!" ); |
6350 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 4, I)) |
6351 | return false; |
6352 | break; |
6353 | } |
6354 | case Intrinsic::aarch64_neon_ld4r: { |
6355 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6356 | unsigned Opc = 0; |
6357 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6358 | Opc = AArch64::LD4Rv8b; |
6359 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6360 | Opc = AArch64::LD4Rv16b; |
6361 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6362 | Opc = AArch64::LD4Rv4h; |
6363 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6364 | Opc = AArch64::LD4Rv8h; |
6365 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6366 | Opc = AArch64::LD4Rv2s; |
6367 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6368 | Opc = AArch64::LD4Rv4s; |
6369 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6370 | Opc = AArch64::LD4Rv2d; |
6371 | else if (Ty == S64 || Ty == P0) |
6372 | Opc = AArch64::LD4Rv1d; |
6373 | else |
6374 | llvm_unreachable("Unexpected type for ld4r!" ); |
6375 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6376 | break; |
6377 | } |
6378 | case Intrinsic::aarch64_neon_st1x2: { |
6379 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6380 | unsigned Opc; |
6381 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6382 | Opc = AArch64::ST1Twov8b; |
6383 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6384 | Opc = AArch64::ST1Twov16b; |
6385 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6386 | Opc = AArch64::ST1Twov4h; |
6387 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6388 | Opc = AArch64::ST1Twov8h; |
6389 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6390 | Opc = AArch64::ST1Twov2s; |
6391 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6392 | Opc = AArch64::ST1Twov4s; |
6393 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6394 | Opc = AArch64::ST1Twov2d; |
6395 | else if (Ty == S64 || Ty == P0) |
6396 | Opc = AArch64::ST1Twov1d; |
6397 | else |
6398 | llvm_unreachable("Unexpected type for st1x2!" ); |
6399 | selectVectorStoreIntrinsic(I, NumVecs: 2, Opc); |
6400 | break; |
6401 | } |
6402 | case Intrinsic::aarch64_neon_st1x3: { |
6403 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6404 | unsigned Opc; |
6405 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6406 | Opc = AArch64::ST1Threev8b; |
6407 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6408 | Opc = AArch64::ST1Threev16b; |
6409 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6410 | Opc = AArch64::ST1Threev4h; |
6411 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6412 | Opc = AArch64::ST1Threev8h; |
6413 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6414 | Opc = AArch64::ST1Threev2s; |
6415 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6416 | Opc = AArch64::ST1Threev4s; |
6417 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6418 | Opc = AArch64::ST1Threev2d; |
6419 | else if (Ty == S64 || Ty == P0) |
6420 | Opc = AArch64::ST1Threev1d; |
6421 | else |
6422 | llvm_unreachable("Unexpected type for st1x3!" ); |
6423 | selectVectorStoreIntrinsic(I, NumVecs: 3, Opc); |
6424 | break; |
6425 | } |
6426 | case Intrinsic::aarch64_neon_st1x4: { |
6427 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6428 | unsigned Opc; |
6429 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6430 | Opc = AArch64::ST1Fourv8b; |
6431 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6432 | Opc = AArch64::ST1Fourv16b; |
6433 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6434 | Opc = AArch64::ST1Fourv4h; |
6435 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6436 | Opc = AArch64::ST1Fourv8h; |
6437 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6438 | Opc = AArch64::ST1Fourv2s; |
6439 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6440 | Opc = AArch64::ST1Fourv4s; |
6441 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6442 | Opc = AArch64::ST1Fourv2d; |
6443 | else if (Ty == S64 || Ty == P0) |
6444 | Opc = AArch64::ST1Fourv1d; |
6445 | else |
6446 | llvm_unreachable("Unexpected type for st1x4!" ); |
6447 | selectVectorStoreIntrinsic(I, NumVecs: 4, Opc); |
6448 | break; |
6449 | } |
6450 | case Intrinsic::aarch64_neon_st2: { |
6451 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6452 | unsigned Opc; |
6453 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6454 | Opc = AArch64::ST2Twov8b; |
6455 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6456 | Opc = AArch64::ST2Twov16b; |
6457 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6458 | Opc = AArch64::ST2Twov4h; |
6459 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6460 | Opc = AArch64::ST2Twov8h; |
6461 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6462 | Opc = AArch64::ST2Twov2s; |
6463 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6464 | Opc = AArch64::ST2Twov4s; |
6465 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6466 | Opc = AArch64::ST2Twov2d; |
6467 | else if (Ty == S64 || Ty == P0) |
6468 | Opc = AArch64::ST1Twov1d; |
6469 | else |
6470 | llvm_unreachable("Unexpected type for st2!" ); |
6471 | selectVectorStoreIntrinsic(I, NumVecs: 2, Opc); |
6472 | break; |
6473 | } |
6474 | case Intrinsic::aarch64_neon_st3: { |
6475 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6476 | unsigned Opc; |
6477 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6478 | Opc = AArch64::ST3Threev8b; |
6479 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6480 | Opc = AArch64::ST3Threev16b; |
6481 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6482 | Opc = AArch64::ST3Threev4h; |
6483 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6484 | Opc = AArch64::ST3Threev8h; |
6485 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6486 | Opc = AArch64::ST3Threev2s; |
6487 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6488 | Opc = AArch64::ST3Threev4s; |
6489 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6490 | Opc = AArch64::ST3Threev2d; |
6491 | else if (Ty == S64 || Ty == P0) |
6492 | Opc = AArch64::ST1Threev1d; |
6493 | else |
6494 | llvm_unreachable("Unexpected type for st3!" ); |
6495 | selectVectorStoreIntrinsic(I, NumVecs: 3, Opc); |
6496 | break; |
6497 | } |
6498 | case Intrinsic::aarch64_neon_st4: { |
6499 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6500 | unsigned Opc; |
6501 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6502 | Opc = AArch64::ST4Fourv8b; |
6503 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6504 | Opc = AArch64::ST4Fourv16b; |
6505 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6506 | Opc = AArch64::ST4Fourv4h; |
6507 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6508 | Opc = AArch64::ST4Fourv8h; |
6509 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6510 | Opc = AArch64::ST4Fourv2s; |
6511 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6512 | Opc = AArch64::ST4Fourv4s; |
6513 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6514 | Opc = AArch64::ST4Fourv2d; |
6515 | else if (Ty == S64 || Ty == P0) |
6516 | Opc = AArch64::ST1Fourv1d; |
6517 | else |
6518 | llvm_unreachable("Unexpected type for st4!" ); |
6519 | selectVectorStoreIntrinsic(I, NumVecs: 4, Opc); |
6520 | break; |
6521 | } |
6522 | case Intrinsic::aarch64_neon_st2lane: { |
6523 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6524 | unsigned Opc; |
6525 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6526 | Opc = AArch64::ST2i8; |
6527 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6528 | Opc = AArch64::ST2i16; |
6529 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6530 | Opc = AArch64::ST2i32; |
6531 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6532 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6533 | Opc = AArch64::ST2i64; |
6534 | else |
6535 | llvm_unreachable("Unexpected type for st2lane!" ); |
6536 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 2, Opc)) |
6537 | return false; |
6538 | break; |
6539 | } |
6540 | case Intrinsic::aarch64_neon_st3lane: { |
6541 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6542 | unsigned Opc; |
6543 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6544 | Opc = AArch64::ST3i8; |
6545 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6546 | Opc = AArch64::ST3i16; |
6547 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6548 | Opc = AArch64::ST3i32; |
6549 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6550 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6551 | Opc = AArch64::ST3i64; |
6552 | else |
6553 | llvm_unreachable("Unexpected type for st3lane!" ); |
6554 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 3, Opc)) |
6555 | return false; |
6556 | break; |
6557 | } |
6558 | case Intrinsic::aarch64_neon_st4lane: { |
6559 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6560 | unsigned Opc; |
6561 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6562 | Opc = AArch64::ST4i8; |
6563 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6564 | Opc = AArch64::ST4i16; |
6565 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6566 | Opc = AArch64::ST4i32; |
6567 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6568 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6569 | Opc = AArch64::ST4i64; |
6570 | else |
6571 | llvm_unreachable("Unexpected type for st4lane!" ); |
6572 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 4, Opc)) |
6573 | return false; |
6574 | break; |
6575 | } |
6576 | case Intrinsic::aarch64_mops_memset_tag: { |
6577 | // Transform |
6578 | // %dst:gpr(p0) = \ |
6579 | // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag), |
6580 | // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64) |
6581 | // where %dst is updated, into |
6582 | // %Rd:GPR64common, %Rn:GPR64) = \ |
6583 | // MOPSMemorySetTaggingPseudo \ |
6584 | // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64 |
6585 | // where Rd and Rn are tied. |
6586 | // It is expected that %val has been extended to s64 in legalization. |
6587 | // Note that the order of the size/value operands are swapped. |
6588 | |
6589 | Register DstDef = I.getOperand(i: 0).getReg(); |
6590 | // I.getOperand(1) is the intrinsic function |
6591 | Register DstUse = I.getOperand(i: 2).getReg(); |
6592 | Register ValUse = I.getOperand(i: 3).getReg(); |
6593 | Register SizeUse = I.getOperand(i: 4).getReg(); |
6594 | |
6595 | // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one. |
6596 | // Therefore an additional virtual register is requried for the updated size |
6597 | // operand. This value is not accessible via the semantics of the intrinsic. |
6598 | Register SizeDef = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
6599 | |
6600 | auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo, |
6601 | {DstDef, SizeDef}, {DstUse, SizeUse, ValUse}); |
6602 | Memset.cloneMemRefs(I); |
6603 | constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI); |
6604 | break; |
6605 | } |
6606 | } |
6607 | |
6608 | I.eraseFromParent(); |
6609 | return true; |
6610 | } |
6611 | |
/// Hand-select intrinsics that are not covered by the imported TableGen
/// patterns. Returns true (and erases \p I) on success; false means the
/// intrinsic was not handled here.
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();

  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h: {
    // Operand 0 is the destination; operand 1 is the intrinsic ID; operand 2
    // is the source value.
    Register DstReg = I.getOperand(i: 0).getReg();
    Register SrcReg = I.getOperand(i: 2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(Reg: DstReg).getSizeInBits() != 32 ||
        MRI.getType(Reg: SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up some new FPR registers for
    // the source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      MIB.buildCopy(Res: {SrcReg}, Op: {I.getOperand(i: 2)});

      // Make sure the copy ends up getting constrained properly.
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);

    // Did we create a new register for the destination?
    if (DstReg != I.getOperand(i: 0).getReg()) {
      // Yep. Copy the result of the instruction back into the original
      // destination.
      MIB.buildCopy(Res: {I.getOperand(i: 0)}, Op: {DstReg});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::frameaddress:
  case Intrinsic::returnaddress: {
    MachineFunction &MF = *I.getParent()->getParent();
    MachineFrameInfo &MFI = MF.getFrameInfo();

    unsigned Depth = I.getOperand(i: 2).getImm();
    Register DstReg = I.getOperand(i: 0).getReg();
    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);

    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
      if (!MFReturnAddr) {
        // Insert the copy from LR/X30 into the entry block, before it can be
        // clobbered by anything.
        MFI.setReturnAddressIsTaken(true);
        MFReturnAddr = getFunctionLiveInPhysReg(
            MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
      }

      if (STI.hasPAuth()) {
        // XPACI strips the pointer authentication code from the address.
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
      } else {
        // Pre-PAuth fallback: XPACLRI operates implicitly on LR, so bounce
        // the value through LR.
        MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
        MIB.buildInstr(AArch64::XPACLRI);
        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
      }

      I.eraseFromParent();
      return true;
    }

    MFI.setFrameAddressIsTaken(true);
    // Walk up Depth frame records starting from FP; each saved FP is loaded
    // from offset 0 of the previous frame record.
    Register FrameAddr(AArch64::FP);
    while (Depth--) {
      Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
      auto Ldr =
          MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
      constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
      FrameAddr = NextFrame;
    }

    if (IntrinID == Intrinsic::frameaddress)
      MIB.buildCopy(Res: {DstReg}, Op: {FrameAddr});
    else {
      MFI.setReturnAddressIsTaken(true);

      // The saved LR lives one 64-bit slot above the saved FP in the frame
      // record (LDRXui immediate is scaled, so #1 means byte offset 8).
      if (STI.hasPAuth()) {
        Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
        MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
      } else {
        MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
            .addImm(1);
        MIB.buildInstr(AArch64::XPACLRI);
        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
      }
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::swift_async_context_addr:
    // The Swift async context is addressed at FP - 8.
    auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
                              {Register(AArch64::FP)})
                   .addImm(8)
                   .addImm(0);
    constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);

    MF->getFrameInfo().setFrameAddressIsTaken(true);
    MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
    I.eraseFromParent();
    return true;
  }
  return false;
}
6732 | |
6733 | InstructionSelector::ComplexRendererFns |
6734 | AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { |
6735 | auto MaybeImmed = getImmedFromMO(Root); |
6736 | if (MaybeImmed == std::nullopt || *MaybeImmed > 31) |
6737 | return std::nullopt; |
6738 | uint64_t Enc = (32 - *MaybeImmed) & 0x1f; |
6739 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6740 | } |
6741 | |
6742 | InstructionSelector::ComplexRendererFns |
6743 | AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { |
6744 | auto MaybeImmed = getImmedFromMO(Root); |
6745 | if (MaybeImmed == std::nullopt || *MaybeImmed > 31) |
6746 | return std::nullopt; |
6747 | uint64_t Enc = 31 - *MaybeImmed; |
6748 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6749 | } |
6750 | |
6751 | InstructionSelector::ComplexRendererFns |
6752 | AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { |
6753 | auto MaybeImmed = getImmedFromMO(Root); |
6754 | if (MaybeImmed == std::nullopt || *MaybeImmed > 63) |
6755 | return std::nullopt; |
6756 | uint64_t Enc = (64 - *MaybeImmed) & 0x3f; |
6757 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6758 | } |
6759 | |
6760 | InstructionSelector::ComplexRendererFns |
6761 | AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { |
6762 | auto MaybeImmed = getImmedFromMO(Root); |
6763 | if (MaybeImmed == std::nullopt || *MaybeImmed > 63) |
6764 | return std::nullopt; |
6765 | uint64_t Enc = 63 - *MaybeImmed; |
6766 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6767 | } |
6768 | |
6769 | /// Helper to select an immediate value that can be represented as a 12-bit |
6770 | /// value shifted left by either 0 or 12. If it is possible to do so, return |
6771 | /// the immediate and shift value. If not, return std::nullopt. |
6772 | /// |
6773 | /// Used by selectArithImmed and selectNegArithImmed. |
6774 | InstructionSelector::ComplexRendererFns |
6775 | AArch64InstructionSelector::select12BitValueWithLeftShift( |
6776 | uint64_t Immed) const { |
6777 | unsigned ShiftAmt; |
6778 | if (Immed >> 12 == 0) { |
6779 | ShiftAmt = 0; |
6780 | } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { |
6781 | ShiftAmt = 12; |
6782 | Immed = Immed >> 12; |
6783 | } else |
6784 | return std::nullopt; |
6785 | |
6786 | unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt); |
6787 | return {{ |
6788 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Immed); }, |
6789 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShVal); }, |
6790 | }}; |
6791 | } |
6792 | |
6793 | /// SelectArithImmed - Select an immediate value that can be represented as |
6794 | /// a 12-bit value shifted left by either 0 or 12. If so, return true with |
6795 | /// Val set to the 12-bit value and Shift set to the shifter operand. |
6796 | InstructionSelector::ComplexRendererFns |
6797 | AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { |
6798 | // This function is called from the addsub_shifted_imm ComplexPattern, |
6799 | // which lists [imm] as the list of opcode it's interested in, however |
6800 | // we still need to check whether the operand is actually an immediate |
6801 | // here because the ComplexPattern opcode list is only used in |
6802 | // root-level opcode matching. |
6803 | auto MaybeImmed = getImmedFromMO(Root); |
6804 | if (MaybeImmed == std::nullopt) |
6805 | return std::nullopt; |
6806 | return select12BitValueWithLeftShift(Immed: *MaybeImmed); |
6807 | } |
6808 | |
6809 | /// SelectNegArithImmed - As above, but negates the value before trying to |
6810 | /// select it. |
6811 | InstructionSelector::ComplexRendererFns |
6812 | AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { |
6813 | // We need a register here, because we need to know if we have a 64 or 32 |
6814 | // bit immediate. |
6815 | if (!Root.isReg()) |
6816 | return std::nullopt; |
6817 | auto MaybeImmed = getImmedFromMO(Root); |
6818 | if (MaybeImmed == std::nullopt) |
6819 | return std::nullopt; |
6820 | uint64_t Immed = *MaybeImmed; |
6821 | |
6822 | // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" |
6823 | // have the opposite effect on the C flag, so this pattern mustn't match under |
6824 | // those circumstances. |
6825 | if (Immed == 0) |
6826 | return std::nullopt; |
6827 | |
6828 | // Check if we're dealing with a 32-bit type on the root or a 64-bit type on |
6829 | // the root. |
6830 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
6831 | if (MRI.getType(Reg: Root.getReg()).getSizeInBits() == 32) |
6832 | Immed = ~((uint32_t)Immed) + 1; |
6833 | else |
6834 | Immed = ~Immed + 1ULL; |
6835 | |
6836 | if (Immed & 0xFFFFFFFFFF000000ULL) |
6837 | return std::nullopt; |
6838 | |
6839 | Immed &= 0xFFFFFFULL; |
6840 | return select12BitValueWithLeftShift(Immed); |
6841 | } |
6842 | |
6843 | /// Return true if it is worth folding MI into an extended register. That is, |
6844 | /// if it's safe to pull it into the addressing mode of a load or store as a |
6845 | /// shift. |
6846 | bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( |
6847 | MachineInstr &MI, const MachineRegisterInfo &MRI) const { |
6848 | // Always fold if there is one use, or if we're optimizing for size. |
6849 | Register DefReg = MI.getOperand(i: 0).getReg(); |
6850 | if (MRI.hasOneNonDBGUse(RegNo: DefReg) || |
6851 | MI.getParent()->getParent()->getFunction().hasOptSize()) |
6852 | return true; |
6853 | |
6854 | // It's better to avoid folding and recomputing shifts when we don't have a |
6855 | // fastpath. |
6856 | if (!STI.hasAddrLSLFast()) |
6857 | return false; |
6858 | |
6859 | // We have a fastpath, so folding a shift in and potentially computing it |
6860 | // many times may be beneficial. Check if this is only used in memory ops. |
6861 | // If it is, then we should fold. |
6862 | return all_of(Range: MRI.use_nodbg_instructions(Reg: DefReg), |
6863 | P: [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); |
6864 | } |
6865 | |
6866 | static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { |
6867 | switch (Type) { |
6868 | case AArch64_AM::SXTB: |
6869 | case AArch64_AM::SXTH: |
6870 | case AArch64_AM::SXTW: |
6871 | return true; |
6872 | default: |
6873 | return false; |
6874 | } |
6875 | } |
6876 | |
/// Try to match \p Offset as a (possibly zero-extended) G_SHL or
/// power-of-two G_MUL by exactly Log2(SizeInBytes), and render the
/// [base, offset, sign-extend, shift=1] operands of an extended-register
/// addressing mode. \p WantsExt enables looking through/matching extends.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand" );
  assert(Offset.isReg() && "Expected offset to be a register operand" );

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Reg: Offset.getReg());

  unsigned OffsetOpc = OffsetInst->getOpcode();
  bool LookedThroughZExt = false;
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
    // Try to look through a ZEXT.
    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
      return std::nullopt;

    OffsetInst = MRI.getVRegDef(Reg: OffsetInst->getOperand(i: 1).getReg());
    OffsetOpc = OffsetInst->getOpcode();
    LookedThroughZExt = true;

    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
      return std::nullopt;
  }
  // Make sure that the memory op is a valid size.
  // (SizeInBytes == 1 gives a shift of 0, which there is nothing to fold.)
  int64_t LegalShiftVal = Log2_32(Value: SizeInBytes);
  if (LegalShiftVal == 0)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI))
    return std::nullopt;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(i: 1).getReg();
  Register ConstantReg = OffsetInst->getOperand(i: 2).getReg();
  auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return std::nullopt;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(a&: OffsetReg, b&: ConstantReg);
    ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
    if (!ValAndVReg)
      return std::nullopt;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value.getSExtValue();

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!llvm::has_single_bit<uint32_t>(Value: ImmVal))
      return std::nullopt;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(Value: ImmVal);
  }

  // Reject shift amounts outside [0, 7].
  if ((ImmVal & 0x7) != ImmVal)
    return std::nullopt;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return std::nullopt;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend, unless we looked through a
    // G_ZEXT earlier.
    if (!LookedThroughZExt) {
      MachineInstr *ExtInst = getDefIgnoringCopies(Reg: OffsetReg, MRI);
      auto Ext = getExtendTypeForInst(MI&: *ExtInst, MRI, IsLoadStore: true);
      if (Ext == AArch64_AM::InvalidShiftExtend)
        return std::nullopt;

      SignExtend = isSignExtendShiftType(Type: Ext) ? 1 : 0;
      // We only support SXTW for signed extension here.
      if (SignExtend && Ext != AArch64_AM::SXTW)
        return std::nullopt;
      OffsetReg = ExtInst->getOperand(i: 1).getReg();
    }

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Reg: Root.getReg()));
    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(Val: SignExtend);
             MIB.addImm(Val: 1);
           }}};
}
6982 | |
/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3, lsl #3]
///
/// Where x2 is the base register, and x3 is an offset register. The shift-left
/// is a constant value specific to this load instruction. That is, we'll never
/// see anything other than a 3 here (which corresponds to the size of the
/// element being loaded.)
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We want to find something like this:
  //
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // And fold it into this addressing mode:
  //
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]

  // Check if we can find the G_PTR_ADD.
  MachineInstr *PtrAdd =
      getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI))
    return std::nullopt;

  // Now, try to match an opcode which will match our specific offset.
  // We want a G_SHL or a G_MUL.
  // The base is the G_PTR_ADD's LHS; the offset passed down is the def
  // operand of the (copy-stripped) instruction defining the RHS.
  MachineInstr *OffsetInst =
      getDefIgnoringCopies(Reg: PtrAdd->getOperand(i: 2).getReg(), MRI);
  return selectExtendedSHL(Root, Base&: PtrAdd->getOperand(i: 1),
                           Offset&: OffsetInst->getOperand(i: 0), SizeInBytes,
                           /*WantsExt=*/false);
}
7023 | |
7024 | /// This is used for computing addresses like this: |
7025 | /// |
7026 | /// ldr x1, [x2, x3] |
7027 | /// |
7028 | /// Where x2 is the base register, and x3 is an offset register. |
7029 | /// |
7030 | /// When possible (or profitable) to fold a G_PTR_ADD into the address |
7031 | /// calculation, this will do so. Otherwise, it will return std::nullopt. |
7032 | InstructionSelector::ComplexRendererFns |
7033 | AArch64InstructionSelector::selectAddrModeRegisterOffset( |
7034 | MachineOperand &Root) const { |
7035 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7036 | |
7037 | // We need a GEP. |
7038 | MachineInstr *Gep = MRI.getVRegDef(Reg: Root.getReg()); |
7039 | if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD) |
7040 | return std::nullopt; |
7041 | |
7042 | // If this is used more than once, let's not bother folding. |
7043 | // TODO: Check if they are memory ops. If they are, then we can still fold |
7044 | // without having to recompute anything. |
7045 | if (!MRI.hasOneNonDBGUse(RegNo: Gep->getOperand(i: 0).getReg())) |
7046 | return std::nullopt; |
7047 | |
7048 | // Base is the GEP's LHS, offset is its RHS. |
7049 | return {{[=](MachineInstrBuilder &MIB) { |
7050 | MIB.addUse(RegNo: Gep->getOperand(i: 1).getReg()); |
7051 | }, |
7052 | [=](MachineInstrBuilder &MIB) { |
7053 | MIB.addUse(RegNo: Gep->getOperand(i: 2).getReg()); |
7054 | }, |
7055 | [=](MachineInstrBuilder &MIB) { |
7056 | // Need to add both immediates here to make sure that they are both |
7057 | // added to the instruction. |
7058 | MIB.addImm(Val: 0); |
7059 | MIB.addImm(Val: 0); |
7060 | }}}; |
7061 | } |
7062 | |
/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (!Root.isReg())
    return std::nullopt;
  MachineInstr *PtrAdd =
      getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
  if (!PtrAdd)
    return std::nullopt;

  // Check for an immediates which cannot be encoded in the [base + imm]
  // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
  // end up with code like:
  //
  // mov x0, wide
  // add x1 base, x0
  // ldr x2, [x1, x0]
  //
  // In this situation, we can use the [base, xreg] addressing mode to save an
  // add/sub:
  //
  // mov x0, wide
  // ldr x2, [base, x0]
  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(VReg: PtrAdd->getOperand(i: 2).getReg(), MRI);
  if (ValAndVReg) {
    // Scale is the (floor) log2 of the access size.
    unsigned Scale = Log2_32(Value: SizeInBytes);
    int64_t ImmOff = ValAndVReg->Value.getSExtValue();

    // Skip immediates that can be selected in the load/store addresing
    // mode.
    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
        ImmOff < (0x1000 << Scale))
      return std::nullopt;

    // Helper lambda to decide whether or not it is preferable to emit an add.
    auto isPreferredADD = [](int64_t ImmOff) {
      // Constants in [0x0, 0xfff] can be encoded in an add.
      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
        return true;

      // Can it be encoded in an add lsl #12?
      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
        return false;

      // It can be encoded in an add lsl #12, but we may not want to. If it is
      // possible to select this as a single movz, then prefer that. A single
      // movz is faster than an add with a shift.
      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
    };

    // If the immediate can be encoded in a single add/sub, then bail out.
    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return std::nullopt;
  }

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}
7132 | |
/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // The root must come from a G_PTR_ADD worth folding.
  MachineInstr *PtrAdd =
      getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI))
    return std::nullopt;

  MachineOperand &LHS = PtrAdd->getOperand(i: 1);
  MachineOperand &RHS = PtrAdd->getOperand(i: 2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an extend.
  // In this case, we try to find a shift and extend, and fold them into the
  // addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, Base&: LHS, Offset&: OffsetInst->getOperand(i: 0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI))
    return std::nullopt;

  // Check if this is an extend. We'll get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(MI&: *OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;

  // Need a 32-bit wide register.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
                                       AArch64::GPR32RegClass, MIB);
  // SXTW is the only signed extend accepted; everything else renders as
  // unsigned (SignExtend == 0).
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg. The final immediate is the shift flag (0:
  // no shift is applied to the extended offset).
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(Val: SignExtend);
             MIB.addImm(Val: 0);
           }}};
}
7200 | |
7201 | /// Select a "register plus unscaled signed 9-bit immediate" address. This |
7202 | /// should only match when there is an offset that is not valid for a scaled |
7203 | /// immediate addressing mode. The "Size" argument is the size in bytes of the |
7204 | /// memory reference, which is needed here to know what is valid for a scaled |
7205 | /// immediate. |
7206 | InstructionSelector::ComplexRendererFns |
7207 | AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, |
7208 | unsigned Size) const { |
7209 | MachineRegisterInfo &MRI = |
7210 | Root.getParent()->getParent()->getParent()->getRegInfo(); |
7211 | |
7212 | if (!Root.isReg()) |
7213 | return std::nullopt; |
7214 | |
7215 | if (!isBaseWithConstantOffset(Root, MRI)) |
7216 | return std::nullopt; |
7217 | |
7218 | MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg()); |
7219 | |
7220 | MachineOperand &OffImm = RootDef->getOperand(i: 2); |
7221 | if (!OffImm.isReg()) |
7222 | return std::nullopt; |
7223 | MachineInstr *RHS = MRI.getVRegDef(Reg: OffImm.getReg()); |
7224 | if (RHS->getOpcode() != TargetOpcode::G_CONSTANT) |
7225 | return std::nullopt; |
7226 | int64_t RHSC; |
7227 | MachineOperand &RHSOp1 = RHS->getOperand(i: 1); |
7228 | if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) |
7229 | return std::nullopt; |
7230 | RHSC = RHSOp1.getCImm()->getSExtValue(); |
7231 | |
7232 | if (RHSC >= -256 && RHSC < 256) { |
7233 | MachineOperand &Base = RootDef->getOperand(i: 1); |
7234 | return {{ |
7235 | [=](MachineInstrBuilder &MIB) { MIB.add(MO: Base); }, |
7236 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC); }, |
7237 | }}; |
7238 | } |
7239 | return std::nullopt; |
7240 | } |
7241 | |
7242 | InstructionSelector::ComplexRendererFns |
7243 | AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, |
7244 | unsigned Size, |
7245 | MachineRegisterInfo &MRI) const { |
7246 | if (RootDef.getOpcode() != AArch64::G_ADD_LOW) |
7247 | return std::nullopt; |
7248 | MachineInstr &Adrp = *MRI.getVRegDef(Reg: RootDef.getOperand(i: 1).getReg()); |
7249 | if (Adrp.getOpcode() != AArch64::ADRP) |
7250 | return std::nullopt; |
7251 | |
7252 | // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. |
7253 | auto Offset = Adrp.getOperand(i: 1).getOffset(); |
7254 | if (Offset % Size != 0) |
7255 | return std::nullopt; |
7256 | |
7257 | auto GV = Adrp.getOperand(i: 1).getGlobal(); |
7258 | if (GV->isThreadLocal()) |
7259 | return std::nullopt; |
7260 | |
7261 | auto &MF = *RootDef.getParent()->getParent(); |
7262 | if (GV->getPointerAlignment(DL: MF.getDataLayout()) < Size) |
7263 | return std::nullopt; |
7264 | |
7265 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM: MF.getTarget()); |
7266 | MachineIRBuilder MIRBuilder(RootDef); |
7267 | Register AdrpReg = Adrp.getOperand(i: 0).getReg(); |
7268 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: AdrpReg); }, |
7269 | [=](MachineInstrBuilder &MIB) { |
7270 | MIB.addGlobalAddress(GV, Offset, |
7271 | TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | |
7272 | AArch64II::MO_NC); |
7273 | }}}; |
7274 | } |
7275 | |
/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (!Root.isReg())
    return std::nullopt;

  // A frame index renders directly as [fi, #0].
  MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(MO: RootDef->getOperand(i: 1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
    }};
  }

  CodeModel::Model CM = MF.getTarget().getCodeModel();
  // Check if we can fold in the ADD of small code model ADRP + ADD address.
  if (CM == CodeModel::Small) {
    auto OpFns = tryFoldAddLowIntoImm(RootDef&: *RootDef, Size, MRI);
    if (OpFns)
      return OpFns;
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(i: 1);
    MachineOperand &RHS = RootDef->getOperand(i: 2);
    MachineInstr *LHSDef = MRI.getVRegDef(Reg: LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(Reg: RHS.getReg());

    // Accept only non-negative offsets that are a multiple of Size and fit
    // in 12 bits after scaling.
    int64_t RHSC = (int64_t)RHSDef->getOperand(i: 1).getCImm()->getZExtValue();
    unsigned Scale = Log2_32(Value: Size);
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
      // If the base is itself a frame index, render [fi, #imm] directly.
      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHSDef->getOperand(i: 1)); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
        }};

      return {{
          [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHS); },
          [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
      }};
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size))
    return std::nullopt;

  // General case: the whole root as base with a zero offset.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(MO: Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
  }};
}
7336 | |
7337 | /// Given a shift instruction, return the correct shift type for that |
7338 | /// instruction. |
7339 | static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { |
7340 | switch (MI.getOpcode()) { |
7341 | default: |
7342 | return AArch64_AM::InvalidShiftExtend; |
7343 | case TargetOpcode::G_SHL: |
7344 | return AArch64_AM::LSL; |
7345 | case TargetOpcode::G_LSHR: |
7346 | return AArch64_AM::LSR; |
7347 | case TargetOpcode::G_ASHR: |
7348 | return AArch64_AM::ASR; |
7349 | case TargetOpcode::G_ROTR: |
7350 | return AArch64_AM::ROR; |
7351 | } |
7352 | } |
7353 | |
7354 | /// Select a "shifted register" operand. If the value is not shifted, set the |
7355 | /// shift operand to a default value of "lsl 0". |
7356 | InstructionSelector::ComplexRendererFns |
7357 | AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root, |
7358 | bool AllowROR) const { |
7359 | if (!Root.isReg()) |
7360 | return std::nullopt; |
7361 | MachineRegisterInfo &MRI = |
7362 | Root.getParent()->getParent()->getParent()->getRegInfo(); |
7363 | |
7364 | // Check if the operand is defined by an instruction which corresponds to |
7365 | // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. |
7366 | MachineInstr *ShiftInst = MRI.getVRegDef(Reg: Root.getReg()); |
7367 | AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(MI&: *ShiftInst); |
7368 | if (ShType == AArch64_AM::InvalidShiftExtend) |
7369 | return std::nullopt; |
7370 | if (ShType == AArch64_AM::ROR && !AllowROR) |
7371 | return std::nullopt; |
7372 | if (!isWorthFoldingIntoExtendedReg(MI&: *ShiftInst, MRI)) |
7373 | return std::nullopt; |
7374 | |
7375 | // Need an immediate on the RHS. |
7376 | MachineOperand &ShiftRHS = ShiftInst->getOperand(i: 2); |
7377 | auto Immed = getImmedFromMO(Root: ShiftRHS); |
7378 | if (!Immed) |
7379 | return std::nullopt; |
7380 | |
7381 | // We have something that we can fold. Fold in the shift's LHS and RHS into |
7382 | // the instruction. |
7383 | MachineOperand &ShiftLHS = ShiftInst->getOperand(i: 1); |
7384 | Register ShiftReg = ShiftLHS.getReg(); |
7385 | |
7386 | unsigned NumBits = MRI.getType(Reg: ShiftReg).getSizeInBits(); |
7387 | unsigned Val = *Immed & (NumBits - 1); |
7388 | unsigned ShiftVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val); |
7389 | |
7390 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ShiftReg); }, |
7391 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShiftVal); }}}; |
7392 | } |
7393 | |
7394 | AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst( |
7395 | MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const { |
7396 | unsigned Opc = MI.getOpcode(); |
7397 | |
7398 | // Handle explicit extend instructions first. |
7399 | if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { |
7400 | unsigned Size; |
7401 | if (Opc == TargetOpcode::G_SEXT) |
7402 | Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits(); |
7403 | else |
7404 | Size = MI.getOperand(i: 2).getImm(); |
7405 | assert(Size != 64 && "Extend from 64 bits?" ); |
7406 | switch (Size) { |
7407 | case 8: |
7408 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB; |
7409 | case 16: |
7410 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH; |
7411 | case 32: |
7412 | return AArch64_AM::SXTW; |
7413 | default: |
7414 | return AArch64_AM::InvalidShiftExtend; |
7415 | } |
7416 | } |
7417 | |
7418 | if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { |
7419 | unsigned Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits(); |
7420 | assert(Size != 64 && "Extend from 64 bits?" ); |
7421 | switch (Size) { |
7422 | case 8: |
7423 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB; |
7424 | case 16: |
7425 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH; |
7426 | case 32: |
7427 | return AArch64_AM::UXTW; |
7428 | default: |
7429 | return AArch64_AM::InvalidShiftExtend; |
7430 | } |
7431 | } |
7432 | |
7433 | // Don't have an explicit extend. Try to handle a G_AND with a constant mask |
7434 | // on the RHS. |
7435 | if (Opc != TargetOpcode::G_AND) |
7436 | return AArch64_AM::InvalidShiftExtend; |
7437 | |
7438 | std::optional<uint64_t> MaybeAndMask = getImmedFromMO(Root: MI.getOperand(i: 2)); |
7439 | if (!MaybeAndMask) |
7440 | return AArch64_AM::InvalidShiftExtend; |
7441 | uint64_t AndMask = *MaybeAndMask; |
7442 | switch (AndMask) { |
7443 | default: |
7444 | return AArch64_AM::InvalidShiftExtend; |
7445 | case 0xFF: |
7446 | return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; |
7447 | case 0xFFFF: |
7448 | return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; |
7449 | case 0xFFFFFFFF: |
7450 | return AArch64_AM::UXTW; |
7451 | } |
7452 | } |
7453 | |
7454 | Register AArch64InstructionSelector::moveScalarRegClass( |
7455 | Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const { |
7456 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
7457 | auto Ty = MRI.getType(Reg); |
7458 | assert(!Ty.isVector() && "Expected scalars only!" ); |
7459 | if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC)) |
7460 | return Reg; |
7461 | |
7462 | // Create a copy and immediately select it. |
7463 | // FIXME: We should have an emitCopy function? |
7464 | auto Copy = MIB.buildCopy(Res: {&RC}, Op: {Reg}); |
7465 | selectCopy(*Copy, TII, MRI, TRI, RBI); |
7466 | return Copy.getReg(Idx: 0); |
7467 | } |
7468 | |
7469 | /// Select an "extended register" operand. This operand folds in an extend |
7470 | /// followed by an optional left shift. |
7471 | InstructionSelector::ComplexRendererFns |
7472 | AArch64InstructionSelector::selectArithExtendedRegister( |
7473 | MachineOperand &Root) const { |
7474 | if (!Root.isReg()) |
7475 | return std::nullopt; |
7476 | MachineRegisterInfo &MRI = |
7477 | Root.getParent()->getParent()->getParent()->getRegInfo(); |
7478 | |
7479 | uint64_t ShiftVal = 0; |
7480 | Register ExtReg; |
7481 | AArch64_AM::ShiftExtendType Ext; |
7482 | MachineInstr *RootDef = getDefIgnoringCopies(Reg: Root.getReg(), MRI); |
7483 | if (!RootDef) |
7484 | return std::nullopt; |
7485 | |
7486 | if (!isWorthFoldingIntoExtendedReg(MI&: *RootDef, MRI)) |
7487 | return std::nullopt; |
7488 | |
7489 | // Check if we can fold a shift and an extend. |
7490 | if (RootDef->getOpcode() == TargetOpcode::G_SHL) { |
7491 | // Look for a constant on the RHS of the shift. |
7492 | MachineOperand &RHS = RootDef->getOperand(i: 2); |
7493 | std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(Root: RHS); |
7494 | if (!MaybeShiftVal) |
7495 | return std::nullopt; |
7496 | ShiftVal = *MaybeShiftVal; |
7497 | if (ShiftVal > 4) |
7498 | return std::nullopt; |
7499 | // Look for a valid extend instruction on the LHS of the shift. |
7500 | MachineOperand &LHS = RootDef->getOperand(i: 1); |
7501 | MachineInstr *ExtDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI); |
7502 | if (!ExtDef) |
7503 | return std::nullopt; |
7504 | Ext = getExtendTypeForInst(MI&: *ExtDef, MRI); |
7505 | if (Ext == AArch64_AM::InvalidShiftExtend) |
7506 | return std::nullopt; |
7507 | ExtReg = ExtDef->getOperand(i: 1).getReg(); |
7508 | } else { |
7509 | // Didn't get a shift. Try just folding an extend. |
7510 | Ext = getExtendTypeForInst(MI&: *RootDef, MRI); |
7511 | if (Ext == AArch64_AM::InvalidShiftExtend) |
7512 | return std::nullopt; |
7513 | ExtReg = RootDef->getOperand(i: 1).getReg(); |
7514 | |
7515 | // If we have a 32 bit instruction which zeroes out the high half of a |
7516 | // register, we get an implicit zero extend for free. Check if we have one. |
7517 | // FIXME: We actually emit the extend right now even though we don't have |
7518 | // to. |
7519 | if (Ext == AArch64_AM::UXTW && MRI.getType(Reg: ExtReg).getSizeInBits() == 32) { |
7520 | MachineInstr *ExtInst = MRI.getVRegDef(Reg: ExtReg); |
7521 | if (isDef32(MI: *ExtInst)) |
7522 | return std::nullopt; |
7523 | } |
7524 | } |
7525 | |
7526 | // We require a GPR32 here. Narrow the ExtReg if needed using a subregister |
7527 | // copy. |
7528 | MachineIRBuilder MIB(*RootDef); |
7529 | ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB); |
7530 | |
7531 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }, |
7532 | [=](MachineInstrBuilder &MIB) { |
7533 | MIB.addImm(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal)); |
7534 | }}}; |
7535 | } |
7536 | |
7537 | InstructionSelector::ComplexRendererFns |
7538 | AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const { |
7539 | if (!Root.isReg()) |
7540 | return std::nullopt; |
7541 | MachineRegisterInfo &MRI = |
7542 | Root.getParent()->getParent()->getParent()->getRegInfo(); |
7543 | |
7544 | auto = getDefSrcRegIgnoringCopies(Reg: Root.getReg(), MRI); |
7545 | while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST && |
7546 | STI.isLittleEndian()) |
7547 | Extract = |
7548 | getDefSrcRegIgnoringCopies(Reg: Extract->MI->getOperand(i: 1).getReg(), MRI); |
7549 | if (!Extract) |
7550 | return std::nullopt; |
7551 | |
7552 | if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) { |
7553 | if (Extract->Reg == Extract->MI->getOperand(i: 1).getReg()) { |
7554 | Register ExtReg = Extract->MI->getOperand(i: 2).getReg(); |
7555 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}}; |
7556 | } |
7557 | } |
7558 | if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) { |
7559 | LLT SrcTy = MRI.getType(Reg: Extract->MI->getOperand(i: 1).getReg()); |
7560 | auto LaneIdx = getIConstantVRegValWithLookThrough( |
7561 | VReg: Extract->MI->getOperand(i: 2).getReg(), MRI); |
7562 | if (LaneIdx && SrcTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) && |
7563 | LaneIdx->Value.getSExtValue() == 1) { |
7564 | Register ExtReg = Extract->MI->getOperand(i: 1).getReg(); |
7565 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}}; |
7566 | } |
7567 | } |
7568 | |
7569 | return std::nullopt; |
7570 | } |
7571 | |
7572 | void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, |
7573 | const MachineInstr &MI, |
7574 | int OpIdx) const { |
7575 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
7576 | assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && |
7577 | "Expected G_CONSTANT" ); |
7578 | std::optional<int64_t> CstVal = |
7579 | getIConstantVRegSExtVal(VReg: MI.getOperand(i: 0).getReg(), MRI); |
7580 | assert(CstVal && "Expected constant value" ); |
7581 | MIB.addImm(Val: *CstVal); |
7582 | } |
7583 | |
7584 | void AArch64InstructionSelector::renderLogicalImm32( |
7585 | MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { |
7586 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && |
7587 | "Expected G_CONSTANT" ); |
7588 | uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue(); |
7589 | uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 32); |
7590 | MIB.addImm(Val: Enc); |
7591 | } |
7592 | |
7593 | void AArch64InstructionSelector::renderLogicalImm64( |
7594 | MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { |
7595 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && |
7596 | "Expected G_CONSTANT" ); |
7597 | uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue(); |
7598 | uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 64); |
7599 | MIB.addImm(Val: Enc); |
7600 | } |
7601 | |
7602 | void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB, |
7603 | const MachineInstr &MI, |
7604 | int OpIdx) const { |
7605 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && |
7606 | "Expected G_FCONSTANT" ); |
7607 | MIB.addImm( |
7608 | Val: AArch64_AM::getFP16Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF())); |
7609 | } |
7610 | |
7611 | void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB, |
7612 | const MachineInstr &MI, |
7613 | int OpIdx) const { |
7614 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && |
7615 | "Expected G_FCONSTANT" ); |
7616 | MIB.addImm( |
7617 | Val: AArch64_AM::getFP32Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF())); |
7618 | } |
7619 | |
7620 | void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB, |
7621 | const MachineInstr &MI, |
7622 | int OpIdx) const { |
7623 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && |
7624 | "Expected G_FCONSTANT" ); |
7625 | MIB.addImm( |
7626 | Val: AArch64_AM::getFP64Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF())); |
7627 | } |
7628 | |
7629 | void AArch64InstructionSelector::renderFPImm32SIMDModImmType4( |
7630 | MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { |
7631 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && |
7632 | "Expected G_FCONSTANT" ); |
7633 | MIB.addImm(Val: AArch64_AM::encodeAdvSIMDModImmType4(Imm: MI.getOperand(i: 1) |
7634 | .getFPImm() |
7635 | ->getValueAPF() |
7636 | .bitcastToAPInt() |
7637 | .getZExtValue())); |
7638 | } |
7639 | |
7640 | bool AArch64InstructionSelector::isLoadStoreOfNumBytes( |
7641 | const MachineInstr &MI, unsigned NumBytes) const { |
7642 | if (!MI.mayLoadOrStore()) |
7643 | return false; |
7644 | assert(MI.hasOneMemOperand() && |
7645 | "Expected load/store to have only one mem op!" ); |
7646 | return (*MI.memoperands_begin())->getSize() == NumBytes; |
7647 | } |
7648 | |
7649 | bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { |
7650 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
7651 | if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits() != 32) |
7652 | return false; |
7653 | |
7654 | // Only return true if we know the operation will zero-out the high half of |
7655 | // the 64-bit register. Truncates can be subregister copies, which don't |
7656 | // zero out the high bits. Copies and other copy-like instructions can be |
7657 | // fed by truncates, or could be lowered as subregister copies. |
7658 | switch (MI.getOpcode()) { |
7659 | default: |
7660 | return true; |
7661 | case TargetOpcode::COPY: |
7662 | case TargetOpcode::G_BITCAST: |
7663 | case TargetOpcode::G_TRUNC: |
7664 | case TargetOpcode::G_PHI: |
7665 | return false; |
7666 | } |
7667 | } |
7668 | |
7669 | |
// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
//
// Each register input whose bank differs from the def's bank is rerouted
// through a cross-bank COPY inserted next to the input's defining
// instruction; the PHI operand is then rewritten to use the copy.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI" );
  Register DstReg = MI.getOperand(i: 0).getReg();
  // The def's bank is the one every input is forced to match.
  const RegisterBank *DstRB = MRI.getRegBankOrNull(Reg: DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned" );
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank.
  // drop_begin skips operand 0, the def.
  for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) {
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(Reg: OpReg);
    if (RB != DstRB) {
      // Insert a cross-bank copy.
      auto *OpDef = MRI.getVRegDef(Reg: OpReg);
      const LLT &Ty = MRI.getType(Reg: OpReg);
      MachineBasicBlock &OpDefBB = *OpDef->getParent();

      // Any instruction we insert must appear after all PHIs in the block
      // for the block to be valid MIR.
      MachineBasicBlock::iterator InsertPt = std::next(x: OpDef->getIterator());
      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
        InsertPt = OpDefBB.getFirstNonPHI();
      MIB.setInsertPt(MBB&: *OpDef->getParent(), II: InsertPt);
      auto Copy = MIB.buildCopy(Res: Ty, Op: OpReg);
      // Tag the copy's result with the def's bank and rewire the PHI input.
      MRI.setRegBank(Reg: Copy.getReg(Idx: 0), RegBank: *DstRB);
      MO.setReg(Copy.getReg(Idx: 0));
    }
  }
}
7704 | |
7705 | void AArch64InstructionSelector::processPHIs(MachineFunction &MF) { |
7706 | // We're looking for PHIs, build a list so we don't invalidate iterators. |
7707 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
7708 | SmallVector<MachineInstr *, 32> Phis; |
7709 | for (auto &BB : MF) { |
7710 | for (auto &MI : BB) { |
7711 | if (MI.getOpcode() == TargetOpcode::G_PHI) |
7712 | Phis.emplace_back(Args: &MI); |
7713 | } |
7714 | } |
7715 | |
7716 | for (auto *MI : Phis) { |
7717 | // We need to do some work here if the operand types are < 16 bit and they |
7718 | // are split across fpr/gpr banks. Since all types <32b on gpr |
7719 | // end up being assigned gpr32 regclasses, we can end up with PHIs here |
7720 | // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't |
7721 | // be selecting heterogenous regbanks for operands if possible, but we |
7722 | // still need to be able to deal with it here. |
7723 | // |
7724 | // To fix this, if we have a gpr-bank operand < 32b in size and at least |
7725 | // one other operand is on the fpr bank, then we add cross-bank copies |
7726 | // to homogenize the operand banks. For simplicity the bank that we choose |
7727 | // to settle on is whatever bank the def operand has. For example: |
7728 | // |
7729 | // %endbb: |
7730 | // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2 |
7731 | // => |
7732 | // %bb2: |
7733 | // ... |
7734 | // %in2_copy:gpr(s16) = COPY %in2:fpr(s16) |
7735 | // ... |
7736 | // %endbb: |
7737 | // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2 |
7738 | bool HasGPROp = false, HasFPROp = false; |
7739 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) { |
7740 | if (!MO.isReg()) |
7741 | continue; |
7742 | const LLT &Ty = MRI.getType(Reg: MO.getReg()); |
7743 | if (!Ty.isValid() || !Ty.isScalar()) |
7744 | break; |
7745 | if (Ty.getSizeInBits() >= 32) |
7746 | break; |
7747 | const RegisterBank *RB = MRI.getRegBankOrNull(Reg: MO.getReg()); |
7748 | // If for some reason we don't have a regbank yet. Don't try anything. |
7749 | if (!RB) |
7750 | break; |
7751 | |
7752 | if (RB->getID() == AArch64::GPRRegBankID) |
7753 | HasGPROp = true; |
7754 | else |
7755 | HasFPROp = true; |
7756 | } |
7757 | // We have heterogenous regbanks, need to fixup. |
7758 | if (HasGPROp && HasFPROp) |
7759 | fixupPHIOpBanks(MI&: *MI, MRI, RBI); |
7760 | } |
7761 | } |
7762 | |
namespace llvm {
/// Factory entry point: construct the AArch64 GlobalISel instruction
/// selector. The caller takes ownership of the returned object.
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm
7771 | |