ARMTargetTransformInfo.h source code [llvm/lib/Target/ARM/ARMTargetTransformInfo.h]

1	//===- ARMTargetTransformInfo.h - ARM specific TTI --------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// This file defines a TargetTransformInfo::Concept conforming object specific to the
11	/// ARM target machine. It uses the target's detailed information to
12	/// provide more precise answers to certain TTI queries, while letting the
13	/// target independent and default TTI implementations handle the rest.
14	//
15	//===----------------------------------------------------------------------===//
16
17	#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
18	#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
19
20	#include "ARM.h"
21	#include "ARMSubtarget.h"
22	#include "ARMTargetMachine.h"
23	#include "llvm/ADT/ArrayRef.h"
24	#include "llvm/Analysis/TargetTransformInfo.h"
25	#include "llvm/CodeGen/BasicTTIImpl.h"
26	#include "llvm/IR/Constant.h"
27	#include "llvm/IR/Function.h"
28	#include "llvm/TargetParser/SubtargetFeature.h"
29	#include <optional>
30
31	namespace llvm {
32
33	class APInt;
34	class ARMTargetLowering;
35	class Instruction;
36	class Loop;
37	class SCEV;
38	class ScalarEvolution;
39	class Type;
40	class Value;
41
/// Tail-predication modes.
///
/// The enumerators are numbered consecutively starting at 0 (Disabled) up to
/// 4 (ForceEnabled); this is a plain (unscoped) enum, so the values convert
/// implicitly to integers.
// NOTE(review): the precise meaning of each mode is defined by the code that
// consumes this enum, which is not visible in this header - confirm there.
namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions,
  Enabled,
  ForceEnabledNoReductions,
  ForceEnabled
};
} // namespace TailPredication
51
// For controlling conversion of memcpy into Tail Predicated loop.
//
// The enumerators are numbered consecutively: ForceDisabled == 0,
// ForceEnabled == 1, Allow == 2.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
} // namespace TPLoop
56
57	class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
58	using BaseT = BasicTTIImplBase<ARMTTIImpl>;
59	using TTI = TargetTransformInfo;
60
61	friend BaseT;
62
63	const ARMSubtarget *ST;
64	const ARMTargetLowering *TLI;
65
66	// Currently the following features are excluded from InlineFeaturesAllowed.
67	// ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
68	// Depending on whether they are set or unset, different
69	// instructions/registers are available. For example, inlining a callee with
70	// -thumb-mode in a caller with +thumb-mode, may cause the assembler to
71	// fail if the callee uses ARM only instructions, e.g. in inline asm.
72	const FeatureBitset InlineFeaturesAllowed = {
73	ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
74	ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
75	ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
76	ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
77	ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
78	ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
79	ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
80	ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
81	ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
82	ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
83	ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
84	ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
85	ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
86	ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
87	ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
88	ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
89	ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
90	ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
91	ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
92	ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
93	ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
94	ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
95	ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
96	ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
97	};
98
99	const ARMSubtarget getST() const* { return ST; }
100	const ARMTargetLowering getTLI() const* { return TLI; }
101
102	public:
103	explicit ARMTTIImpl(const ARMBaseTargetMachine TM, const* Function &F)
104	: BaseT (TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
105	TLI(ST->getTargetLowering()) {}
106
107	bool areInlineCompatible(const Function *Caller,
108	const Function Callee) const*;
109
110	bool enableInterleavedAccessVectorization() { return true; }
111
112	TTI::AddressingModeKind
113	getPreferredAddressingMode(const Loop L, ScalarEvolution SE) const;
114
115	/// Floating-point computation using ARMv8 AArch32 Advanced
116	/// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
117	/// and Arm MVE are IEEE-754 compliant.
118	bool isFPVectorizationPotentiallyUnsafe() {
119	return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
120	}
121
122	std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
123	IntrinsicInst &II) const;
124	std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
125	InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
126	APInt &UndefElts2, APInt &UndefElts3,
127	std::function<void(Instruction , unsigned*, APInt, APInt &)>
128	SimplifyAndSetOp) const;
129
130	/// \name Scalar TTI Implementations
131	/// @{
132
133	InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
134	const APInt &Imm, Type *Ty);
135
136	using BaseT::getIntImmCost;
137	InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
138	TTI::TargetCostKind CostKind);
139
140	InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
141	const APInt &Imm, Type *Ty,
142	TTI::TargetCostKind CostKind,
143	Instruction Inst = nullptr*);
144
145	/// @}
146
147	/// \name Vector TTI Implementations
148	/// @{
149
150	unsigned getNumberOfRegisters(unsigned ClassID) const {
151	bool Vector = (ClassID == `1`);
152	if (Vector) {
153	if (ST->hasNEON())
154	return `16`;
155	if (ST->hasMVEIntegerOps())
156	return `8`;
157	return `0`;
158	}
159
160	if (ST->isThumb1Only())
161	return `8`;
162	return `13`;
163	}
164
165	TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
166	switch (K) {
167	case TargetTransformInfo::RGK_Scalar:
168	return TypeSize::getFixed(ExactSize: `32`);
169	case TargetTransformInfo::RGK_FixedWidthVector:
170	if (ST->hasNEON())
171	return TypeSize::getFixed(ExactSize: `128`);
172	if (ST->hasMVEIntegerOps())
173	return TypeSize::getFixed(ExactSize: `128`);
174	return TypeSize::getFixed(ExactSize: `0`);
175	case TargetTransformInfo::RGK_ScalableVector:
176	return TypeSize::getScalable(MinimumSize: `0`);
177	}
178	llvm_unreachable("Unsupported register kind");
179	}
180
181	unsigned getMaxInterleaveFactor(ElementCount VF) {
182	return ST->getMaxInterleaveFactor();
183	}
184
185	bool isProfitableLSRChainElement(Instruction *I);
186
187	bool isLegalMaskedLoad(Type *DataTy, Align Alignment);
188
189	bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
190	return isLegalMaskedLoad(DataTy, Alignment);
191	}
192
193	bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
194	// For MVE, we have a custom lowering pass that will already have custom
195	// legalised any gathers that we can lower to MVE intrinsics, and want to
196	// expand all the rest. The pass runs before the masked intrinsic lowering
197	// pass.
198	return true;
199	}
200
201	bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
202	return forceScalarizeMaskedGather(VTy, Alignment);
203	}
204
205	bool isLegalMaskedGather(Type *Ty, Align Alignment);
206
207	bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
208	return isLegalMaskedGather(Ty, Alignment);
209	}
210
211	InstructionCost getMemcpyCost(const Instruction *I);
212
213	uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
214	return ST->getMaxInlineSizeThreshold();
215	}
216
217	int getNumMemOps(const IntrinsicInst I) const*;
218
219	InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
220	ArrayRef<int> Mask,
221	TTI::TargetCostKind CostKind, int Index,
222	VectorType *SubTp,
223	ArrayRef<const Value *> Args = std::nullopt,
224	const Instruction CxtI = nullptr*);
225
226	bool preferInLoopReduction(unsigned Opcode, Type *Ty,
227	TTI::ReductionFlags Flags) const;
228
229	bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
230	TTI::ReductionFlags Flags) const;
231
232	bool shouldExpandReduction(const IntrinsicInst II) const* { return false; }
233
234	InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
235	const Instruction I = nullptr*);
236
237	InstructionCost getCastInstrCost(unsigned Opcode, Type Dst, Type Src,
238	TTI::CastContextHint CCH,
239	TTI::TargetCostKind CostKind,
240	const Instruction I = nullptr*);
241
242	InstructionCost getCmpSelInstrCost(unsigned Opcode, Type ValTy, Type CondTy,
243	CmpInst::Predicate VecPred,
244	TTI::TargetCostKind CostKind,
245	const Instruction I = nullptr*);
246
247	using BaseT::getVectorInstrCost;
248	InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
249	TTI::TargetCostKind CostKind,
250	unsigned Index, Value Op0, Value Op1);
251
252	InstructionCost getAddressComputationCost(Type Val, ScalarEvolution SE,
253	const SCEV *Ptr);
254
255	InstructionCost getArithmeticInstrCost(
256	unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
257	TTI::OperandValueInfo Op1Info = {.Kind: TTI::OK_AnyValue, .Properties: TTI::OP_None},
258	TTI::OperandValueInfo Op2Info = {.Kind: TTI::OK_AnyValue, .Properties: TTI::OP_None},
259	ArrayRef<const Value *> Args = std::nullopt,
260	const Instruction CxtI = nullptr*);
261
262	InstructionCost
263	getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
264	unsigned AddressSpace, TTI::TargetCostKind CostKind,
265	TTI::OperandValueInfo OpInfo = {.Kind: TTI::OK_AnyValue, .Properties: TTI::OP_None},
266	const Instruction I = nullptr*);
267
268	InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
269	Align Alignment, unsigned AddressSpace,
270	TTI::TargetCostKind CostKind);
271
272	InstructionCost getInterleavedMemoryOpCost(
273	unsigned Opcode, Type VecTy, unsigned* Factor, ArrayRef<unsigned> Indices,
274	Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
275	bool UseMaskForCond = false, bool UseMaskForGaps = false);
276
277	InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
278	const Value Ptr, bool* VariableMask,
279	Align Alignment,
280	TTI::TargetCostKind CostKind,
281	const Instruction I = nullptr*);
282
283	InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
284	std::optional<FastMathFlags> FMF,
285	TTI::TargetCostKind CostKind);
286	InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
287	Type ResTy, VectorType ValTy,
288	FastMathFlags FMF,
289	TTI::TargetCostKind CostKind);
290	InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
291	VectorType *ValTy,
292	TTI::TargetCostKind CostKind);
293
294	InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
295	FastMathFlags FMF,
296	TTI::TargetCostKind CostKind);
297
298	InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
299	TTI::TargetCostKind CostKind);
300
301	/// getScalingFactorCost - Return the cost of the scaling used in
302	/// addressing mode represented by AM.
303	/// If the AM is supported, the return value must be >= 0.
304	/// If the AM is not supported, the return value must be negative.
305	InstructionCost getScalingFactorCost(Type Ty, GlobalValue BaseGV,
306	int64_t BaseOffset, bool HasBaseReg,
307	int64_t Scale, unsigned AddrSpace) const;
308
309	bool maybeLoweredToCall(Instruction &I);
310	bool isLoweredToCall(const Function *F);
311	bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
312	AssumptionCache &AC,
313	TargetLibraryInfo *LibInfo,
314	HardwareLoopInfo &HWLoopInfo);
315	bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
316	void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
317	TTI::UnrollingPreferences &UP,
318	OptimizationRemarkEmitter *ORE);
319
320	TailFoldingStyle
321	getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
322
323	void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
324	TTI::PeelingPreferences &PP);
325	bool shouldBuildLookupTablesForConstant(Constant C) const* {
326	// In the ROPI and RWPI relocation models we can't have pointers to global
327	// variables or functions in constant data, so don't convert switches to
328	// lookup tables if any of the values would need relocation.
329	if (ST->isROPI() \|\| ST->isRWPI())
330	return !C->needsDynamicRelocation();
331
332	return true;
333	}
334
335	bool hasArmWideBranch(bool Thumb) const;
336
337	/// @}
338	};
339
340	/// isVREVMask - Check if a vector shuffle corresponds to a VREV
341	/// instruction with the specified blocksize. (The order of the elements
342	/// within each block of the vector is reversed.)
343	inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
344	assert((BlockSize == `16` \|\| BlockSize == `32` \|\| BlockSize == `64`) &&
345	"Only possible block sizes for VREV are: 16, 32, 64");
346
347	unsigned EltSz = VT.getScalarSizeInBits();
348	if (EltSz != `8` && EltSz != `16` && EltSz != `32`)
349	return false;
350
351	unsigned BlockElts = M [`0`] + `1`;
352	// If the first shuffle index is UNDEF, be optimistic.
353	if (M [`0`] < `0`)
354	BlockElts = BlockSize / EltSz;
355
356	if (BlockSize <= EltSz \|\| BlockSize != BlockElts * EltSz)
357	return false;
358
359	for (unsigned i = `0`, e = M.size(); i < e; ++i) {
360	if (M [i] < `0`)
361	continue; // ignore UNDEF indices
362	if ((unsigned)M [i] != (i - i % BlockElts) + (BlockElts - `1` - i % BlockElts))
363	return false;
364	}
365
366	return true;
367	}
368
369	} // end namespace llvm
370
371	#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
372

source code of llvm/lib/Target/ARM/ARMTargetTransformInfo.h