1 | //===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
/// This file provides a TargetTransformInfo::Concept conforming object specific to the
10 | /// X86 target machine. It uses the target's detailed information to |
11 | /// provide more precise answers to certain TTI queries, while letting the |
12 | /// target independent and default TTI implementations handle the rest. |
13 | /// |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H |
17 | #define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H |
18 | |
19 | #include "X86TargetMachine.h" |
20 | #include "llvm/Analysis/TargetTransformInfo.h" |
21 | #include "llvm/CodeGen/BasicTTIImpl.h" |
22 | #include <optional> |
23 | |
24 | namespace llvm { |
25 | |
26 | class InstCombiner; |
27 | |
28 | class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> { |
29 | typedef BasicTTIImplBase<X86TTIImpl> BaseT; |
30 | typedef TargetTransformInfo TTI; |
31 | friend BaseT; |
32 | |
33 | const X86Subtarget *ST; |
34 | const X86TargetLowering *TLI; |
35 | |
36 | const X86Subtarget *getST() const { return ST; } |
37 | const X86TargetLowering *getTLI() const { return TLI; } |
38 | |
39 | const FeatureBitset InlineFeatureIgnoreList = { |
40 | // This indicates the CPU is 64 bit capable not that we are in 64-bit |
41 | // mode. |
42 | X86::FeatureX86_64, |
43 | |
44 | // These features don't have any intrinsics or ABI effect. |
45 | X86::FeatureNOPL, |
46 | X86::FeatureCX16, |
47 | X86::FeatureLAHFSAHF64, |
48 | |
49 | // Some older targets can be setup to fold unaligned loads. |
50 | X86::FeatureSSEUnalignedMem, |
51 | |
52 | // Codegen control options. |
53 | X86::TuningFast11ByteNOP, |
54 | X86::TuningFast15ByteNOP, |
55 | X86::TuningFastBEXTR, |
56 | X86::TuningFastHorizontalOps, |
57 | X86::TuningFastLZCNT, |
58 | X86::TuningFastScalarFSQRT, |
59 | X86::TuningFastSHLDRotate, |
60 | X86::TuningFastScalarShiftMasks, |
61 | X86::TuningFastVectorShiftMasks, |
62 | X86::TuningFastVariableCrossLaneShuffle, |
63 | X86::TuningFastVariablePerLaneShuffle, |
64 | X86::TuningFastVectorFSQRT, |
65 | X86::TuningLEAForSP, |
66 | X86::TuningLEAUsesAG, |
67 | X86::TuningLZCNTFalseDeps, |
68 | X86::TuningBranchFusion, |
69 | X86::TuningMacroFusion, |
70 | X86::TuningPadShortFunctions, |
71 | X86::TuningPOPCNTFalseDeps, |
72 | X86::TuningMULCFalseDeps, |
73 | X86::TuningPERMFalseDeps, |
74 | X86::TuningRANGEFalseDeps, |
75 | X86::TuningGETMANTFalseDeps, |
76 | X86::TuningMULLQFalseDeps, |
77 | X86::TuningSlow3OpsLEA, |
78 | X86::TuningSlowDivide32, |
79 | X86::TuningSlowDivide64, |
80 | X86::TuningSlowIncDec, |
81 | X86::TuningSlowLEA, |
82 | X86::TuningSlowPMADDWD, |
83 | X86::TuningSlowPMULLD, |
84 | X86::TuningSlowSHLD, |
85 | X86::TuningSlowTwoMemOps, |
86 | X86::TuningSlowUAMem16, |
87 | X86::TuningPreferMaskRegisters, |
88 | X86::TuningInsertVZEROUPPER, |
89 | X86::TuningUseSLMArithCosts, |
90 | X86::TuningUseGLMDivSqrtCosts, |
91 | X86::TuningNoDomainDelay, |
92 | X86::TuningNoDomainDelayMov, |
93 | X86::TuningNoDomainDelayShuffle, |
94 | X86::TuningNoDomainDelayBlend, |
95 | X86::TuningPreferShiftShuffle, |
96 | X86::TuningFastImmVectorShift, |
97 | X86::TuningFastDPWSSD, |
98 | |
99 | // Perf-tuning flags. |
100 | X86::TuningFastGather, |
101 | X86::TuningSlowUAMem32, |
102 | X86::TuningAllowLight256Bit, |
103 | |
104 | // Based on whether user set the -mprefer-vector-width command line. |
105 | X86::TuningPrefer128Bit, |
106 | X86::TuningPrefer256Bit, |
107 | |
108 | // CPU name enums. These just follow CPU string. |
109 | X86::ProcIntelAtom |
110 | }; |
111 | |
112 | public: |
113 | explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F) |
114 | : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), |
115 | TLI(ST->getTargetLowering()) {} |
116 | |
117 | /// \name Scalar TTI Implementations |
118 | /// @{ |
119 | TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); |
120 | |
121 | /// @} |
122 | |
123 | /// \name Cache TTI Implementation |
124 | /// @{ |
125 | std::optional<unsigned> getCacheSize( |
126 | TargetTransformInfo::CacheLevel Level) const override; |
127 | std::optional<unsigned> getCacheAssociativity( |
128 | TargetTransformInfo::CacheLevel Level) const override; |
129 | /// @} |
130 | |
131 | /// \name Vector TTI Implementations |
132 | /// @{ |
133 | |
134 | unsigned getNumberOfRegisters(unsigned ClassID) const; |
135 | TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; |
136 | unsigned getLoadStoreVecRegBitWidth(unsigned AS) const; |
137 | unsigned getMaxInterleaveFactor(ElementCount VF); |
138 | InstructionCost getArithmeticInstrCost( |
139 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
140 | TTI::OperandValueInfo Op1Info = {.Kind: .Kind: TTI::OK_AnyValue, .Properties: .Properties: TTI::OP_None}, |
141 | TTI::OperandValueInfo Op2Info = {.Kind: .Kind: TTI::OK_AnyValue, .Properties: .Properties: TTI::OP_None}, |
142 | ArrayRef<const Value *> Args = std::nullopt, |
143 | const Instruction *CxtI = nullptr); |
144 | InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, |
145 | unsigned Opcode1, |
146 | const SmallBitVector &OpcodeMask, |
147 | TTI::TargetCostKind CostKind) const; |
148 | |
149 | InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, |
150 | ArrayRef<int> Mask, |
151 | TTI::TargetCostKind CostKind, int Index, |
152 | VectorType *SubTp, |
153 | ArrayRef<const Value *> Args = std::nullopt, |
154 | const Instruction *CxtI = nullptr); |
155 | InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
156 | TTI::CastContextHint CCH, |
157 | TTI::TargetCostKind CostKind, |
158 | const Instruction *I = nullptr); |
159 | InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
160 | CmpInst::Predicate VecPred, |
161 | TTI::TargetCostKind CostKind, |
162 | const Instruction *I = nullptr); |
163 | using BaseT::getVectorInstrCost; |
164 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
165 | TTI::TargetCostKind CostKind, |
166 | unsigned Index, Value *Op0, Value *Op1); |
167 | InstructionCost getScalarizationOverhead(VectorType *Ty, |
168 | const APInt &DemandedElts, |
169 | bool Insert, bool , |
170 | TTI::TargetCostKind CostKind); |
171 | InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, |
172 | int VF, |
173 | const APInt &DemandedDstElts, |
174 | TTI::TargetCostKind CostKind); |
175 | InstructionCost |
176 | getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, |
177 | unsigned AddressSpace, TTI::TargetCostKind CostKind, |
178 | TTI::OperandValueInfo OpInfo = {.Kind: .Kind: TTI::OK_AnyValue, .Properties: .Properties: TTI::OP_None}, |
179 | const Instruction *I = nullptr); |
180 | InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, |
181 | Align Alignment, unsigned AddressSpace, |
182 | TTI::TargetCostKind CostKind); |
183 | InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, |
184 | const Value *Ptr, bool VariableMask, |
185 | Align Alignment, |
186 | TTI::TargetCostKind CostKind, |
187 | const Instruction *I); |
188 | InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs, |
189 | const Value *Base, |
190 | const TTI::PointersChainInfo &Info, |
191 | Type *AccessTy, |
192 | TTI::TargetCostKind CostKind); |
193 | InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, |
194 | const SCEV *Ptr); |
195 | |
196 | std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, |
197 | IntrinsicInst &II) const; |
198 | std::optional<Value *> |
199 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, |
200 | APInt DemandedMask, KnownBits &Known, |
201 | bool &KnownBitsComputed) const; |
202 | std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
203 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
204 | APInt &UndefElts2, APInt &UndefElts3, |
205 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
206 | SimplifyAndSetOp) const; |
207 | |
208 | unsigned getAtomicMemIntrinsicMaxElementSize() const; |
209 | |
210 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
211 | TTI::TargetCostKind CostKind); |
212 | |
213 | InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
214 | std::optional<FastMathFlags> FMF, |
215 | TTI::TargetCostKind CostKind); |
216 | |
217 | InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, |
218 | TTI::TargetCostKind CostKind, |
219 | FastMathFlags FMF); |
220 | |
221 | InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, |
222 | FastMathFlags FMF, |
223 | TTI::TargetCostKind CostKind); |
224 | |
225 | InstructionCost getInterleavedMemoryOpCost( |
226 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
227 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
228 | bool UseMaskForCond = false, bool UseMaskForGaps = false); |
229 | InstructionCost getInterleavedMemoryOpCostAVX512( |
230 | unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, |
231 | ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, |
232 | TTI::TargetCostKind CostKind, bool UseMaskForCond = false, |
233 | bool UseMaskForGaps = false); |
234 | |
235 | InstructionCost getIntImmCost(int64_t); |
236 | |
237 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
238 | TTI::TargetCostKind CostKind); |
239 | |
240 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
241 | const Instruction *I = nullptr); |
242 | |
243 | InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, |
244 | const APInt &Imm, Type *Ty, |
245 | TTI::TargetCostKind CostKind, |
246 | Instruction *Inst = nullptr); |
247 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
248 | const APInt &Imm, Type *Ty, |
249 | TTI::TargetCostKind CostKind); |
250 | /// Return the cost of the scaling factor used in the addressing |
251 | /// mode represented by AM for this target, for a load/store |
252 | /// of the specified type. |
253 | /// If the AM is supported, the return value must be >= 0. |
254 | /// If the AM is not supported, it returns a negative value. |
255 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
256 | int64_t BaseOffset, bool HasBaseReg, |
257 | int64_t Scale, unsigned AddrSpace) const; |
258 | |
259 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, |
260 | const TargetTransformInfo::LSRCost &C2); |
261 | bool canMacroFuseCmp(); |
262 | bool isLegalMaskedLoad(Type *DataType, Align Alignment); |
263 | bool isLegalMaskedStore(Type *DataType, Align Alignment); |
264 | bool isLegalNTLoad(Type *DataType, Align Alignment); |
265 | bool isLegalNTStore(Type *DataType, Align Alignment); |
266 | bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const; |
267 | bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment); |
268 | bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) { |
269 | return forceScalarizeMaskedGather(VTy, Alignment); |
270 | } |
271 | bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment); |
272 | bool isLegalMaskedGather(Type *DataType, Align Alignment); |
273 | bool isLegalMaskedScatter(Type *DataType, Align Alignment); |
274 | bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment); |
275 | bool isLegalMaskedCompressStore(Type *DataType, Align Alignment); |
276 | bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, |
277 | const SmallBitVector &OpcodeMask) const; |
278 | bool hasDivRemOp(Type *DataType, bool IsSigned); |
279 | bool isExpensiveToSpeculativelyExecute(const Instruction *I); |
280 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty); |
281 | bool areInlineCompatible(const Function *Caller, |
282 | const Function *Callee) const; |
283 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, |
284 | const ArrayRef<Type *> &Type) const; |
285 | |
286 | uint64_t getMaxMemIntrinsicInlineSizeThreshold() const { |
287 | return ST->getMaxInlineSizeThreshold(); |
288 | } |
289 | |
290 | TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, |
291 | bool IsZeroCmp) const; |
292 | bool prefersVectorizedAddressing() const; |
293 | bool supportsEfficientVectorElementLoadStore() const; |
294 | bool enableInterleavedAccessVectorization(); |
295 | |
296 | private: |
297 | bool supportsGather() const; |
298 | InstructionCost getGSScalarCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
299 | Type *DataTy, bool VariableMask, |
300 | Align Alignment, unsigned AddressSpace); |
301 | InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
302 | Type *DataTy, const Value *Ptr, |
303 | Align Alignment, unsigned AddressSpace); |
304 | |
305 | int getGatherOverhead() const; |
306 | int getScatterOverhead() const; |
307 | |
308 | /// @} |
309 | }; |
310 | |
311 | } // end namespace llvm |
312 | |
313 | #endif |
314 | |