1 | //===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
/// This file provides a TargetTransformInfo::Concept conforming object specific to the
10 | /// X86 target machine. It uses the target's detailed information to |
11 | /// provide more precise answers to certain TTI queries, while letting the |
12 | /// target independent and default TTI implementations handle the rest. |
13 | /// |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H |
17 | #define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H |
18 | |
19 | #include "X86TargetMachine.h" |
20 | #include "llvm/Analysis/TargetTransformInfo.h" |
21 | #include "llvm/CodeGen/BasicTTIImpl.h" |
22 | #include <optional> |
23 | |
24 | namespace llvm { |
25 | |
26 | class InstCombiner; |
27 | |
28 | class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> { |
29 | typedef BasicTTIImplBase<X86TTIImpl> BaseT; |
30 | typedef TargetTransformInfo TTI; |
31 | friend BaseT; |
32 | |
33 | const X86Subtarget *ST; |
34 | const X86TargetLowering *TLI; |
35 | |
36 | const X86Subtarget *getST() const { return ST; } |
37 | const X86TargetLowering *getTLI() const { return TLI; } |
38 | |
39 | const FeatureBitset InlineFeatureIgnoreList = { |
40 | // This indicates the CPU is 64 bit capable not that we are in 64-bit |
41 | // mode. |
42 | X86::FeatureX86_64, |
43 | |
44 | // These features don't have any intrinsics or ABI effect. |
45 | X86::FeatureNOPL, |
46 | X86::FeatureCX16, |
47 | X86::FeatureLAHFSAHF64, |
48 | |
49 | // Some older targets can be setup to fold unaligned loads. |
50 | X86::FeatureSSEUnalignedMem, |
51 | |
52 | // Codegen control options. |
53 | X86::TuningFast11ByteNOP, |
54 | X86::TuningFast15ByteNOP, |
55 | X86::TuningFastBEXTR, |
56 | X86::TuningFastHorizontalOps, |
57 | X86::TuningFastLZCNT, |
58 | X86::TuningFastScalarFSQRT, |
59 | X86::TuningFastSHLDRotate, |
60 | X86::TuningFastScalarShiftMasks, |
61 | X86::TuningFastVectorShiftMasks, |
62 | X86::TuningFastVariableCrossLaneShuffle, |
63 | X86::TuningFastVariablePerLaneShuffle, |
64 | X86::TuningFastVectorFSQRT, |
65 | X86::TuningLEAForSP, |
66 | X86::TuningLEAUsesAG, |
67 | X86::TuningLZCNTFalseDeps, |
68 | X86::TuningBranchFusion, |
69 | X86::TuningMacroFusion, |
70 | X86::TuningPadShortFunctions, |
71 | X86::TuningPOPCNTFalseDeps, |
72 | X86::TuningMULCFalseDeps, |
73 | X86::TuningPERMFalseDeps, |
74 | X86::TuningRANGEFalseDeps, |
75 | X86::TuningGETMANTFalseDeps, |
76 | X86::TuningMULLQFalseDeps, |
77 | X86::TuningSlow3OpsLEA, |
78 | X86::TuningSlowDivide32, |
79 | X86::TuningSlowDivide64, |
80 | X86::TuningSlowIncDec, |
81 | X86::TuningSlowLEA, |
82 | X86::TuningSlowPMADDWD, |
83 | X86::TuningSlowPMULLD, |
84 | X86::TuningSlowSHLD, |
85 | X86::TuningSlowTwoMemOps, |
86 | X86::TuningSlowUAMem16, |
87 | X86::TuningPreferMaskRegisters, |
88 | X86::TuningInsertVZEROUPPER, |
89 | X86::TuningUseSLMArithCosts, |
90 | X86::TuningUseGLMDivSqrtCosts, |
91 | X86::TuningNoDomainDelay, |
92 | X86::TuningNoDomainDelayMov, |
93 | X86::TuningNoDomainDelayShuffle, |
94 | X86::TuningNoDomainDelayBlend, |
95 | X86::TuningPreferShiftShuffle, |
96 | X86::TuningFastImmVectorShift, |
97 | X86::TuningFastDPWSSD, |
98 | |
99 | // Perf-tuning flags. |
100 | X86::TuningFastGather, |
101 | X86::TuningSlowUAMem32, |
102 | X86::TuningAllowLight256Bit, |
103 | |
104 | // Based on whether user set the -mprefer-vector-width command line. |
105 | X86::TuningPrefer128Bit, |
106 | X86::TuningPrefer256Bit, |
107 | |
108 | // CPU name enums. These just follow CPU string. |
109 | X86::ProcIntelAtom |
110 | }; |
111 | |
112 | public: |
113 | explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F) |
114 | : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), |
115 | TLI(ST->getTargetLowering()) {} |
116 | |
117 | /// \name Scalar TTI Implementations |
118 | /// @{ |
119 | TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); |
120 | |
121 | /// @} |
122 | |
123 | /// \name Cache TTI Implementation |
124 | /// @{ |
125 | std::optional<unsigned> getCacheSize( |
126 | TargetTransformInfo::CacheLevel Level) const override; |
127 | std::optional<unsigned> getCacheAssociativity( |
128 | TargetTransformInfo::CacheLevel Level) const override; |
129 | /// @} |
130 | |
131 | /// \name Vector TTI Implementations |
132 | /// @{ |
133 | |
134 | unsigned getNumberOfRegisters(unsigned ClassID) const; |
135 | TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; |
136 | unsigned getLoadStoreVecRegBitWidth(unsigned AS) const; |
137 | unsigned getMaxInterleaveFactor(ElementCount VF); |
138 | InstructionCost getArithmeticInstrCost( |
139 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
140 | TTI::OperandValueInfo Op1Info = {.Kind: .Kind: TTI::OK_AnyValue, .Properties: .Properties: TTI::OP_None}, |
141 | TTI::OperandValueInfo Op2Info = {.Kind: .Kind: TTI::OK_AnyValue, .Properties: .Properties: TTI::OP_None}, |
142 | ArrayRef<const Value *> Args = std::nullopt, |
143 | const Instruction *CxtI = nullptr); |
144 | InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, |
145 | unsigned Opcode1, |
146 | const SmallBitVector &OpcodeMask, |
147 | TTI::TargetCostKind CostKind) const; |
148 | |
149 | InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, |
150 | ArrayRef<int> Mask, |
151 | TTI::TargetCostKind CostKind, int Index, |
152 | VectorType *SubTp, |
153 | ArrayRef<const Value *> Args = std::nullopt, |
154 | const Instruction *CxtI = nullptr); |
155 | InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
156 | TTI::CastContextHint CCH, |
157 | TTI::TargetCostKind CostKind, |
158 | const Instruction *I = nullptr); |
159 | InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
160 | CmpInst::Predicate VecPred, |
161 | TTI::TargetCostKind CostKind, |
162 | const Instruction *I = nullptr); |
163 | using BaseT::getVectorInstrCost; |
164 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
165 | TTI::TargetCostKind CostKind, |
166 | unsigned Index, Value *Op0, Value *Op1); |
167 | InstructionCost getScalarizationOverhead(VectorType *Ty, |
168 | const APInt &DemandedElts, |
169 | bool Insert, bool , |
170 | TTI::TargetCostKind CostKind); |
171 | InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, |
172 | int VF, |
173 | const APInt &DemandedDstElts, |
174 | TTI::TargetCostKind CostKind); |
175 | InstructionCost |
176 | getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, |
177 | unsigned AddressSpace, TTI::TargetCostKind CostKind, |
178 | TTI::OperandValueInfo OpInfo = {.Kind: .Kind: TTI::OK_AnyValue, .Properties: .Properties: TTI::OP_None}, |
179 | const Instruction *I = nullptr); |
180 | InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, |
181 | Align Alignment, unsigned AddressSpace, |
182 | TTI::TargetCostKind CostKind); |
183 | InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, |
184 | const Value *Ptr, bool VariableMask, |
185 | Align Alignment, |
186 | TTI::TargetCostKind CostKind, |
187 | const Instruction *I); |
188 | InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs, |
189 | const Value *Base, |
190 | const TTI::PointersChainInfo &Info, |
191 | Type *AccessTy, |
192 | TTI::TargetCostKind CostKind); |
193 | InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, |
194 | const SCEV *Ptr); |
195 | |
196 | std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, |
197 | IntrinsicInst &II) const; |
198 | std::optional<Value *> |
199 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, |
200 | APInt DemandedMask, KnownBits &Known, |
201 | bool &KnownBitsComputed) const; |
202 | std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
203 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
204 | APInt &UndefElts2, APInt &UndefElts3, |
205 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
206 | SimplifyAndSetOp) const; |
207 | |
208 | unsigned getAtomicMemIntrinsicMaxElementSize() const; |
209 | |
210 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
211 | TTI::TargetCostKind CostKind); |
212 | |
213 | InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
214 | std::optional<FastMathFlags> FMF, |
215 | TTI::TargetCostKind CostKind); |
216 | |
217 | InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, |
218 | TTI::TargetCostKind CostKind, |
219 | FastMathFlags FMF); |
220 | |
221 | InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, |
222 | FastMathFlags FMF, |
223 | TTI::TargetCostKind CostKind); |
224 | |
225 | InstructionCost getInterleavedMemoryOpCost( |
226 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
227 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
228 | bool UseMaskForCond = false, bool UseMaskForGaps = false); |
229 | InstructionCost getInterleavedMemoryOpCostAVX512( |
230 | unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, |
231 | ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, |
232 | TTI::TargetCostKind CostKind, bool UseMaskForCond = false, |
233 | bool UseMaskForGaps = false); |
234 | |
235 | InstructionCost getIntImmCost(int64_t); |
236 | |
237 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
238 | TTI::TargetCostKind CostKind); |
239 | |
240 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
241 | const Instruction *I = nullptr); |
242 | |
243 | InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, |
244 | const APInt &Imm, Type *Ty, |
245 | TTI::TargetCostKind CostKind, |
246 | Instruction *Inst = nullptr); |
247 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
248 | const APInt &Imm, Type *Ty, |
249 | TTI::TargetCostKind CostKind); |
250 | /// Return the cost of the scaling factor used in the addressing |
251 | /// mode represented by AM for this target, for a load/store |
252 | /// of the specified type. |
253 | /// If the AM is supported, the return value must be >= 0. |
254 | /// If the AM is not supported, it returns a negative value. |
255 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
256 | int64_t BaseOffset, bool HasBaseReg, |
257 | int64_t Scale, unsigned AddrSpace) const; |
258 | |
259 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, |
260 | const TargetTransformInfo::LSRCost &C2); |
261 | bool canMacroFuseCmp(); |
262 | bool isLegalMaskedLoad(Type *DataType, Align Alignment); |
263 | bool isLegalMaskedStore(Type *DataType, Align Alignment); |
264 | bool isLegalNTLoad(Type *DataType, Align Alignment); |
265 | bool isLegalNTStore(Type *DataType, Align Alignment); |
266 | bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const; |
267 | bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment); |
268 | bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) { |
269 | return forceScalarizeMaskedGather(VTy, Alignment); |
270 | } |
271 | bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment); |
272 | bool isLegalMaskedGather(Type *DataType, Align Alignment); |
273 | bool isLegalMaskedScatter(Type *DataType, Align Alignment); |
274 | bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment); |
275 | bool isLegalMaskedCompressStore(Type *DataType, Align Alignment); |
276 | bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, |
277 | const SmallBitVector &OpcodeMask) const; |
278 | bool hasDivRemOp(Type *DataType, bool IsSigned); |
279 | bool isExpensiveToSpeculativelyExecute(const Instruction *I); |
280 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty); |
281 | bool areInlineCompatible(const Function *Caller, |
282 | const Function *Callee) const; |
283 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, |
284 | const ArrayRef<Type *> &Type) const; |
285 | |
286 | uint64_t getMaxMemIntrinsicInlineSizeThreshold() const { |
287 | return ST->getMaxInlineSizeThreshold(); |
288 | } |
289 | |
290 | TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, |
291 | bool IsZeroCmp) const; |
292 | bool prefersVectorizedAddressing() const; |
293 | bool supportsEfficientVectorElementLoadStore() const; |
294 | bool enableInterleavedAccessVectorization(); |
295 | |
296 | private: |
297 | bool supportsGather() const; |
298 | InstructionCost getGSScalarCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
299 | Type *DataTy, bool VariableMask, |
300 | Align Alignment, unsigned AddressSpace); |
301 | InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
302 | Type *DataTy, const Value *Ptr, |
303 | Align Alignment, unsigned AddressSpace); |
304 | |
305 | int getGatherOverhead() const; |
306 | int getScatterOverhead() const; |
307 | |
308 | /// @} |
309 | }; |
310 | |
311 | } // end namespace llvm |
312 | |
313 | #endif |
314 | |