AMDGPUTargetTransformInfo.h source code [llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h]

1	//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// This file declares a TargetTransformInfo::Concept conforming object specific
11	/// to the AMDGPU target machine. It uses the target's detailed information to
12	/// provide more precise answers to certain TTI queries, while letting the
13	/// target-independent and default TTI implementations handle the rest.
14	//
15	//===----------------------------------------------------------------------===//
16
17	#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
18	#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
19
20	#include "AMDGPU.h"
21	#include "llvm/CodeGen/BasicTTIImpl.h"
22	#include <optional>
23
24	namespace llvm {
25
26	class AMDGPUTargetMachine;
27	class GCNSubtarget;
28	class InstCombiner;
29	class Loop;
30	class ScalarEvolution;
31	class SITargetLowering;
32	class Type;
33	class Value;
34
35	class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
36	using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
37	using TTI = TargetTransformInfo;
38
39	friend BaseT;
40
41	Triple TargetTriple;
42
43	const TargetSubtargetInfo *ST;
44	const TargetLoweringBase *TLI;
45
46	const TargetSubtargetInfo getST() const* { return ST; }
47	const TargetLoweringBase getTLI() const* { return TLI; }
48
49	public:
50	explicit AMDGPUTTIImpl(const AMDGPUTargetMachine TM, const* Function &F);
51
52	void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
53	TTI::UnrollingPreferences &UP,
54	OptimizationRemarkEmitter *ORE);
55
56	void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
57	TTI::PeelingPreferences &PP);
58
59	int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
60	};
61
62	class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
63	using BaseT = BasicTTIImplBase<GCNTTIImpl>;
64	using TTI = TargetTransformInfo;
65
66	friend BaseT;
67
68	const GCNSubtarget *ST;
69	const SITargetLowering *TLI;
70	AMDGPUTTIImpl CommonTTI;
71	bool IsGraphics;
72	bool HasFP32Denormals;
73	bool HasFP64FP16Denormals;
74	static constexpr bool InlinerVectorBonusPercent = `0`;
75
76	static const FeatureBitset InlineFeatureIgnoreList;
77
78	const GCNSubtarget getST() const* { return ST; }
79	const SITargetLowering getTLI() const* { return TLI; }
80
81	static inline int getFullRateInstrCost() {
82	return TargetTransformInfo::TCC_Basic;
83	}
84
85	static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
86	return CostKind == TTI::TCK_CodeSize ? `2`
87	: `2` * TargetTransformInfo::TCC_Basic;
88	}
89
90	// TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
91	// should be 2 or 4.
92	static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
93	return CostKind == TTI::TCK_CodeSize ? `2`
94	: `4` * TargetTransformInfo::TCC_Basic;
95	}
96
97	// On some parts, normal fp64 operations are half rate, and others
98	// quarter. This also applies to some integer operations.
99	int get64BitInstrCost(TTI::TargetCostKind CostKind) const;
100
101	std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type Ty) const*;
102
103	public:
104	explicit GCNTTIImpl(const AMDGPUTargetMachine TM, const* Function &F);
105
106	bool hasBranchDivergence(const Function F = nullptr) const*;
107
108	void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
109	TTI::UnrollingPreferences &UP,
110	OptimizationRemarkEmitter *ORE);
111
112	void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
113	TTI::PeelingPreferences &PP);
114
115	TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
116	assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
117	return TTI::PSK_FastHardware;
118	}
119
120	unsigned getNumberOfRegisters(unsigned RCID) const;
121	TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
122	unsigned getMinVectorRegisterBitWidth() const;
123	unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
124	unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
125	unsigned ChainSizeInBytes,
126	VectorType VecTy) const*;
127	unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
128	unsigned ChainSizeInBytes,
129	VectorType VecTy) const*;
130	unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
131
132	bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
133	unsigned AddrSpace) const;
134	bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
135	unsigned AddrSpace) const;
136	bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
137	unsigned AddrSpace) const;
138
139	int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
140	Type *getMemcpyLoopLoweringType(
141	LLVMContext & Context, Value * Length, unsigned SrcAddrSpace,
142	unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
143	std::optional<uint32_t> AtomicElementSize) const;
144
145	void getMemcpyLoopResidualLoweringType(
146	SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
147	unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
148	unsigned SrcAlign, unsigned DestAlign,
149	std::optional<uint32_t> AtomicCpySize) const;
150	unsigned getMaxInterleaveFactor(ElementCount VF);
151
152	bool getTgtMemIntrinsic(IntrinsicInst Inst, MemIntrinsicInfo &Info) const*;
153
154	InstructionCost getArithmeticInstrCost(
155	unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
156	TTI::OperandValueInfo Op1Info = {.Kind: TTI::OK_AnyValue, .Properties: TTI::OP_None},
157	TTI::OperandValueInfo Op2Info = {.Kind: TTI::OK_AnyValue, .Properties: TTI::OP_None},
158	ArrayRef<const Value *> Args = std::nullopt,
159	const Instruction CxtI = nullptr*);
160
161	InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
162	const Instruction I = nullptr*);
163
164	bool isInlineAsmSourceOfDivergence(const CallInst *CI,
165	ArrayRef<unsigned> Indices = {}) const;
166
167	using BaseT::getVectorInstrCost;
168	InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
169	TTI::TargetCostKind CostKind,
170	unsigned Index, Value Op0, Value Op1);
171
172	bool isReadRegisterSourceOfDivergence(const IntrinsicInst ReadReg) const*;
173	bool isSourceOfDivergence(const Value V) const*;
174	bool isAlwaysUniform(const Value V) const*;
175
176	bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
177	if (ToAS == AMDGPUAS::FLAT_ADDRESS) {
178	switch (FromAS) {
179	case AMDGPUAS::GLOBAL_ADDRESS:
180	case AMDGPUAS::CONSTANT_ADDRESS:
181	case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
182	case AMDGPUAS::LOCAL_ADDRESS:
183	case AMDGPUAS::PRIVATE_ADDRESS:
184	return true;
185	default:
186	break;
187	}
188	return false;
189	}
190	if ((FromAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
191	ToAS == AMDGPUAS::CONSTANT_ADDRESS) \|\|
192	(FromAS == AMDGPUAS::CONSTANT_ADDRESS &&
193	ToAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT))
194	return true;
195	return false;
196	}
197
198	bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
199	return AMDGPU::addrspacesMayAlias(AS1: AS0, AS2: AS1);
200	}
201
202	unsigned getFlatAddressSpace() const {
203	// Don't bother running InferAddressSpaces pass on graphics shaders which
204	// don't use flat addressing.
205	if (IsGraphics)
206	return -`1`;
207	return AMDGPUAS::FLAT_ADDRESS;
208	}
209
210	bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
211	Intrinsic::ID IID) const;
212
213	bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
214	return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
215	AS != AMDGPUAS::PRIVATE_ADDRESS;
216	}
217
218	Value rewriteIntrinsicWithAddressSpace(IntrinsicInst II, Value *OldV,
219	Value NewV) const*;
220
221	bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
222	const Value Op1, InstCombiner &IC) const*;
223	std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
224	IntrinsicInst &II) const;
225	std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
226	InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
227	APInt &UndefElts2, APInt &UndefElts3,
228	std::function<void(Instruction , unsigned*, APInt, APInt &)>
229	SimplifyAndSetOp) const;
230
231	InstructionCost getVectorSplitCost() { return `0`; }
232
233	InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
234	ArrayRef<int> Mask,
235	TTI::TargetCostKind CostKind, int Index,
236	VectorType *SubTp,
237	ArrayRef<const Value *> Args = std::nullopt,
238	const Instruction CxtI = nullptr*);
239
240	bool areInlineCompatible(const Function *Caller,
241	const Function Callee) const*;
242
243	unsigned getInliningThresholdMultiplier() const { return `11`; }
244	unsigned adjustInliningThreshold(const CallBase CB) const*;
245	unsigned getCallerAllocaCost(const CallBase CB, const* AllocaInst AI) const*;
246
247	int getInlinerVectorBonusPercent() const { return InlinerVectorBonusPercent; }
248
249	InstructionCost getArithmeticReductionCost(
250	unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
251	TTI::TargetCostKind CostKind);
252
253	InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
254	TTI::TargetCostKind CostKind);
255	InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
256	FastMathFlags FMF,
257	TTI::TargetCostKind CostKind);
258
259	/// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
260	unsigned getCacheLineSize() const override { return `128`; }
261
262	/// How much before a load we should place the prefetch instruction.
263	/// This is currently measured in number of IR instructions.
264	unsigned getPrefetchDistance() const override;
265
266	/// \return if target want to issue a prefetch in address space \p AS.
267	bool shouldPrefetchAddressSpace(unsigned AS) const override;
268	};
269
270	} // end namespace llvm
271
272	#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
273

source code of llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h